1 /**
2  * Copyright © 2022 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "fan.hpp"
17 
18 #include "logging.hpp"
19 #include "sdbusplus.hpp"
20 #include "system.hpp"
21 #include "types.hpp"
22 #include "utility.hpp"
23 
24 #include <fmt/format.h>
25 
26 #include <phosphor-logging/log.hpp>
27 
28 namespace phosphor
29 {
30 namespace fan
31 {
32 namespace monitor
33 {
34 
35 using namespace phosphor::logging;
36 using namespace sdbusplus::bus::match;
37 
38 Fan::Fan(Mode mode, sdbusplus::bus_t& bus, const sdeventplus::Event& event,
39          std::unique_ptr<trust::Manager>& trust, const FanDefinition& def,
40          System& system) :
41     _bus(bus),
42     _name(def.name), _deviation(def.deviation),
43     _numSensorFailsForNonFunc(def.numSensorFailsForNonfunc),
44     _trustManager(trust),
45 #ifdef MONITOR_USE_JSON
46     _monitorDelay(def.monitorStartDelay),
47     _monitorTimer(event, std::bind(std::mem_fn(&Fan::startMonitor), this)),
48 #endif
49     _system(system),
50     _presenceMatch(bus,
51                    rules::propertiesChanged(util::INVENTORY_PATH + _name,
52                                             util::INV_ITEM_IFACE),
53                    std::bind(std::mem_fn(&Fan::presenceChanged), this,
54                              std::placeholders::_1)),
55     _presenceIfaceAddedMatch(
56         bus,
57         rules::interfacesAdded() +
58             rules::argNpath(0, util::INVENTORY_PATH + _name),
59         std::bind(std::mem_fn(&Fan::presenceIfaceAdded), this,
60                   std::placeholders::_1)),
61     _fanMissingErrorDelay(def.fanMissingErrDelay),
62     _setFuncOnPresent(def.funcOnPresent)
63 {
64     // Setup tach sensors for monitoring
65     for (const auto& s : def.sensorList)
66     {
67         _sensors.emplace_back(std::make_shared<TachSensor>(
68             mode, bus, *this, s.name, s.hasTarget, def.funcDelay,
69             s.targetInterface, s.targetPath, s.factor, s.offset, def.method,
70             s.threshold, s.ignoreAboveMax, def.timeout,
71             def.nonfuncRotorErrDelay, def.countInterval, event));
72 
73         _trustManager->registerSensor(_sensors.back());
74     }
75 
76     bool functionalState =
77         (_numSensorFailsForNonFunc == 0) ||
78         (countNonFunctionalSensors() < _numSensorFailsForNonFunc);
79 
80     if (updateInventory(functionalState) && !functionalState)
81     {
82         // the inventory update threw an exception, possibly because D-Bus
83         // wasn't ready. Try to update sensors back to functional to avoid a
84         // false-alarm. They will be updated again from subscribing to the
85         // properties-changed event
86 
87         for (auto& sensor : _sensors)
88             sensor->setFunctional(true);
89     }
90 
91 #ifndef MONITOR_USE_JSON
92     // Check current tach state when entering monitor mode
93     if (mode != Mode::init)
94     {
95         _monitorReady = true;
96 
97         // The TachSensors will now have already read the input
98         // and target values, so check them.
99         tachChanged();
100     }
101 #else
102     if (_system.isPowerOn())
103     {
104         _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));
105     }
106 #endif
107 
108     if (_fanMissingErrorDelay)
109     {
110         _fanMissingErrorTimer = std::make_unique<
111             sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
112             event, std::bind(&System::fanMissingErrorTimerExpired, &system,
113                              std::ref(*this)));
114     }
115 
116     try
117     {
118         _present = util::SDBusPlus::getProperty<bool>(
119             util::INVENTORY_PATH + _name, util::INV_ITEM_IFACE, "Present");
120 
121         if (!_present)
122         {
123             getLogger().log(
124                 fmt::format("On startup, fan {} is missing", _name));
125             if (_system.isPowerOn() && _fanMissingErrorTimer)
126             {
127                 _fanMissingErrorTimer->restartOnce(
128                     std::chrono::seconds{*_fanMissingErrorDelay});
129             }
130         }
131     }
132     catch (const util::DBusServiceError& e)
133     {
134         // This could happen on the first BMC boot if the presence
135         // detect app hasn't started yet and there isn't an inventory
136         // cache yet.
137     }
138 }
139 
140 void Fan::presenceIfaceAdded(sdbusplus::message_t& msg)
141 {
142     sdbusplus::message::object_path path;
143     std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces;
144 
145     msg.read(path, interfaces);
146 
147     auto properties = interfaces.find(util::INV_ITEM_IFACE);
148     if (properties == interfaces.end())
149     {
150         return;
151     }
152 
153     auto property = properties->second.find("Present");
154     if (property == properties->second.end())
155     {
156         return;
157     }
158 
159     _present = std::get<bool>(property->second);
160 
161     if (!_present)
162     {
163         getLogger().log(fmt::format(
164             "New fan {} interface added and fan is not present", _name));
165         if (_system.isPowerOn() && _fanMissingErrorTimer)
166         {
167             _fanMissingErrorTimer->restartOnce(
168                 std::chrono::seconds{*_fanMissingErrorDelay});
169         }
170     }
171 
172     _system.fanStatusChange(*this);
173 }
174 
175 void Fan::startMonitor()
176 {
177     _monitorReady = true;
178 
179     std::for_each(_sensors.begin(), _sensors.end(), [this](auto& sensor) {
180         if (_present)
181         {
182             try
183             {
184                 // Force a getProperty call to check if the tach sensor is
185                 // on D-Bus.  If it isn't, now set it to nonfunctional.
186                 // This isn't done earlier so that code watching for
187                 // nonfunctional tach sensors doesn't take actions before
188                 // those sensors show up on D-Bus.
189                 sensor->updateTachAndTarget();
190                 tachChanged(*sensor);
191             }
192             catch (const util::DBusServiceError& e)
193             {
194                 // The tach property still isn't on D-Bus. Ensure
195                 // sensor is nonfunctional, but skip creating an
196                 // error for it since it isn't a fan problem.
197                 getLogger().log(fmt::format(
198                     "Monitoring starting but {} sensor value not on D-Bus",
199                     sensor->name()));
200 
201                 sensor->setFunctional(false, true);
202 
203                 if (_numSensorFailsForNonFunc)
204                 {
205                     if (_functional && (countNonFunctionalSensors() >=
206                                         _numSensorFailsForNonFunc))
207                     {
208                         updateInventory(false);
209                     }
210                 }
211 
212                 // At this point, don't start any power off actions due
213                 // to missing sensors.  Let something else handle that
214                 // policy.
215                 _system.fanStatusChange(*this, true);
216             }
217         }
218     });
219 }
220 
221 void Fan::tachChanged()
222 {
223     if (_monitorReady)
224     {
225         for (auto& s : _sensors)
226         {
227             tachChanged(*s);
228         }
229     }
230 }
231 
232 void Fan::tachChanged(TachSensor& sensor)
233 {
234     if (!_system.isPowerOn() || !_monitorReady)
235     {
236         return;
237     }
238 
239     if (_trustManager->active())
240     {
241         if (!_trustManager->checkTrust(sensor))
242         {
243             return;
244         }
245     }
246 
247     // If the error checking method is 'count', if a tach change leads
248     // to an out of range sensor the count timer will take over in calling
249     // process() until the sensor is healthy again.
250     if (!sensor.countTimerRunning())
251     {
252         process(sensor);
253     }
254 }
255 
256 void Fan::countTimerExpired(TachSensor& sensor)
257 {
258     if (_trustManager->active() && !_trustManager->checkTrust(sensor))
259     {
260         return;
261     }
262     process(sensor);
263 }
264 
265 void Fan::process(TachSensor& sensor)
266 {
267     // If this sensor is out of range at this moment, start
268     // its timer, at the end of which the inventory
269     // for the fan may get updated to not functional.
270 
271     // If this sensor is OK, put everything back into a good state.
272 
273     if (outOfRange(sensor))
274     {
275         if (sensor.functional())
276         {
277             switch (sensor.getMethod())
278             {
279                 case MethodMode::timebased:
280                     // Start nonfunctional timer if not already running
281                     sensor.startTimer(TimerMode::nonfunc);
282                     break;
283                 case MethodMode::count:
284 
285                     if (!sensor.countTimerRunning())
286                     {
287                         sensor.startCountTimer();
288                     }
289                     sensor.setCounter(true);
290                     if (sensor.getCounter() >= sensor.getThreshold())
291                     {
292                         updateState(sensor);
293                     }
294                     break;
295             }
296         }
297     }
298     else
299     {
300         switch (sensor.getMethod())
301         {
302             case MethodMode::timebased:
303                 if (sensor.functional())
304                 {
305                     if (sensor.timerRunning())
306                     {
307                         sensor.stopTimer();
308                     }
309                 }
310                 else
311                 {
312                     // Start functional timer if not already running
313                     sensor.startTimer(TimerMode::func);
314                 }
315                 break;
316             case MethodMode::count:
317                 sensor.setCounter(false);
318                 if (sensor.getCounter() == 0)
319                 {
320                     if (!sensor.functional())
321                     {
322                         updateState(sensor);
323                     }
324 
325                     sensor.stopCountTimer();
326                 }
327                 break;
328         }
329     }
330 }
331 
332 uint64_t Fan::findTargetSpeed()
333 {
334     uint64_t target = 0;
335     // The sensor doesn't support a target,
336     // so get it from another sensor.
337     auto s = std::find_if(_sensors.begin(), _sensors.end(),
338                           [](const auto& s) { return s->hasTarget(); });
339 
340     if (s != _sensors.end())
341     {
342         target = (*s)->getTarget();
343     }
344 
345     return target;
346 }
347 
348 size_t Fan::countNonFunctionalSensors() const
349 {
350     return std::count_if(_sensors.begin(), _sensors.end(),
351                          [](const auto& s) { return !s->functional(); });
352 }
353 
354 bool Fan::outOfRange(const TachSensor& sensor)
355 {
356     if (!sensor.hasOwner())
357     {
358         return true;
359     }
360 
361     auto actual = static_cast<uint64_t>(sensor.getInput());
362     auto range = sensor.getRange(_deviation);
363 
364     return ((actual < range.first) ||
365             (range.second && actual > range.second.value()));
366 }
367 
368 void Fan::updateState(TachSensor& sensor)
369 {
370     if (!_system.isPowerOn())
371     {
372         return;
373     }
374 
375     auto range = sensor.getRange(_deviation);
376     std::string rangeMax = "NoMax";
377     if (range.second)
378     {
379         rangeMax = std::to_string(range.second.value());
380     }
381 
382     // Skip starting the error timer if the sensor
383     // isn't on D-Bus as this isn't a fan hardware problem.
384     sensor.setFunctional(!sensor.functional(), !sensor.hasOwner());
385 
386     getLogger().log(fmt::format(
387         "Setting tach sensor {} functional state to {}. "
388         "[target = {}, actual = {}, allowed range = ({} - {}) "
389         "owned = {}]",
390         sensor.name(), sensor.functional(), sensor.getTarget(),
391         sensor.getInput(), range.first, rangeMax, sensor.hasOwner()));
392 
393     // A zero value for _numSensorFailsForNonFunc means we aren't dealing
394     // with fan FRU functional status, only sensor functional status.
395     if (_numSensorFailsForNonFunc)
396     {
397         auto numNonFuncSensors = countNonFunctionalSensors();
398         // If the fan was nonfunctional and enough sensors are now OK,
399         // the fan can be set to functional as long as `set_func_on_present` was
400         // not set
401         if (!_setFuncOnPresent && !_functional &&
402             !(numNonFuncSensors >= _numSensorFailsForNonFunc))
403         {
404             getLogger().log(fmt::format("Setting fan {} to functional, number "
405                                         "of nonfunctional sensors = {}",
406                                         _name, numNonFuncSensors));
407             updateInventory(true);
408         }
409 
410         // If the fan is currently functional, but too many
411         // contained sensors are now nonfunctional, update
412         // the fan to nonfunctional.
413         if (_functional && (numNonFuncSensors >= _numSensorFailsForNonFunc))
414         {
415             getLogger().log(fmt::format("Setting fan {} to nonfunctional, "
416                                         "number of nonfunctional sensors = {}",
417                                         _name, numNonFuncSensors));
418             updateInventory(false);
419         }
420     }
421 
422     // Skip the power off rule checks if the sensor isn't
423     // on D-Bus so a running system isn't shutdown.
424     _system.fanStatusChange(*this, !sensor.hasOwner());
425 }
426 
427 bool Fan::updateInventory(bool functional)
428 {
429     bool dbusError = false;
430 
431     try
432     {
433         auto objectMap =
434             util::getObjMap<bool>(_name, util::OPERATIONAL_STATUS_INTF,
435                                   util::FUNCTIONAL_PROPERTY, functional);
436 
437         auto response = util::SDBusPlus::callMethod(
438             _bus, util::INVENTORY_SVC, util::INVENTORY_PATH,
439             util::INVENTORY_INTF, "Notify", objectMap);
440 
441         if (response.is_method_error())
442         {
443             log<level::ERR>("Error in Notify call to update inventory");
444 
445             dbusError = true;
446         }
447     }
448     catch (const util::DBusError& e)
449     {
450         dbusError = true;
451 
452         getLogger().log(
453             fmt::format("D-Bus Exception reading/updating inventory : {}",
454                         e.what()),
455             Logger::error);
456     }
457 
458     // This will always track the current state of the inventory.
459     _functional = functional;
460 
461     return dbusError;
462 }
463 
464 void Fan::presenceChanged(sdbusplus::message_t& msg)
465 {
466     std::string interface;
467     std::map<std::string, std::variant<bool>> properties;
468 
469     msg.read(interface, properties);
470 
471     auto presentProp = properties.find("Present");
472     if (presentProp != properties.end())
473     {
474         _present = std::get<bool>(presentProp->second);
475 
476         getLogger().log(
477             fmt::format("Fan {} presence state change to {}", _name, _present));
478 
479         if (_present && _setFuncOnPresent)
480         {
481             updateInventory(true);
482             std::for_each(_sensors.begin(), _sensors.end(), [](auto& sensor) {
483                 sensor->setFunctional(true);
484                 sensor->resetMethod();
485             });
486         }
487 
488         _system.fanStatusChange(*this);
489 
490         if (_fanMissingErrorDelay)
491         {
492             if (!_present && _system.isPowerOn())
493             {
494                 _fanMissingErrorTimer->restartOnce(
495                     std::chrono::seconds{*_fanMissingErrorDelay});
496             }
497             else if (_present && _fanMissingErrorTimer->isEnabled())
498             {
499                 _fanMissingErrorTimer->setEnabled(false);
500             }
501         }
502     }
503 }
504 
505 void Fan::sensorErrorTimerExpired(const TachSensor& sensor)
506 {
507     if (_present && _system.isPowerOn())
508     {
509         _system.sensorErrorTimerExpired(*this, sensor);
510     }
511 }
512 
513 void Fan::powerStateChanged([[maybe_unused]] bool powerStateOn)
514 {
515 #ifdef MONITOR_USE_JSON
516     if (powerStateOn)
517     {
518         _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));
519 
520         _numSensorsOnDBusAtPowerOn = 0;
521 
522         std::for_each(_sensors.begin(), _sensors.end(), [this](auto& sensor) {
523             try
524             {
525                 // Force a getProperty call.  If sensor is on D-Bus,
526                 // then make sure it's functional.
527                 sensor->updateTachAndTarget();
528 
529                 _numSensorsOnDBusAtPowerOn++;
530 
531                 if (_present)
532                 {
533                     // If not functional, set it back to functional.
534                     if (!sensor->functional())
535                     {
536                         sensor->setFunctional(true);
537                         _system.fanStatusChange(*this, true);
538                     }
539 
540                     // Set the counters back to zero
541                     if (sensor->getMethod() == MethodMode::count)
542                     {
543                         sensor->resetMethod();
544                     }
545                 }
546             }
547             catch (const util::DBusError& e)
548             {
549                 // Properties still aren't on D-Bus.  Let startMonitor()
550                 // deal with it, or maybe System::powerStateChanged() if
551                 // there aren't any sensors at all on D-Bus.
552                 getLogger().log(fmt::format(
553                     "At power on, tach sensor {} value not on D-Bus",
554                     sensor->name()));
555             }
556         });
557 
558         if (_present)
559         {
560             // If configured to change functional state on the fan itself,
561             // Set it back to true now if necessary.
562             if (_numSensorFailsForNonFunc)
563             {
564                 if (!_functional &&
565                     (countNonFunctionalSensors() < _numSensorFailsForNonFunc))
566                 {
567                     updateInventory(true);
568                 }
569             }
570         }
571         else
572         {
573             getLogger().log(
574                 fmt::format("At power on, fan {} is missing", _name));
575 
576             if (_fanMissingErrorTimer)
577             {
578                 _fanMissingErrorTimer->restartOnce(
579                     std::chrono::seconds{*_fanMissingErrorDelay});
580             }
581         }
582     }
583     else
584     {
585         _monitorReady = false;
586 
587         if (_monitorTimer.isEnabled())
588         {
589             _monitorTimer.setEnabled(false);
590         }
591 
592         if (_fanMissingErrorTimer && _fanMissingErrorTimer->isEnabled())
593         {
594             _fanMissingErrorTimer->setEnabled(false);
595         }
596 
597         std::for_each(_sensors.begin(), _sensors.end(), [](auto& sensor) {
598             if (sensor->timerRunning())
599             {
600                 sensor->stopTimer();
601             }
602 
603             sensor->stopCountTimer();
604         });
605     }
606 #endif
607 }
608 
609 } // namespace monitor
610 } // namespace fan
611 } // namespace phosphor
612