1 /**
2 * Copyright © 2022 IBM Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "fan.hpp"
17
18 #include "logging.hpp"
19 #include "sdbusplus.hpp"
20 #include "system.hpp"
21 #include "types.hpp"
22 #include "utility.hpp"
23
24 #include <phosphor-logging/log.hpp>
25
26 #include <format>
27
28 namespace phosphor
29 {
30 namespace fan
31 {
32 namespace monitor
33 {
34
35 using namespace phosphor::logging;
36 using namespace sdbusplus::bus::match;
37
/**
 * @brief Constructs a Fan from its definition.
 *
 * Copies the monitoring parameters out of @p def, registers D-Bus matches
 * for the fan's inventory item (properties-changed and interfaces-added),
 * creates one TachSensor per entry in def.sensorList, pushes the initial
 * functional state to the inventory and finally reads the current
 * "Present" property.
 *
 * @param[in] mode   - handed to every TachSensor; when MONITOR_USE_JSON is
 *                     not defined, any value other than Mode::init starts
 *                     monitoring right away
 * @param[in] bus    - D-Bus connection used for the matches and the
 *                     inventory update
 * @param[in] event  - event loop the timers are attached to
 * @param[in] trust  - trust manager each sensor is registered with
 * @param[in] def    - the fan definition
 * @param[in] system - the System object this fan reports to
 */
Fan::Fan(Mode mode, sdbusplus::bus_t& bus, const sdeventplus::Event& event,
         std::unique_ptr<trust::Manager>& trust, const FanDefinition& def,
         System& system) :
    _bus(bus), _name(def.name), _deviation(def.deviation),
    _upperDeviation(def.upperDeviation),
    _numSensorFailsForNonFunc(def.numSensorFailsForNonfunc),
    _trustManager(trust),
#ifdef MONITOR_USE_JSON
    _monitorDelay(def.monitorStartDelay),
    _monitorTimer(event, std::bind(&Fan::startMonitor, this)),
#endif
    _system(system),
    _presenceMatch(
        bus,
        rules::propertiesChanged(util::INVENTORY_PATH + _name,
                                 util::INV_ITEM_IFACE),
        std::bind(&Fan::presenceChanged, this, std::placeholders::_1)),
    _presenceIfaceAddedMatch(
        bus,
        rules::interfacesAdded() +
            rules::argNpath(0, util::INVENTORY_PATH + _name),
        std::bind(&Fan::presenceIfaceAdded, this, std::placeholders::_1)),
    _fanMissingErrorDelay(def.fanMissingErrDelay),
    _setFuncOnPresent(def.funcOnPresent)
{
    // Create one tach sensor per sensor definition and register each with
    // the trust manager.
    for (const auto& sensorDef : def.sensorList)
    {
        auto tach = std::make_shared<TachSensor>(
            mode, bus, *this, sensorDef.name, sensorDef.hasTarget,
            def.funcDelay, sensorDef.targetInterface, sensorDef.targetPath,
            sensorDef.factor, sensorDef.offset, def.method,
            sensorDef.threshold, sensorDef.ignoreAboveMax, def.timeout,
            def.nonfuncRotorErrDelay, def.countInterval, event);
        _sensors.emplace_back(std::move(tach));

        _trustManager->registerSensor(_sensors.back());
    }

    // With a zero failure threshold the fan is always treated as
    // functional here; otherwise it depends on the sensor states.
    const bool fanFunctional =
        (_numSensorFailsForNonFunc == 0) ||
        (countNonFunctionalSensors() < _numSensorFailsForNonFunc);

    const bool inventoryUpdateFailed = updateInventory(fanFunctional);
    if (inventoryUpdateFailed && !fanFunctional)
    {
        // The inventory update hit a D-Bus error, possibly because D-Bus
        // wasn't ready yet. Put the sensors back to functional to avoid a
        // false alarm; they get updated again once the properties-changed
        // events arrive.
        for (auto& tach : _sensors)
        {
            tach->setFunctional(true);
        }
    }

#ifndef MONITOR_USE_JSON
    // Check current tach state when entering monitor mode
    if (mode != Mode::init)
    {
        _monitorReady = true;

        // The TachSensors have already read their input and target
        // values by now, so evaluate them.
        tachChanged();
    }
#else
    if (_system.isPowerOn())
    {
        _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));
    }
#endif

    // The missing-fan error timer only exists when a delay is configured.
    if (_fanMissingErrorDelay)
    {
        using MissingFanTimer =
            sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>;

        _fanMissingErrorTimer = std::make_unique<MissingFanTimer>(
            event, std::bind(&System::fanMissingErrorTimerExpired, &system,
                             std::ref(*this)));
    }

    try
    {
        _present = util::SDBusPlus::getProperty<bool>(
            util::INVENTORY_PATH + _name, util::INV_ITEM_IFACE, "Present");

        if (!_present)
        {
            getLogger().log(
                std::format("On startup, fan {} is missing", _name));

            if (_system.isPowerOn() && _fanMissingErrorTimer)
            {
                _fanMissingErrorTimer->restartOnce(
                    std::chrono::seconds{*_fanMissingErrorDelay});
            }
        }
    }
    catch (const util::DBusServiceError&)
    {
        // This could happen on the first BMC boot if the presence
        // detect app hasn't started yet and there isn't an inventory
        // cache yet.
    }
}
139
/**
 * @brief Handler for the interfaces-added match on the fan's inventory path.
 *
 * Picks the "Present" property of the inventory item interface out of the
 * signal, stores it in _present and notifies the System object. Signals
 * that do not carry that interface or property are ignored.
 *
 * @param[in] msg - the interfaces-added signal message
 */
void Fan::presenceIfaceAdded(sdbusplus::message_t& msg)
{
    using PropertyMap = std::map<std::string, std::variant<bool>>;

    sdbusplus::message::object_path objPath;
    std::map<std::string, PropertyMap> addedIfaces;

    msg.read(objPath, addedIfaces);

    const auto itemIface = addedIfaces.find(util::INV_ITEM_IFACE);
    if (itemIface == addedIfaces.end())
    {
        return;
    }

    const auto& itemProps = itemIface->second;
    const auto presentProp = itemProps.find("Present");
    if (presentProp == itemProps.end())
    {
        return;
    }

    _present = std::get<bool>(presentProp->second);

    if (!_present)
    {
        getLogger().log(std::format(
            "New fan {} interface added and fan is not present", _name));

        // Only arm the missing-fan timer while powered on, and only if
        // the timer was created.
        if (_system.isPowerOn() && _fanMissingErrorTimer)
        {
            _fanMissingErrorTimer->restartOnce(
                std::chrono::seconds{*_fanMissingErrorDelay});
        }
    }

    _system.fanStatusChange(*this);
}
174
/**
 * @brief Marks the fan as ready for monitoring and evaluates every sensor.
 *
 * Each sensor is asked to refresh its tach and target values and is then
 * run through tachChanged(). A sensor whose refresh throws
 * util::DBusServiceError is set nonfunctional instead.
 */
void Fan::startMonitor()
{
    _monitorReady = true;

    for (auto& tach : _sensors)
    {
        try
        {
            // Force a getProperty call to check if the tach sensor is
            // on D-Bus. If it isn't, now set it to nonfunctional.
            // This isn't done earlier so that code watching for
            // nonfunctional tach sensors doesn't take actions before
            // those sensors show up on D-Bus.
            tach->updateTachAndTarget();
            tachChanged(*tach);
        }
        catch (const util::DBusServiceError&)
        {
            // The tach property still isn't on D-Bus. Ensure
            // sensor is nonfunctional, but skip creating an
            // error for it since it isn't a fan problem.
            getLogger().log(std::format(
                "Monitoring starting but {} sensor value not on D-Bus",
                tach->name()));

            tach->setFunctional(false, true);

            // Move the fan itself to nonfunctional when fan-level status
            // is tracked and the failure threshold has been reached.
            if (_numSensorFailsForNonFunc && _functional &&
                (countNonFunctionalSensors() >= _numSensorFailsForNonFunc))
            {
                updateInventory(false);
            }

            // At this point, don't start any power off actions due
            // to missing sensors. Let something else handle that
            // policy.
            _system.fanStatusChange(*this, true);
        }
    }
}
217
tachChanged()218 void Fan::tachChanged()
219 {
220 if (_monitorReady)
221 {
222 for (auto& s : _sensors)
223 {
224 tachChanged(*s);
225 }
226 }
227 }
228
tachChanged(TachSensor & sensor)229 void Fan::tachChanged(TachSensor& sensor)
230 {
231 if (!_system.isPowerOn() || !_monitorReady)
232 {
233 return;
234 }
235
236 if (_trustManager->active())
237 {
238 if (!_trustManager->checkTrust(sensor))
239 {
240 return;
241 }
242 }
243
244 // If the error checking method is 'count', if a tach change leads
245 // to an out of range sensor the count timer will take over in calling
246 // process() until the sensor is healthy again.
247 if (!sensor.countTimerRunning())
248 {
249 process(sensor);
250 }
251 }
252
countTimerExpired(TachSensor & sensor)253 void Fan::countTimerExpired(TachSensor& sensor)
254 {
255 if (_trustManager->active() && !_trustManager->checkTrust(sensor))
256 {
257 return;
258 }
259 process(sensor);
260 }
261
process(TachSensor & sensor)262 void Fan::process(TachSensor& sensor)
263 {
264 // If this sensor is out of range at this moment, start
265 // its timer, at the end of which the inventory
266 // for the fan may get updated to not functional.
267
268 // If this sensor is OK, put everything back into a good state.
269
270 if (outOfRange(sensor))
271 {
272 if (sensor.functional())
273 {
274 switch (sensor.getMethod())
275 {
276 case MethodMode::timebased:
277 // Start nonfunctional timer if not already running
278 sensor.startTimer(TimerMode::nonfunc);
279 break;
280 case MethodMode::count:
281
282 if (!sensor.countTimerRunning())
283 {
284 sensor.startCountTimer();
285 }
286 sensor.setCounter(true);
287 if (sensor.getCounter() >= sensor.getThreshold())
288 {
289 updateState(sensor);
290 }
291 break;
292 }
293 }
294 }
295 else
296 {
297 switch (sensor.getMethod())
298 {
299 case MethodMode::timebased:
300 if (sensor.functional())
301 {
302 if (sensor.timerRunning())
303 {
304 sensor.stopTimer();
305 }
306 }
307 else
308 {
309 // Start functional timer if not already running
310 sensor.startTimer(TimerMode::func);
311 }
312 break;
313 case MethodMode::count:
314 sensor.setCounter(false);
315 if (sensor.getCounter() == 0)
316 {
317 if (!sensor.functional())
318 {
319 updateState(sensor);
320 }
321
322 sensor.stopCountTimer();
323 }
324 break;
325 }
326 }
327 }
328
findTargetSpeed()329 uint64_t Fan::findTargetSpeed()
330 {
331 uint64_t target = 0;
332 // The sensor doesn't support a target,
333 // so get it from another sensor.
334 auto s = std::find_if(_sensors.begin(), _sensors.end(), [](const auto& s) {
335 return s->hasTarget();
336 });
337
338 if (s != _sensors.end())
339 {
340 target = (*s)->getTarget();
341 }
342
343 return target;
344 }
345
countNonFunctionalSensors() const346 size_t Fan::countNonFunctionalSensors() const
347 {
348 return std::count_if(_sensors.begin(), _sensors.end(), [](const auto& s) {
349 return !s->functional();
350 });
351 }
352
outOfRange(const TachSensor & sensor)353 bool Fan::outOfRange(const TachSensor& sensor)
354 {
355 if (!sensor.hasOwner())
356 {
357 return true;
358 }
359
360 auto actual = static_cast<uint64_t>(sensor.getInput());
361 auto range = sensor.getRange(_deviation, _upperDeviation);
362
363 return ((actual < range.first) ||
364 (range.second && actual > range.second.value()));
365 }
366
/**
 * @brief Flips a sensor's functional state and propagates the result to
 *        the fan's inventory state and the System object.
 *
 * Does nothing while power is off.
 *
 * @param[in] sensor - the sensor whose functional state is toggled
 */
void Fan::updateState(TachSensor& sensor)
{
    if (!_system.isPowerOn())
    {
        return;
    }

    // Captured for the log message below; "NoMax" when there is no upper
    // bound.
    const auto limits = sensor.getRange(_deviation, _upperDeviation);
    const std::string upperLimit =
        limits.second ? std::to_string(limits.second.value()) : "NoMax";

    // Skip starting the error timer if the sensor
    // isn't on D-Bus as this isn't a fan hardware problem.
    sensor.setFunctional(!sensor.functional(), !sensor.hasOwner());

    getLogger().log(std::format(
        "Setting tach sensor {} functional state to {}. "
        "[target = {}, actual = {}, allowed range = ({} - {}) "
        "owned = {}]",
        sensor.name(), sensor.functional(), sensor.getTarget(),
        sensor.getInput(), limits.first, upperLimit, sensor.hasOwner()));

    // A zero value for _numSensorFailsForNonFunc means we aren't dealing
    // with fan FRU functional status, only sensor functional status.
    if (_numSensorFailsForNonFunc)
    {
        const auto failedSensors = countNonFunctionalSensors();
        const bool tooManyFailed = failedSensors >= _numSensorFailsForNonFunc;

        // A nonfunctional fan with enough good sensors again goes back to
        // functional, unless `set_func_on_present` is in effect.
        if (!_setFuncOnPresent && !_functional && !tooManyFailed)
        {
            getLogger().log(std::format("Setting fan {} to functional, number "
                                        "of nonfunctional sensors = {}",
                                        _name, failedSensors));
            updateInventory(true);
        }

        // A functional fan with too many nonfunctional sensors becomes
        // nonfunctional.
        if (_functional && tooManyFailed)
        {
            getLogger().log(std::format("Setting fan {} to nonfunctional, "
                                        "number of nonfunctional sensors = {}",
                                        _name, failedSensors));
            updateInventory(false);
        }
    }

    // Skip the power off rule checks if the sensor isn't
    // on D-Bus so a running system isn't shutdown.
    _system.fanStatusChange(*this, !sensor.hasOwner());
}
425
/**
 * @brief Writes the fan's functional state to the inventory through the
 *        inventory service's Notify method.
 *
 * _functional is set to @p functional whether or not the D-Bus call
 * succeeded.
 *
 * @param[in] functional - the functional state to publish
 *
 * @return true if a D-Bus error occurred, false on success
 */
bool Fan::updateInventory(bool functional)
{
    bool failed = false;

    try
    {
        auto inventoryObjects =
            util::getObjMap<bool>(_name, util::OPERATIONAL_STATUS_INTF,
                                  util::FUNCTIONAL_PROPERTY, functional);

        auto reply = util::SDBusPlus::callMethod(
            _bus, util::INVENTORY_SVC, util::INVENTORY_PATH,
            util::INVENTORY_INTF, "Notify", inventoryObjects);

        if (reply.is_method_error())
        {
            log<level::ERR>("Error in Notify call to update inventory");

            failed = true;
        }
    }
    catch (const util::DBusError& e)
    {
        failed = true;

        getLogger().log(
            std::format("D-Bus Exception reading/updating inventory : {}",
                        e.what()),
            Logger::error);
    }

    // This will always track the current state of the inventory.
    _functional = functional;

    return failed;
}
462
/**
 * @brief Handler for the properties-changed match on the fan's inventory
 *        item interface.
 *
 * When the signal carries a "Present" property, _present is updated, the
 * System object is notified, and the missing-fan error timer is started
 * or stopped to match. Signals without "Present" are ignored.
 *
 * @param[in] msg - the properties-changed signal message
 */
void Fan::presenceChanged(sdbusplus::message_t& msg)
{
    std::string interface;
    std::map<std::string, std::variant<bool>> properties;

    msg.read(interface, properties);

    const auto presentProp = properties.find("Present");
    if (presentProp == properties.end())
    {
        return;
    }

    _present = std::get<bool>(presentProp->second);

    getLogger().log(
        std::format("Fan {} presence state change to {}", _name, _present));

    // When configured to do so, a fan that becomes present is put back to
    // functional along with all of its sensors.
    if (_present && _setFuncOnPresent)
    {
        updateInventory(true);
        for (auto& sensor : _sensors)
        {
            sensor->setFunctional(true);
            sensor->resetMethod();
        }
    }

    _system.fanStatusChange(*this);

    // Guard on the timer object itself, like the other users of
    // _fanMissingErrorTimer do, since it is what gets dereferenced below.
    if (_fanMissingErrorTimer)
    {
        if (!_present && _system.isPowerOn())
        {
            _fanMissingErrorTimer->restartOnce(
                std::chrono::seconds{*_fanMissingErrorDelay});
        }
        else if (_present && _fanMissingErrorTimer->isEnabled())
        {
            _fanMissingErrorTimer->setEnabled(false);
        }
    }
}
503
sensorErrorTimerExpired(const TachSensor & sensor)504 void Fan::sensorErrorTimerExpired(const TachSensor& sensor)
505 {
506 if (_present && _system.isPowerOn())
507 {
508 _system.sensorErrorTimerExpired(*this, sensor);
509 }
510 }
511
/**
 * @brief Reacts to a power state change. Has no effect unless
 *        MONITOR_USE_JSON is defined.
 *
 * Power off: monitoring is marked not ready and the monitor timer, the
 * missing-fan error timer and all sensor timers are stopped.
 *
 * Power on: the monitor start timer is armed, sensors that are on D-Bus
 * are counted and (for a present fan) returned to functional, and the
 * fan itself is returned to functional or flagged as missing.
 *
 * @param[in] powerStateOn - true when power is now on
 */
void Fan::powerStateChanged([[maybe_unused]] bool powerStateOn)
{
#ifdef MONITOR_USE_JSON
    if (!powerStateOn)
    {
        _monitorReady = false;

        if (_monitorTimer.isEnabled())
        {
            _monitorTimer.setEnabled(false);
        }

        if (_fanMissingErrorTimer && _fanMissingErrorTimer->isEnabled())
        {
            _fanMissingErrorTimer->setEnabled(false);
        }

        for (auto& tach : _sensors)
        {
            if (tach->timerRunning())
            {
                tach->stopTimer();
            }

            tach->stopCountTimer();
        }

        return;
    }

    _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));

    _numSensorsOnDBusAtPowerOn = 0;

    for (auto& tach : _sensors)
    {
        try
        {
            // Force a getProperty call. If sensor is on D-Bus,
            // then make sure it's functional.
            tach->updateTachAndTarget();

            ++_numSensorsOnDBusAtPowerOn;

            if (!_present)
            {
                continue;
            }

            // If not functional, set it back to functional.
            if (!tach->functional())
            {
                tach->setFunctional(true);
                _system.fanStatusChange(*this, true);
            }

            // Set the counters back to zero
            if (tach->getMethod() == MethodMode::count)
            {
                tach->resetMethod();
            }
        }
        catch (const util::DBusError&)
        {
            // Properties still aren't on D-Bus. Let startMonitor()
            // deal with it, or maybe System::powerStateChanged() if
            // there aren't any sensors at all on D-Bus.
            getLogger().log(
                std::format("At power on, tach sensor {} value not on D-Bus",
                            tach->name()));
        }
    }

    if (!_present)
    {
        getLogger().log(std::format("At power on, fan {} is missing", _name));

        if (_fanMissingErrorTimer)
        {
            _fanMissingErrorTimer->restartOnce(
                std::chrono::seconds{*_fanMissingErrorDelay});
        }
    }
    else if (_numSensorFailsForNonFunc && !_functional &&
             (countNonFunctionalSensors() < _numSensorFailsForNonFunc))
    {
        // If configured to change functional state on the fan itself,
        // set it back to true now if necessary.
        updateInventory(true);
    }
#endif
}
607
608 } // namespace monitor
609 } // namespace fan
610 } // namespace phosphor
611