1 /**
2 * Copyright © 2022 IBM Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "fan.hpp"
17
18 #include "logging.hpp"
19 #include "sdbusplus.hpp"
20 #include "system.hpp"
21 #include "types.hpp"
22 #include "utility.hpp"
23
24 #include <phosphor-logging/lg2.hpp>
25
26 #include <format>
27
28 namespace phosphor
29 {
30 namespace fan
31 {
32 namespace monitor
33 {
34
35 using namespace sdbusplus::bus::match;
36
/**
 * @brief Constructs a Fan from its definition.
 *
 * Registers D-Bus matches for PropertiesChanged on the fan's inventory item
 * interface and for InterfacesAdded on the fan's inventory path, creates a
 * TachSensor for every entry in def.sensorList, pushes the initial
 * functional state to the inventory and reads the current 'Present'
 * property.
 *
 * @param[in] mode   - passed to each TachSensor; when MONITOR_USE_JSON is
 *                     not defined, any mode other than Mode::init triggers
 *                     an immediate tach check
 * @param[in] bus    - D-Bus connection used for matches and method calls
 * @param[in] event  - event loop object that the timers are attached to
 * @param[in] trust  - trust manager that every tach sensor is registered with
 * @param[in] def    - the fan definition
 * @param[in] system - the owning System object
 */
Fan::Fan(Mode mode, sdbusplus::bus_t& bus, const sdeventplus::Event& event,
         std::unique_ptr<trust::Manager>& trust, const FanDefinition& def,
         System& system) :
    _bus(bus), _name(def.name), _deviation(def.deviation),
    _upperDeviation(def.upperDeviation),
    _numSensorFailsForNonFunc(def.numSensorFailsForNonfunc),
    _trustManager(trust),
#ifdef MONITOR_USE_JSON
    _monitorDelay(def.monitorStartDelay),
    _monitorTimer(event, std::bind(std::mem_fn(&Fan::startMonitor), this)),
#endif
    _system(system),
    _presenceMatch(bus,
                   rules::propertiesChanged(util::INVENTORY_PATH + _name,
                                            util::INV_ITEM_IFACE),
                   std::bind(std::mem_fn(&Fan::presenceChanged), this,
                             std::placeholders::_1)),
    _presenceIfaceAddedMatch(
        bus,
        rules::interfacesAdded() +
            rules::argNpath(0, util::INVENTORY_PATH + _name),
        std::bind(std::mem_fn(&Fan::presenceIfaceAdded), this,
                  std::placeholders::_1)),
    _fanMissingErrorDelay(def.fanMissingErrDelay),
    _setFuncOnPresent(def.funcOnPresent)
{
    // Build one tach sensor per configured entry and hand each of them to
    // the trust manager.
    for (const auto& sensorDef : def.sensorList)
    {
        auto& tach = _sensors.emplace_back(std::make_shared<TachSensor>(
            mode, bus, *this, sensorDef.name, sensorDef.hasTarget,
            def.funcDelay, sensorDef.targetInterface, sensorDef.targetPath,
            sensorDef.factor, sensorDef.offset, def.method,
            sensorDef.threshold, sensorDef.ignoreAboveMax, def.timeout,
            def.nonfuncRotorErrDelay, def.countInterval, event));

        _trustManager->registerSensor(tach);
    }

    // A threshold of zero means the fan itself is always treated as
    // functional here; otherwise it depends on how many sensors failed.
    const bool fanIsFunctional =
        (_numSensorFailsForNonFunc == 0) ||
        (countNonFunctionalSensors() < _numSensorFailsForNonFunc);

    const bool inventoryUpdateFailed = updateInventory(fanIsFunctional);

    if (inventoryUpdateFailed && !fanIsFunctional)
    {
        // The inventory could not be updated, possibly because D-Bus was
        // not ready yet.  Put the sensors back to functional so that no
        // false alarm is raised; the properties-changed subscription will
        // refresh them later.
        std::for_each(_sensors.begin(), _sensors.end(),
                      [](auto& tach) { tach->setFunctional(true); });
    }

#ifndef MONITOR_USE_JSON
    // Entering monitor mode: the TachSensors have already read their input
    // and target values, so evaluate them right away.
    if (mode != Mode::init)
    {
        _monitorReady = true;
        tachChanged();
    }
#else
    if (_system.isPowerOn())
    {
        _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));
    }
#endif

    // The missing-fan error timer only exists when a delay was configured.
    if (_fanMissingErrorDelay)
    {
        using MissingFanTimer =
            sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>;

        _fanMissingErrorTimer = std::make_unique<MissingFanTimer>(
            event, std::bind(&System::fanMissingErrorTimerExpired, &system,
                             std::ref(*this)));
    }

    try
    {
        _present = util::SDBusPlus::getProperty<bool>(
            util::INVENTORY_PATH + _name, util::INV_ITEM_IFACE, "Present");

        if (!_present)
        {
            getLogger().log(
                std::format("On startup, fan {} is missing", _name));

            const bool armMissingTimer = _system.isPowerOn() &&
                                         _fanMissingErrorTimer &&
                                         _fanMissingErrorDelay;
            if (armMissingTimer)
            {
                _fanMissingErrorTimer->restartOnce(
                    std::chrono::seconds{*_fanMissingErrorDelay});
            }
        }
    }
    catch (const util::DBusServiceError&)
    {
        // Deliberately ignored: on the first BMC boot the presence detect
        // application may not be running yet and no inventory cache may
        // exist.
    }
}
139
/**
 * @brief Handler for the InterfacesAdded signal on this fan's inventory path.
 *
 * Reads the object path and interface map out of the message.  If the
 * inventory item interface with a 'Present' property is part of it, the
 * cached presence is updated, the missing-fan error timer is armed or
 * cancelled accordingly, and the System object is notified of the fan
 * status change.  Messages without that interface or property are ignored.
 *
 * @param[in] msg - the InterfacesAdded signal message
 */
void Fan::presenceIfaceAdded(sdbusplus::message_t& msg)
{
    sdbusplus::message::object_path path;
    std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces;

    msg.read(path, interfaces);

    const auto itemIface = interfaces.find(util::INV_ITEM_IFACE);
    if (itemIface == interfaces.end())
    {
        return;
    }

    const auto presentProp = itemIface->second.find("Present");
    if (presentProp == itemIface->second.end())
    {
        return;
    }

    _present = std::get<bool>(presentProp->second);

    if (!_present)
    {
        getLogger().log(std::format(
            "New fan {} interface added and fan is not present", _name));
        if (_system.isPowerOn() && _fanMissingErrorTimer &&
            _fanMissingErrorDelay)
        {
            _fanMissingErrorTimer->restartOnce(
                std::chrono::seconds{*_fanMissingErrorDelay});
        }
    }
    else if (_fanMissingErrorTimer && _fanMissingErrorTimer->isEnabled())
    {
        // The fan is reported present, so a missing-fan timer that was
        // armed earlier (for example at power on, before the inventory
        // interface existed) must not be left running.  This mirrors the
        // handling in presenceChanged().
        _fanMissingErrorTimer->setEnabled(false);
    }

    _system.fanStatusChange(*this);
}
175
/**
 * @brief Marks the fan as ready for monitoring and evaluates every tach
 *        sensor once.
 *
 * Each sensor is asked to refresh its tach and target values.  A sensor
 * whose values cannot be read because of a D-Bus service error is set to
 * nonfunctional, and the fan inventory is updated to nonfunctional if the
 * configured number of failed sensors has been reached.
 */
void Fan::startMonitor()
{
    _monitorReady = true;

    for (auto& tach : _sensors)
    {
        try
        {
            // Force a getProperty call to find out whether the tach sensor
            // is on D-Bus.  This is intentionally not done any earlier so
            // that code watching for nonfunctional tach sensors does not
            // act before those sensors have shown up on D-Bus.
            tach->updateTachAndTarget();
            tachChanged(*tach);
        }
        catch (const util::DBusServiceError&)
        {
            // The tach property is still missing from D-Bus.  Make the
            // sensor nonfunctional, but do not create an error for it as
            // this is not a fan problem.
            getLogger().log(std::format(
                "Monitoring starting but {} sensor value not on D-Bus",
                tach->name()));

            tach->setFunctional(false, true);

            const bool fanNowNonfunctional =
                (_numSensorFailsForNonFunc != 0) && _functional &&
                (countNonFunctionalSensors() >= _numSensorFailsForNonFunc);
            if (fanNowNonfunctional)
            {
                updateInventory(false);
            }

            // Do not start any power off actions because of missing
            // sensors at this point; that policy is left to something
            // else.
            _system.fanStatusChange(*this, true);
        }
    }
}
218
tachChanged()219 void Fan::tachChanged()
220 {
221 if (_monitorReady)
222 {
223 for (auto& s : _sensors)
224 {
225 tachChanged(*s);
226 }
227 }
228 }
229
tachChanged(TachSensor & sensor)230 void Fan::tachChanged(TachSensor& sensor)
231 {
232 if (!_system.isPowerOn() || !_monitorReady)
233 {
234 return;
235 }
236
237 if (_trustManager->active())
238 {
239 if (!_trustManager->checkTrust(sensor))
240 {
241 return;
242 }
243 }
244
245 // If the error checking method is 'count', if a tach change leads
246 // to an out of range sensor the count timer will take over in calling
247 // process() until the sensor is healthy again.
248 if (!sensor.countTimerRunning())
249 {
250 process(sensor);
251 }
252 }
253
countTimerExpired(TachSensor & sensor)254 void Fan::countTimerExpired(TachSensor& sensor)
255 {
256 if (_trustManager->active() && !_trustManager->checkTrust(sensor))
257 {
258 return;
259 }
260 process(sensor);
261 }
262
process(TachSensor & sensor)263 void Fan::process(TachSensor& sensor)
264 {
265 // If this sensor is out of range at this moment, start
266 // its timer, at the end of which the inventory
267 // for the fan may get updated to not functional.
268
269 // If this sensor is OK, put everything back into a good state.
270
271 if (outOfRange(sensor))
272 {
273 if (sensor.functional())
274 {
275 switch (sensor.getMethod())
276 {
277 case MethodMode::timebased:
278 // Start nonfunctional timer if not already running
279 sensor.startTimer(TimerMode::nonfunc);
280 break;
281 case MethodMode::count:
282
283 if (!sensor.countTimerRunning())
284 {
285 sensor.startCountTimer();
286 }
287 sensor.setCounter(true);
288 if (sensor.getCounter() >= sensor.getThreshold())
289 {
290 updateState(sensor);
291 }
292 break;
293 }
294 }
295 }
296 else
297 {
298 switch (sensor.getMethod())
299 {
300 case MethodMode::timebased:
301 if (sensor.functional())
302 {
303 if (sensor.timerRunning())
304 {
305 sensor.stopTimer();
306 }
307 }
308 else
309 {
310 // Start functional timer if not already running
311 sensor.startTimer(TimerMode::func);
312 }
313 break;
314 case MethodMode::count:
315 sensor.setCounter(false);
316 if (sensor.getCounter() == 0)
317 {
318 if (!sensor.functional())
319 {
320 updateState(sensor);
321 }
322
323 sensor.stopCountTimer();
324 }
325 break;
326 }
327 }
328 }
329
findTargetSpeed()330 uint64_t Fan::findTargetSpeed()
331 {
332 uint64_t target = 0;
333 // The sensor doesn't support a target,
334 // so get it from another sensor.
335 auto s = std::find_if(_sensors.begin(), _sensors.end(), [](const auto& s) {
336 return s->hasTarget();
337 });
338
339 if (s != _sensors.end())
340 {
341 target = (*s)->getTarget();
342 }
343
344 return target;
345 }
346
countNonFunctionalSensors() const347 size_t Fan::countNonFunctionalSensors() const
348 {
349 return std::count_if(_sensors.begin(), _sensors.end(), [](const auto& s) {
350 return !s->functional();
351 });
352 }
353
outOfRange(const TachSensor & sensor)354 bool Fan::outOfRange(const TachSensor& sensor)
355 {
356 if (!sensor.hasOwner())
357 {
358 return true;
359 }
360
361 auto actual = static_cast<uint64_t>(sensor.getInput());
362 auto range = sensor.getRange(_deviation, _upperDeviation);
363
364 return ((actual < range.first) ||
365 (range.second && actual > range.second.value()));
366 }
367
/**
 * @brief Toggles a sensor's functional state and propagates the result.
 *
 * Only acts while power is on.  After flipping the sensor state, the fan's
 * inventory functional state is updated when the number of nonfunctional
 * sensors crosses the configured limit, and the System object is told
 * about the status change.
 *
 * @param[in] sensor - the tach sensor whose functional state is toggled
 */
void Fan::updateState(TachSensor& sensor)
{
    if (!_system.isPowerOn())
    {
        return;
    }

    const auto range = sensor.getRange(_deviation, _upperDeviation);
    const std::string rangeMax = range.second
                                     ? std::to_string(range.second.value())
                                     : std::string{"NoMax"};

    // The error timer is skipped when the sensor is not on D-Bus because
    // that is not a fan hardware problem.
    const bool newFunctionalState = !sensor.functional();
    const bool skipErrorTimer = !sensor.hasOwner();
    sensor.setFunctional(newFunctionalState, skipErrorTimer);

    getLogger().log(std::format(
        "Setting tach sensor {} functional state to {}. "
        "[target = {}, actual = {}, allowed range = ({} - {}) "
        "owned = {}]",
        sensor.name(), sensor.functional(), sensor.getTarget(),
        sensor.getInput(), range.first, rangeMax, sensor.hasOwner()));

    // With _numSensorFailsForNonFunc set to zero only the sensor functional
    // status is handled, not the functional status of the fan FRU.
    if (_numSensorFailsForNonFunc != 0)
    {
        const auto failedSensors = countNonFunctionalSensors();
        const bool limitReached = failedSensors >= _numSensorFailsForNonFunc;

        // A nonfunctional fan with enough healthy sensors goes back to
        // functional, unless `set_func_on_present` was set.
        if (!_setFuncOnPresent && !_functional && !limitReached)
        {
            getLogger().log(std::format("Setting fan {} to functional, number "
                                        "of nonfunctional sensors = {}",
                                        _name, failedSensors));
            updateInventory(true);
        }

        // A functional fan with too many nonfunctional sensors becomes
        // nonfunctional.
        if (_functional && limitReached)
        {
            getLogger().log(std::format("Setting fan {} to nonfunctional, "
                                        "number of nonfunctional sensors = {}",
                                        _name, failedSensors));
            updateInventory(false);
        }
    }

    // The power off rule checks are skipped for a sensor that is not on
    // D-Bus so that a running system is not shut down.
    const bool skipPowerOffChecks = !sensor.hasOwner();
    _system.fanStatusChange(*this, skipPowerOffChecks);
}
426
/**
 * @brief Writes the fan's functional state to the inventory.
 *
 * Sends a Notify call to the inventory service.  The cached functional
 * state is updated regardless of whether the call succeeded.
 *
 * @param[in] functional - the functional state to store
 *
 * @return true if a D-Bus error occurred, false otherwise
 */
bool Fan::updateInventory(bool functional)
{
    bool failed = false;

    try
    {
        auto inventoryObjects =
            util::getObjMap<bool>(_name, util::OPERATIONAL_STATUS_INTF,
                                  util::FUNCTIONAL_PROPERTY, functional);

        auto reply = util::SDBusPlus::callMethod(
            _bus, util::INVENTORY_SVC, util::INVENTORY_PATH,
            util::INVENTORY_INTF, "Notify", inventoryObjects);

        failed = reply.is_method_error();
        if (failed)
        {
            lg2::error("Error in Notify call to update inventory");
        }
    }
    catch (const util::DBusError& e)
    {
        failed = true;

        getLogger().log(
            std::format("D-Bus Exception reading/updating inventory : {}",
                        e.what()),
            Logger::error);
    }

    // Always tracks the state that was requested for the inventory.
    _functional = functional;

    return failed;
}
463
/**
 * @brief Handler for PropertiesChanged signals on the fan's inventory item
 *        interface.
 *
 * When the 'Present' property is part of the message, the cached presence
 * is updated, the fan and its sensors are optionally set back to
 * functional, the System object is notified, and the missing-fan error
 * timer is armed or cancelled.
 *
 * @param[in] msg - the PropertiesChanged signal message
 */
void Fan::presenceChanged(sdbusplus::message_t& msg)
{
    std::string iface;
    std::map<std::string, std::variant<bool>> changedProps;

    msg.read(iface, changedProps);

    const auto present = changedProps.find("Present");
    if (present == changedProps.end())
    {
        return;
    }

    _present = std::get<bool>(present->second);

    getLogger().log(
        std::format("Fan {} presence state change to {}", _name, _present));

    if (_present && _setFuncOnPresent)
    {
        updateInventory(true);

        for (auto& tach : _sensors)
        {
            tach->setFunctional(true);
            tach->resetMethod();
        }
    }

    _system.fanStatusChange(*this);

    // Without a configured delay no missing-fan timer handling is done.
    if (!_fanMissingErrorDelay)
    {
        return;
    }

    if (_present)
    {
        if (_fanMissingErrorTimer->isEnabled())
        {
            _fanMissingErrorTimer->setEnabled(false);
        }
    }
    else if (_system.isPowerOn())
    {
        _fanMissingErrorTimer->restartOnce(
            std::chrono::seconds{*_fanMissingErrorDelay});
    }
}
504
sensorErrorTimerExpired(const TachSensor & sensor)505 void Fan::sensorErrorTimerExpired(const TachSensor& sensor)
506 {
507 if (_present && _system.isPowerOn())
508 {
509 _system.sensorErrorTimerExpired(*this, sensor);
510 }
511 }
512
/**
 * @brief Reacts to a power state change.
 *
 * Only has an effect when MONITOR_USE_JSON is defined.  At power on the
 * monitor start timer is restarted, the sensors are refreshed and, for a
 * present fan, put back to functional.  At power off monitoring is
 * disabled and all timers are stopped.
 *
 * @param[in] powerStateOn - true when power is now on, false when it is off
 */
void Fan::powerStateChanged([[maybe_unused]] bool powerStateOn)
{
#ifdef MONITOR_USE_JSON
    if (!powerStateOn)
    {
        // Power off: stop monitoring and every timer that may be running.
        _monitorReady = false;

        if (_monitorTimer.isEnabled())
        {
            _monitorTimer.setEnabled(false);
        }

        if (_fanMissingErrorTimer && _fanMissingErrorTimer->isEnabled())
        {
            _fanMissingErrorTimer->setEnabled(false);
        }

        for (auto& tach : _sensors)
        {
            if (tach->timerRunning())
            {
                tach->stopTimer();
            }

            tach->stopCountTimer();
        }

        return;
    }

    // Power on
    _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));

    _numSensorsOnDBusAtPowerOn = 0;

    for (auto& tach : _sensors)
    {
        try
        {
            // Force a getProperty call; reaching the next statement means
            // the sensor is on D-Bus.
            tach->updateTachAndTarget();

            ++_numSensorsOnDBusAtPowerOn;

            if (!_present)
            {
                continue;
            }

            // Put a nonfunctional sensor back to functional.
            if (!tach->functional())
            {
                tach->setFunctional(true);
                _system.fanStatusChange(*this, true);
            }

            // Set the counters back to zero
            if (tach->getMethod() == MethodMode::count)
            {
                tach->resetMethod();
            }
        }
        catch (const util::DBusError&)
        {
            // The properties are not on D-Bus yet.  This is left to
            // startMonitor(), or maybe System::powerStateChanged() when
            // no sensors at all are on D-Bus.
            getLogger().log(std::format(
                "At power on, tach sensor {} value not on D-Bus",
                tach->name()));
        }
    }

    if (!_present)
    {
        getLogger().log(
            std::format("At power on, fan {} is missing", _name));

        if (_fanMissingErrorTimer && _fanMissingErrorDelay)
        {
            _fanMissingErrorTimer->restartOnce(
                std::chrono::seconds{*_fanMissingErrorDelay});
        }

        return;
    }

    // When the fan's own functional state is managed, set it back to
    // functional now if few enough sensors are nonfunctional.
    const bool restoreFan =
        (_numSensorFailsForNonFunc != 0) && !_functional &&
        (countNonFunctionalSensors() < _numSensorFailsForNonFunc);
    if (restoreFan)
    {
        updateInventory(true);
    }
#endif
}
608
609 } // namespace monitor
610 } // namespace fan
611 } // namespace phosphor
612