1 /**
2 * Copyright © 2022 IBM Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "fan.hpp"
17
18 #include "logging.hpp"
19 #include "sdbusplus.hpp"
20 #include "system.hpp"
21 #include "types.hpp"
22 #include "utility.hpp"
23
24 #include <phosphor-logging/lg2.hpp>
25
26 #include <format>
27
28 namespace phosphor
29 {
30 namespace fan
31 {
32 namespace monitor
33 {
34
35 using namespace sdbusplus::bus::match;
36
/**
 * Construct a Fan from its definition.
 *
 * Copies the monitoring configuration out of `def`, subscribes to the
 * PropertiesChanged and InterfacesAdded signals for this fan's inventory
 * path, creates a TachSensor for every entry in def.sensorList (registering
 * each with the trust manager), writes the initial functional state to the
 * inventory and finally reads the fan's current "Present" property.
 */
Fan::Fan(Mode mode, sdbusplus::bus_t& bus, const sdeventplus::Event& event,
         std::unique_ptr<trust::Manager>& trust, const FanDefinition& def,
         System& system) :
    _bus(bus),
    _name(def.name), _deviation(def.deviation),
    _upperDeviation(def.upperDeviation),
    _numSensorFailsForNonFunc(def.numSensorFailsForNonfunc),
    _trustManager(trust),
#ifdef MONITOR_USE_JSON
    _monitorDelay(def.monitorStartDelay),
    _monitorTimer(event, std::bind(&Fan::startMonitor, this)),
#endif
    _system(system),
    _presenceMatch(
        bus,
        rules::propertiesChanged(util::INVENTORY_PATH + _name,
                                 util::INV_ITEM_IFACE),
        std::bind(&Fan::presenceChanged, this, std::placeholders::_1)),
    _presenceIfaceAddedMatch(
        bus,
        rules::interfacesAdded() +
            rules::argNpath(0, util::INVENTORY_PATH + _name),
        std::bind(&Fan::presenceIfaceAdded, this, std::placeholders::_1)),
    _fanMissingErrorDelay(def.fanMissingErrDelay),
    _setFuncOnPresent(def.funcOnPresent)
{
    // Create a tach sensor for each configured rotor and hand it to the
    // trust manager.
    for (const auto& sensorDef : def.sensorList)
    {
        _sensors.emplace_back(std::make_shared<TachSensor>(
            mode, bus, *this, sensorDef.name, sensorDef.hasTarget,
            def.funcDelay, sensorDef.targetInterface, sensorDef.targetPath,
            sensorDef.factor, sensorDef.offset, def.method,
            sensorDef.threshold, sensorDef.ignoreAboveMax, def.timeout,
            def.nonfuncRotorErrDelay, def.countInterval, event));

        _trustManager->registerSensor(_sensors.back());
    }

    // The fan counts as functional when fan-level status isn't being
    // tracked (threshold of zero) or too few sensors have failed.
    const bool functionalState =
        (_numSensorFailsForNonFunc == 0) ||
        (countNonFunctionalSensors() < _numSensorFailsForNonFunc);

    const bool inventoryUpdateFailed = updateInventory(functionalState);
    if (inventoryUpdateFailed && !functionalState)
    {
        // The inventory write failed, possibly because D-Bus wasn't ready.
        // Put the sensors back to functional to avoid a false alarm; they
        // get updated again via the properties-changed subscription.
        for (auto& sensor : _sensors)
        {
            sensor->setFunctional(true);
        }
    }

#ifdef MONITOR_USE_JSON
    // With power already on, begin monitoring once the start delay elapses.
    if (_system.isPowerOn())
    {
        _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));
    }
#else
    // When entering monitor mode, check the current tach state right away.
    if (mode != Mode::init)
    {
        _monitorReady = true;

        // The TachSensors have already read their input and target
        // values by now, so evaluate them.
        tachChanged();
    }
#endif

    // The fan-missing timer only exists when a delay was configured.
    if (_fanMissingErrorDelay)
    {
        using MissingTimer =
            sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>;

        _fanMissingErrorTimer = std::make_unique<MissingTimer>(
            event, std::bind(&System::fanMissingErrorTimerExpired, &system,
                             std::ref(*this)));
    }

    try
    {
        _present = util::SDBusPlus::getProperty<bool>(
            util::INVENTORY_PATH + _name, util::INV_ITEM_IFACE, "Present");

        if (!_present)
        {
            getLogger().log(
                std::format("On startup, fan {} is missing", _name));

            if (_system.isPowerOn() && _fanMissingErrorTimer)
            {
                _fanMissingErrorTimer->restartOnce(
                    std::chrono::seconds{*_fanMissingErrorDelay});
            }
        }
    }
    catch (const util::DBusServiceError&)
    {
        // Can happen on the first BMC boot when the presence detect
        // application hasn't started and no inventory cache exists yet.
    }
}
138
/**
 * InterfacesAdded signal handler for this fan's inventory path.
 *
 * If the added interfaces include the inventory item interface with a
 * "Present" property, caches its value, arms the fan-missing timer when the
 * fan is absent while power is on, and notifies the System object.
 */
void Fan::presenceIfaceAdded(sdbusplus::message_t& msg)
{
    using PropertyMap = std::map<std::string, std::variant<bool>>;

    sdbusplus::message::object_path path;
    std::map<std::string, PropertyMap> interfaces;

    msg.read(path, interfaces);

    // Only the inventory item interface is of interest
    const auto itemIface = interfaces.find(util::INV_ITEM_IFACE);
    if (itemIface == interfaces.end())
    {
        return;
    }

    const auto& itemProperties = itemIface->second;
    const auto presentProp = itemProperties.find("Present");
    if (presentProp == itemProperties.end())
    {
        return;
    }

    _present = std::get<bool>(presentProp->second);

    if (!_present)
    {
        getLogger().log(std::format(
            "New fan {} interface added and fan is not present", _name));

        // The timer is only armed while power is on, and only if one
        // was created for this fan.
        if (_system.isPowerOn() && _fanMissingErrorTimer)
        {
            _fanMissingErrorTimer->restartOnce(
                std::chrono::seconds{*_fanMissingErrorDelay});
        }
    }

    _system.fanStatusChange(*this);
}
173
/**
 * Marks the fan as ready to be monitored and evaluates every tach sensor.
 *
 * A sensor whose values cannot be read because of a D-Bus service error is
 * set to nonfunctional without starting its error timer.
 */
void Fan::startMonitor()
{
    _monitorReady = true;

    for (auto& sensor : _sensors)
    {
        try
        {
            // Force a property read to find out whether the tach sensor
            // is on D-Bus. Marking it nonfunctional is deferred until now
            // so that code watching for nonfunctional sensors doesn't act
            // before the sensors have had a chance to show up.
            sensor->updateTachAndTarget();
            tachChanged(*sensor);
        }
        catch (const util::DBusServiceError&)
        {
            // The tach property still isn't on D-Bus. Make sure the
            // sensor is nonfunctional, but skip creating an error for it
            // since this isn't a fan problem.
            getLogger().log(std::format(
                "Monitoring starting but {} sensor value not on D-Bus",
                sensor->name()));

            sensor->setFunctional(false, true);

            // Only touch the fan's own functional state when that is
            // being tracked (nonzero threshold).
            if (_numSensorFailsForNonFunc && _functional &&
                (countNonFunctionalSensors() >= _numSensorFailsForNonFunc))
            {
                updateInventory(false);
            }

            // Don't start any power off actions because of missing
            // sensors at this point; that policy is handled elsewhere.
            _system.fanStatusChange(*this, true);
        }
    }
}
216
tachChanged()217 void Fan::tachChanged()
218 {
219 if (_monitorReady)
220 {
221 for (auto& s : _sensors)
222 {
223 tachChanged(*s);
224 }
225 }
226 }
227
tachChanged(TachSensor & sensor)228 void Fan::tachChanged(TachSensor& sensor)
229 {
230 if (!_system.isPowerOn() || !_monitorReady)
231 {
232 return;
233 }
234
235 if (_trustManager->active())
236 {
237 if (!_trustManager->checkTrust(sensor))
238 {
239 return;
240 }
241 }
242
243 // If the error checking method is 'count', if a tach change leads
244 // to an out of range sensor the count timer will take over in calling
245 // process() until the sensor is healthy again.
246 if (!sensor.countTimerRunning())
247 {
248 process(sensor);
249 }
250 }
251
countTimerExpired(TachSensor & sensor)252 void Fan::countTimerExpired(TachSensor& sensor)
253 {
254 if (_trustManager->active() && !_trustManager->checkTrust(sensor))
255 {
256 return;
257 }
258 process(sensor);
259 }
260
process(TachSensor & sensor)261 void Fan::process(TachSensor& sensor)
262 {
263 // If this sensor is out of range at this moment, start
264 // its timer, at the end of which the inventory
265 // for the fan may get updated to not functional.
266
267 // If this sensor is OK, put everything back into a good state.
268
269 if (outOfRange(sensor))
270 {
271 if (sensor.functional())
272 {
273 switch (sensor.getMethod())
274 {
275 case MethodMode::timebased:
276 // Start nonfunctional timer if not already running
277 sensor.startTimer(TimerMode::nonfunc);
278 break;
279 case MethodMode::count:
280
281 if (!sensor.countTimerRunning())
282 {
283 sensor.startCountTimer();
284 }
285 sensor.setCounter(true);
286 if (sensor.getCounter() >= sensor.getThreshold())
287 {
288 updateState(sensor);
289 }
290 break;
291 }
292 }
293 }
294 else
295 {
296 switch (sensor.getMethod())
297 {
298 case MethodMode::timebased:
299 if (sensor.functional())
300 {
301 if (sensor.timerRunning())
302 {
303 sensor.stopTimer();
304 }
305 }
306 else
307 {
308 // Start functional timer if not already running
309 sensor.startTimer(TimerMode::func);
310 }
311 break;
312 case MethodMode::count:
313 sensor.setCounter(false);
314 if (sensor.getCounter() == 0)
315 {
316 if (!sensor.functional())
317 {
318 updateState(sensor);
319 }
320
321 sensor.stopCountTimer();
322 }
323 break;
324 }
325 }
326 }
327
findTargetSpeed()328 uint64_t Fan::findTargetSpeed()
329 {
330 uint64_t target = 0;
331 // The sensor doesn't support a target,
332 // so get it from another sensor.
333 auto s = std::find_if(_sensors.begin(), _sensors.end(), [](const auto& s) {
334 return s->hasTarget();
335 });
336
337 if (s != _sensors.end())
338 {
339 target = (*s)->getTarget();
340 }
341
342 return target;
343 }
344
countNonFunctionalSensors() const345 size_t Fan::countNonFunctionalSensors() const
346 {
347 return std::count_if(_sensors.begin(), _sensors.end(), [](const auto& s) {
348 return !s->functional();
349 });
350 }
351
outOfRange(const TachSensor & sensor)352 bool Fan::outOfRange(const TachSensor& sensor)
353 {
354 if (!sensor.hasOwner())
355 {
356 return true;
357 }
358
359 auto actual = static_cast<uint64_t>(sensor.getInput());
360 auto range = sensor.getRange(_deviation, _upperDeviation);
361
362 return ((actual < range.first) ||
363 (range.second && actual > range.second.value()));
364 }
365
/**
 * Toggles the functional state of a sensor and, when fan-level status is
 * being tracked, updates the fan's functional state in the inventory to
 * match the number of nonfunctional sensors. Does nothing while power is
 * off.
 */
void Fan::updateState(TachSensor& sensor)
{
    if (!_system.isPowerOn())
    {
        return;
    }

    const auto range = sensor.getRange(_deviation, _upperDeviation);
    const std::string rangeMax =
        range.second ? std::to_string(range.second.value()) : "NoMax";

    // Skip starting the error timer if the sensor isn't on D-Bus, as
    // that isn't a fan hardware problem.
    sensor.setFunctional(!sensor.functional(), !sensor.hasOwner());

    getLogger().log(std::format(
        "Setting tach sensor {} functional state to {}. "
        "[target = {}, actual = {}, allowed range = ({} - {}) "
        "owned = {}]",
        sensor.name(), sensor.functional(), sensor.getTarget(),
        sensor.getInput(), range.first, rangeMax, sensor.hasOwner()));

    // A zero _numSensorFailsForNonFunc means only sensor functional
    // status is handled, not fan FRU functional status.
    if (_numSensorFailsForNonFunc)
    {
        const auto numNonFuncSensors = countNonFunctionalSensors();
        const bool tooManyFailed =
            (numNonFuncSensors >= _numSensorFailsForNonFunc);

        // A nonfunctional fan with enough healthy sensors goes back to
        // functional, unless `set_func_on_present` is in effect.
        if (!_setFuncOnPresent && !_functional && !tooManyFailed)
        {
            getLogger().log(std::format("Setting fan {} to functional, number "
                                        "of nonfunctional sensors = {}",
                                        _name, numNonFuncSensors));
            updateInventory(true);
        }

        // A functional fan with too many nonfunctional sensors becomes
        // nonfunctional.
        if (_functional && tooManyFailed)
        {
            getLogger().log(std::format("Setting fan {} to nonfunctional, "
                                        "number of nonfunctional sensors = {}",
                                        _name, numNonFuncSensors));
            updateInventory(false);
        }
    }

    // Skip the power off rule checks when the sensor isn't on D-Bus so
    // that a running system isn't shut down.
    _system.fanStatusChange(*this, !sensor.hasOwner());
}
424
/**
 * Writes the fan's functional state to the inventory through the Notify
 * method and records it in _functional.
 *
 * @param functional the functional state to set
 * @return true when the D-Bus update failed, false when it succeeded
 */
bool Fan::updateInventory(bool functional)
{
    bool updateFailed = false;

    try
    {
        auto notifyData =
            util::getObjMap<bool>(_name, util::OPERATIONAL_STATUS_INTF,
                                  util::FUNCTIONAL_PROPERTY, functional);

        auto reply = util::SDBusPlus::callMethod(
            _bus, util::INVENTORY_SVC, util::INVENTORY_PATH,
            util::INVENTORY_INTF, "Notify", notifyData);

        if (reply.is_method_error())
        {
            lg2::error("Error in Notify call to update inventory");

            updateFailed = true;
        }
    }
    catch (const util::DBusError& e)
    {
        updateFailed = true;

        getLogger().log(
            std::format("D-Bus Exception reading/updating inventory : {}",
                        e.what()),
            Logger::error);
    }

    // The requested state is recorded whether or not the D-Bus call
    // succeeded.
    _functional = functional;

    return updateFailed;
}
461
/**
 * PropertiesChanged signal handler for the fan's inventory item interface.
 *
 * When the signal carries a "Present" property, the new value is cached and
 * logged. A fan that just became present has itself and all of its sensors
 * set back to functional if that was requested through _setFuncOnPresent.
 * The System object is then notified, and the fan-missing timer is started
 * (fan absent while power is on) or cancelled (fan present).
 */
void Fan::presenceChanged(sdbusplus::message_t& msg)
{
    std::string interface;
    std::map<std::string, std::variant<bool>> properties;

    msg.read(interface, properties);

    const auto presentProp = properties.find("Present");
    if (presentProp == properties.end())
    {
        // Some other property changed; nothing to do.
        return;
    }

    _present = std::get<bool>(presentProp->second);

    getLogger().log(
        std::format("Fan {} presence state change to {}", _name, _present));

    if (_present && _setFuncOnPresent)
    {
        updateInventory(true);

        for (auto& sensor : _sensors)
        {
            sensor->setFunctional(true);
            sensor->resetMethod();
        }
    }

    _system.fanStatusChange(*this);

    // Guard on the timer object itself, as the other users of the timer
    // in this file do, because it is the pointer that gets dereferenced
    // below. The constructor creates the timer only when
    // _fanMissingErrorDelay holds a value, so the delay is valid here.
    if (!_fanMissingErrorTimer)
    {
        return;
    }

    if (!_present && _system.isPowerOn())
    {
        _fanMissingErrorTimer->restartOnce(
            std::chrono::seconds{*_fanMissingErrorDelay});
    }
    else if (_present && _fanMissingErrorTimer->isEnabled())
    {
        _fanMissingErrorTimer->setEnabled(false);
    }
}
502
sensorErrorTimerExpired(const TachSensor & sensor)503 void Fan::sensorErrorTimerExpired(const TachSensor& sensor)
504 {
505 if (_present && _system.isPowerOn())
506 {
507 _system.sensorErrorTimerExpired(*this, sensor);
508 }
509 }
510
/**
 * Reacts to a power state change. Only has an effect when built with
 * MONITOR_USE_JSON.
 *
 * Power off: monitoring is stopped along with every timer owned by the fan
 * and its sensors.
 * Power on: the monitor start timer is armed, sensors that can be read from
 * D-Bus are counted and (for a present fan) set back to functional, and
 * either the fan's functional state is restored or, for a missing fan, the
 * fan-missing timer is started.
 */
void Fan::powerStateChanged([[maybe_unused]] bool powerStateOn)
{
#ifdef MONITOR_USE_JSON
    if (!powerStateOn)
    {
        _monitorReady = false;

        if (_monitorTimer.isEnabled())
        {
            _monitorTimer.setEnabled(false);
        }

        if (_fanMissingErrorTimer && _fanMissingErrorTimer->isEnabled())
        {
            _fanMissingErrorTimer->setEnabled(false);
        }

        for (auto& sensor : _sensors)
        {
            if (sensor->timerRunning())
            {
                sensor->stopTimer();
            }

            sensor->stopCountTimer();
        }

        return;
    }

    // Power is on from here down
    _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));

    _numSensorsOnDBusAtPowerOn = 0;

    for (auto& sensor : _sensors)
    {
        try
        {
            // Force a property read. A sensor that is on D-Bus gets
            // counted and, for a present fan, made functional again.
            sensor->updateTachAndTarget();

            ++_numSensorsOnDBusAtPowerOn;

            if (!_present)
            {
                continue;
            }

            // If not functional, set it back to functional.
            if (!sensor->functional())
            {
                sensor->setFunctional(true);
                _system.fanStatusChange(*this, true);
            }

            // Set the counters back to zero
            if (sensor->getMethod() == MethodMode::count)
            {
                sensor->resetMethod();
            }
        }
        catch (const util::DBusError&)
        {
            // Properties still aren't on D-Bus. Let startMonitor() deal
            // with it, or maybe System::powerStateChanged() if there
            // aren't any sensors at all on D-Bus.
            getLogger().log(
                std::format("At power on, tach sensor {} value not on D-Bus",
                            sensor->name()));
        }
    }

    if (!_present)
    {
        getLogger().log(std::format("At power on, fan {} is missing", _name));

        if (_fanMissingErrorTimer)
        {
            _fanMissingErrorTimer->restartOnce(
                std::chrono::seconds{*_fanMissingErrorDelay});
        }

        return;
    }

    // If configured to change functional state on the fan itself, set
    // it back to true now if necessary.
    if (_numSensorFailsForNonFunc && !_functional &&
        (countNonFunctionalSensors() < _numSensorFailsForNonFunc))
    {
        updateInventory(true);
    }
#endif
}
606
607 } // namespace monitor
608 } // namespace fan
609 } // namespace phosphor
610