1 /**
2 * Copyright 2017 Google Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include "dbuspassive.hpp"

#include "conf.hpp"
#include "dbushelper_interface.hpp"
#include "dbuspassiveredundancy.hpp"
#include "dbusutil.hpp"
#include "failsafeloggers/failsafe_logger_utility.hpp"
#include "interfaces.hpp"
#include "util.hpp"

#include <systemd/sd-bus.h>

#include <sdbusplus/bus.hpp>
#include <sdbusplus/message.hpp>

#include <chrono>
#include <cmath>
#include <cstdint>
#include <exception>
#include <iostream>
#include <limits>
#include <map>
#include <memory>
#include <mutex>
#include <set>
#include <string>
#include <utility>
#include <variant>

#include "failsafeloggers/failsafe_logger.cpp"
46
47 namespace pid_control
48 {
49
createDbusPassive(sdbusplus::bus_t & bus,const std::string & type,const std::string & id,std::unique_ptr<DbusHelperInterface> helper,const conf::SensorConfig * info,const std::shared_ptr<DbusPassiveRedundancy> & redundancy)50 std::unique_ptr<ReadInterface> DbusPassive::createDbusPassive(
51 sdbusplus::bus_t& bus, const std::string& type, const std::string& id,
52 std::unique_ptr<DbusHelperInterface> helper, const conf::SensorConfig* info,
53 const std::shared_ptr<DbusPassiveRedundancy>& redundancy)
54 {
55 if (helper == nullptr)
56 {
57 return nullptr;
58 }
59 if (!validType(type))
60 {
61 return nullptr;
62 }
63
64 /* Need to get the scale and initial value */
65 /* service == busname */
66 std::string path;
67 if (info->readPath.empty())
68 {
69 path = getSensorPath(type, id);
70 }
71 else
72 {
73 path = info->readPath;
74 }
75
76 SensorProperties settings;
77 bool failed;
78 std::string service;
79
80 try
81 {
82 service = helper->getService(sensorintf, path);
83 }
84 catch (const std::exception& e)
85 {
86 #ifndef HANDLE_MISSING_OBJECT_PATHS
87 return nullptr;
88 #else
89 // CASE1: The sensor is not on DBus, but as it is not in the
90 // MissingIsAcceptable list, the sensor should be built with a failed
91 // state to send the zone to failsafe mode. Everything will recover if
92 // all important sensors are back to DBus. swampd will be informed
93 // through InterfacesAdded signals and the sensors will be built again.
94
95 // CASE2: The sensor is on D-Bus (getService succeeds) but getProperties
96 // fails (e.g., D-Bus error or property fetch failure). In this case,
97 // handle-missing-object-paths does not apply. The sensor build fails,
98 // and the control loop will keep restarting until getProperties
99 // succeeds.
100
101 // Only CASE1 may send the zone to failsafe mode if the sensor is not
102 // in MissingIsAcceptable. CASE2 results in continuous restart until
103 // recovery.
104
105 failed = true;
106 settings.value = std::numeric_limits<double>::quiet_NaN();
107 settings.unit = getSensorUnit(type);
108 settings.available = false;
109 settings.unavailableAsFailed = true;
110 if (info->ignoreDbusMinMax)
111 {
112 settings.min = 0;
113 settings.max = 0;
114 }
115 std::cerr << "DbusPassive: Sensor " << path
116 << " is missing from D-Bus, build this sensor as failed\n";
117 return std::make_unique<DbusPassive>(
118 bus, type, id, std::move(helper), settings, failed, path,
119 redundancy);
120 #endif
121 }
122
123 try
124 {
125 helper->getProperties(service, path, &settings);
126 failed = helper->thresholdsAsserted(service, path);
127 }
128 catch (const std::exception& e)
129 {
130 return nullptr;
131 }
132
133 /* if these values are zero, they're ignored. */
134 if (info->ignoreDbusMinMax)
135 {
136 settings.min = 0;
137 settings.max = 0;
138 }
139
140 settings.unavailableAsFailed = info->unavailableAsFailed;
141
142 return std::make_unique<DbusPassive>(bus, type, id, std::move(helper),
143 settings, failed, path, redundancy);
144 }
145
DbusPassive(sdbusplus::bus_t & bus,const std::string & type,const std::string & id,std::unique_ptr<DbusHelperInterface> helper,const SensorProperties & settings,bool failed,const std::string & path,const std::shared_ptr<DbusPassiveRedundancy> & redundancy)146 DbusPassive::DbusPassive(
147 sdbusplus::bus_t& bus, const std::string& type, const std::string& id,
148 std::unique_ptr<DbusHelperInterface> helper,
149 const SensorProperties& settings, bool failed, const std::string& path,
150 const std::shared_ptr<DbusPassiveRedundancy>& redundancy) :
151 ReadInterface(), _signal(bus, getMatch(path), dbusHandleSignal, this),
152 _id(id), _helper(std::move(helper)), _failed(failed), path(path),
153 redundancy(redundancy)
154
155 {
156 _scale = settings.scale;
157 _min = settings.min * std::pow(10.0, _scale);
158 _max = settings.max * std::pow(10.0, _scale);
159 _available = settings.available;
160 _unavailableAsFailed = settings.unavailableAsFailed;
161
162 // Cache this type knowledge, to avoid repeated string comparison
163 _typeMargin = (type == "margin");
164 _typeFan = (type == "fan");
165
166 // Force value to be stored, otherwise member would be uninitialized
167 updateValue(settings.value, true);
168 }
169
read(void)170 ReadReturn DbusPassive::read(void)
171 {
172 std::lock_guard<std::mutex> guard(_lock);
173
174 ReadReturn r = {_value, _updated, _unscaled};
175
176 return r;
177 }
178
setValue(double value,double unscaled)179 void DbusPassive::setValue(double value, double unscaled)
180 {
181 std::lock_guard<std::mutex> guard(_lock);
182
183 _value = value;
184 _unscaled = unscaled;
185 _updated = std::chrono::high_resolution_clock::now();
186 }
187
setValue(double value)188 void DbusPassive::setValue(double value)
189 {
190 // First param is scaled, second param is unscaled, assume same here
191 setValue(value, value);
192 }
193
getFailed(void) const194 bool DbusPassive::getFailed(void) const
195 {
196 if (redundancy)
197 {
198 const std::set<std::string>& failures = redundancy->getFailed();
199 if (failures.find(path) != failures.end())
200 {
201 outputFailsafeLogWithSensor(_id, true, _id,
202 "The sensor path is marked redundant.");
203 return true;
204 }
205 }
206
207 /*
208 * Unavailable thermal sensors, who are not present or
209 * power-state-not-matching, should not trigger the failSafe mode. For
210 * example, when a system stays at a powered-off state, its CPU Temp
211 * sensors will be unavailable, these unavailable sensors should not be
212 * treated as failed and trigger failSafe.
213 * This is important for systems whose Fans are always on.
214 */
215 if (!_typeFan && !_available && !_unavailableAsFailed)
216 {
217 return false;
218 }
219
220 // If a reading has came in,
221 // but its value bad in some way (determined by sensor type),
222 // indicate this sensor has failed,
223 // until another value comes in that is no longer bad.
224 // This is different from the overall _failed flag,
225 // which is set and cleared by other causes.
226 if (_badReading)
227 {
228 outputFailsafeLogWithSensor(_id, true, _id,
229 "The sensor has bad readings.");
230 return true;
231 }
232
233 // If a reading has came in, and it is not a bad reading,
234 // but it indicates there is no more thermal margin left,
235 // that is bad, something is wrong with the PID loops,
236 // they are not cooling the system, enable failsafe mode also.
237 if (_marginHot)
238 {
239 outputFailsafeLogWithSensor(_id, true, _id,
240 "The sensor has no thermal margin left.");
241 return true;
242 }
243
244 if (_failed)
245 {
246 outputFailsafeLogWithSensor(
247 _id, true, _id, "The sensor has failed with a critical issue.");
248 return true;
249 }
250
251 if (!_available)
252 {
253 outputFailsafeLogWithSensor(_id, true, _id,
254 "The sensor is unavailable.");
255 return true;
256 }
257
258 if (!_functional)
259 {
260 outputFailsafeLogWithSensor(_id, true, _id,
261 "The sensor is not functional.");
262 return true;
263 }
264
265 outputFailsafeLogWithSensor(_id, false, _id, "The sensor has recovered.");
266
267 return false;
268 }
269
getFailReason(void) const270 std::string DbusPassive::getFailReason(void) const
271 {
272 if (_badReading)
273 {
274 return "Sensor reading bad";
275 }
276 if (_marginHot)
277 {
278 return "Margin hot";
279 }
280 if (_failed)
281 {
282 return "Sensor threshold asserted";
283 }
284 if (!_available)
285 {
286 return "Sensor unavailable";
287 }
288 if (!_functional)
289 {
290 return "Sensor not functional";
291 }
292 return "Unknown";
293 }
294
setFailed(bool value)295 void DbusPassive::setFailed(bool value)
296 {
297 _failed = value;
298 }
299
setFunctional(bool value)300 void DbusPassive::setFunctional(bool value)
301 {
302 _functional = value;
303 }
304
setAvailable(bool value)305 void DbusPassive::setAvailable(bool value)
306 {
307 _available = value;
308 }
309
getScale(void)310 int64_t DbusPassive::getScale(void)
311 {
312 return _scale;
313 }
314
getID(void)315 std::string DbusPassive::getID(void)
316 {
317 return _id;
318 }
319
getMax(void)320 double DbusPassive::getMax(void)
321 {
322 return _max;
323 }
324
getMin(void)325 double DbusPassive::getMin(void)
326 {
327 return _min;
328 }
329
updateValue(double value,bool force)330 void DbusPassive::updateValue(double value, bool force)
331 {
332 _badReading = false;
333
334 // Do not let a NAN, or other floating-point oddity, be used to update
335 // the value, as that indicates the sensor has no valid reading.
336 if (!(std::isfinite(value)))
337 {
338 _badReading = true;
339
340 // Do not continue with a bad reading, unless caller forcing
341 if (!force)
342 {
343 return;
344 }
345 }
346
347 value *= std::pow(10.0, _scale);
348
349 auto unscaled = value;
350 scaleSensorReading(_min, _max, value);
351
352 if (_typeMargin)
353 {
354 _marginHot = false;
355
356 // Unlike an absolute temperature sensor,
357 // where 0 degrees C is a good reading,
358 // a value received of 0 (or negative) margin is worrisome,
359 // and should be flagged.
360 // Either it indicates margin not calculated properly,
361 // or somebody forgot to set the margin-zero setpoint,
362 // or the system is really overheating that much.
363 // This is a different condition from _failed
364 // and _badReading, so it merits its own flag.
365 // The sensor has not failed, the reading is good, but the zone
366 // still needs to know that it should go to failsafe mode.
367 if (unscaled <= 0.0)
368 {
369 _marginHot = true;
370 }
371 }
372
373 setValue(value, unscaled);
374 }
375
handleSensorValue(sdbusplus::message_t & msg,DbusPassive * owner)376 int handleSensorValue(sdbusplus::message_t& msg, DbusPassive* owner)
377 {
378 std::string msgSensor;
379 std::map<std::string, std::variant<int64_t, double, bool>> msgData;
380
381 msg.read(msgSensor, msgData);
382
383 if (msgSensor == "xyz.openbmc_project.Sensor.Value")
384 {
385 auto valPropMap = msgData.find("Value");
386 if (valPropMap != msgData.end())
387 {
388 double value =
389 std::visit(VariantToDoubleVisitor(), valPropMap->second);
390
391 owner->updateValue(value, false);
392 }
393 }
394 else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Critical")
395 {
396 auto criticalAlarmLow = msgData.find("CriticalAlarmLow");
397 auto criticalAlarmHigh = msgData.find("CriticalAlarmHigh");
398 if (criticalAlarmHigh == msgData.end() &&
399 criticalAlarmLow == msgData.end())
400 {
401 return 0;
402 }
403
404 bool asserted = false;
405 if (criticalAlarmLow != msgData.end())
406 {
407 asserted = std::get<bool>(criticalAlarmLow->second);
408 }
409
410 // checking both as in theory you could de-assert one threshold and
411 // assert the other at the same moment
412 if (!asserted && criticalAlarmHigh != msgData.end())
413 {
414 asserted = std::get<bool>(criticalAlarmHigh->second);
415 }
416 owner->setFailed(asserted);
417 }
418 #ifdef UNC_FAILSAFE
419 else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Warning")
420 {
421 auto warningAlarmHigh = msgData.find("WarningAlarmHigh");
422 if (warningAlarmHigh == msgData.end())
423 {
424 return 0;
425 }
426
427 bool asserted = false;
428 if (warningAlarmHigh != msgData.end())
429 {
430 asserted = std::get<bool>(warningAlarmHigh->second);
431 }
432 owner->setFailed(asserted);
433 }
434 #endif
435 else if (msgSensor == "xyz.openbmc_project.State.Decorator.Availability")
436 {
437 auto available = msgData.find("Available");
438 if (available == msgData.end())
439 {
440 return 0;
441 }
442 bool asserted = std::get<bool>(available->second);
443 owner->setAvailable(asserted);
444 if (!asserted)
445 {
446 // A thermal controller will continue its PID calculation and not
447 // trigger a 'failsafe' when some inputs are unavailable.
448 // So, forced to clear the value here to prevent a historical
449 // value to participate in a latter PID calculation.
450 owner->updateValue(std::numeric_limits<double>::quiet_NaN(), true);
451 }
452 }
453 else if (msgSensor ==
454 "xyz.openbmc_project.State.Decorator.OperationalStatus")
455 {
456 auto functional = msgData.find("Functional");
457 if (functional == msgData.end())
458 {
459 return 0;
460 }
461 bool asserted = std::get<bool>(functional->second);
462 owner->setFunctional(asserted);
463 }
464
465 return 0;
466 }
467
dbusHandleSignal(sd_bus_message * msg,void * usrData,sd_bus_error * err)468 int dbusHandleSignal(sd_bus_message* msg, void* usrData,
469 [[maybe_unused]] sd_bus_error* err)
470 {
471 auto sdbpMsg = sdbusplus::message_t(msg);
472 DbusPassive* obj = static_cast<DbusPassive*>(usrData);
473
474 return handleSensorValue(sdbpMsg, obj);
475 }
476
477 } // namespace pid_control
478