xref: /openbmc/phosphor-pid-control/dbus/dbuspassive.cpp (revision a4270075f7cbdb2dee38f444a59e25b96d8128f4)
1 /**
2  * Copyright 2017 Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "config.h"
17 
18 #include "dbuspassive.hpp"
19 
20 #include "dbushelper_interface.hpp"
21 #include "dbuspassiveredundancy.hpp"
22 #include "dbusutil.hpp"
23 #include "failsafeloggers/builder.hpp"
24 #include "failsafeloggers/failsafe_logger_utility.hpp"
25 #include "util.hpp"
26 
27 #include <sdbusplus/bus.hpp>
28 
29 #include <chrono>
30 #include <cmath>
31 #include <memory>
32 #include <mutex>
33 #include <string>
34 #include <variant>
35 
36 #include "failsafeloggers/failsafe_logger.cpp"
37 
38 namespace pid_control
39 {
40 
41 std::unique_ptr<ReadInterface> DbusPassive::createDbusPassive(
42     sdbusplus::bus_t& bus, const std::string& type, const std::string& id,
43     std::unique_ptr<DbusHelperInterface> helper, const conf::SensorConfig* info,
44     const std::shared_ptr<DbusPassiveRedundancy>& redundancy)
45 {
46     if (helper == nullptr)
47     {
48         return nullptr;
49     }
50     if (!validType(type))
51     {
52         return nullptr;
53     }
54 
55     /* Need to get the scale and initial value */
56     /* service == busname */
57     std::string path;
58     if (info->readPath.empty())
59     {
60         path = getSensorPath(type, id);
61     }
62     else
63     {
64         path = info->readPath;
65     }
66 
67     SensorProperties settings;
68     bool failed;
69 
70     try
71     {
72         std::string service = helper->getService(sensorintf, path);
73 
74         helper->getProperties(service, path, &settings);
75         failed = helper->thresholdsAsserted(service, path);
76     }
77     catch (const std::exception& e)
78     {
79 #ifndef HANDLE_MISSING_OBJECT_PATHS
80         return nullptr;
81 #else
82         // CASE1: The sensor is not on DBus, but as it is not in the
83         // MissingIsAcceptable list, the sensor should be built with a failed
84         // state to send the zone to failsafe mode. Everything will recover if
85         // all important sensors are back to DBus. swampd will be informed
86         // through InterfacesAdded signals and the sensors will be built again.
87 
88         // CASE2: The sensor is in the MissingIsAcceptable list and it EXISTS on
89         // DBus (which sends it all the way here). However, swampd fails to
90         // initialize its setting here because of some DBus error???
91         // (getService/getProperties/getThresholdAssertion). Build it as a
92         // failed sensor too. A DBus signal will inform if there's s new
93         // property value to the sensor and will recover its state when the new
94         // value is valid.
95 
96         // In both cases, the Sensor::getFailed() and
97         // DbusPidZone::markSensorMissing() APIs will decide whether to add a
98         // failed sensor to the _failSafeSensors list. As _failed=true,
99         // _available=false and _badReading=false (due to updateValue(nan,
100         // true)), both cases will have getFailed()=true at the beginning as
101         // long as _unavailableAsFailed=true; However as CASE2 has the sensor in
102         // MissingIsAcceptable list, only CASE1 will send the zone to failSafe
103         // mode.
104 
105         failed = true;
106         settings.value = std::numeric_limits<double>::quiet_NaN();
107         settings.unit = getSensorUnit(type);
108         settings.available = false;
109         std::cerr << "DbusPassive: Sensor " << path
110                   << " is missing from D-Bus, build this sensor as failed\n";
111 #endif
112     }
113 
114     /* if these values are zero, they're ignored. */
115     if (info->ignoreDbusMinMax)
116     {
117         settings.min = 0;
118         settings.max = 0;
119     }
120 
121     settings.unavailableAsFailed = info->unavailableAsFailed;
122 
123     return std::make_unique<DbusPassive>(bus, type, id, std::move(helper),
124                                          settings, failed, path, redundancy);
125 }
126 
127 DbusPassive::DbusPassive(
128     sdbusplus::bus_t& bus, const std::string& type, const std::string& id,
129     std::unique_ptr<DbusHelperInterface> helper,
130     const SensorProperties& settings, bool failed, const std::string& path,
131     const std::shared_ptr<DbusPassiveRedundancy>& redundancy) :
132     ReadInterface(), _signal(bus, getMatch(path), dbusHandleSignal, this),
133     _id(id), _helper(std::move(helper)), _failed(failed), path(path),
134     redundancy(redundancy)
135 
136 {
137     _scale = settings.scale;
138     _min = settings.min * std::pow(10.0, _scale);
139     _max = settings.max * std::pow(10.0, _scale);
140     _available = settings.available;
141     _unavailableAsFailed = settings.unavailableAsFailed;
142 
143     // Cache this type knowledge, to avoid repeated string comparison
144     _typeMargin = (type == "margin");
145     _typeFan = (type == "fan");
146 
147     // Force value to be stored, otherwise member would be uninitialized
148     updateValue(settings.value, true);
149 }
150 
151 ReadReturn DbusPassive::read(void)
152 {
153     std::lock_guard<std::mutex> guard(_lock);
154 
155     ReadReturn r = {_value, _updated, _unscaled};
156 
157     return r;
158 }
159 
160 void DbusPassive::setValue(double value, double unscaled)
161 {
162     std::lock_guard<std::mutex> guard(_lock);
163 
164     _value = value;
165     _unscaled = unscaled;
166     _updated = std::chrono::high_resolution_clock::now();
167 }
168 
169 void DbusPassive::setValue(double value)
170 {
171     // First param is scaled, second param is unscaled, assume same here
172     setValue(value, value);
173 }
174 
175 bool DbusPassive::getFailed(void) const
176 {
177     if (redundancy)
178     {
179         const std::set<std::string>& failures = redundancy->getFailed();
180         if (failures.find(path) != failures.end())
181         {
182             outputFailsafeLogWithSensor(_id, true, _id,
183                                         "The sensor path is marked redundant.");
184             return true;
185         }
186     }
187 
188     /*
189      * Unavailable thermal sensors, who are not present or
190      * power-state-not-matching, should not trigger the failSafe mode. For
191      * example, when a system stays at a powered-off state, its CPU Temp
192      * sensors will be unavailable, these unavailable sensors should not be
193      * treated as failed and trigger failSafe.
194      * This is important for systems whose Fans are always on.
195      */
196     if (!_typeFan && !_available && !_unavailableAsFailed)
197     {
198         return false;
199     }
200 
201     // If a reading has came in,
202     // but its value bad in some way (determined by sensor type),
203     // indicate this sensor has failed,
204     // until another value comes in that is no longer bad.
205     // This is different from the overall _failed flag,
206     // which is set and cleared by other causes.
207     if (_badReading)
208     {
209         outputFailsafeLogWithSensor(_id, true, _id,
210                                     "The sensor has bad readings.");
211         return true;
212     }
213 
214     // If a reading has came in, and it is not a bad reading,
215     // but it indicates there is no more thermal margin left,
216     // that is bad, something is wrong with the PID loops,
217     // they are not cooling the system, enable failsafe mode also.
218     if (_marginHot)
219     {
220         outputFailsafeLogWithSensor(_id, true, _id,
221                                     "The sensor has no thermal margin left.");
222         return true;
223     }
224 
225     if (_failed)
226     {
227         outputFailsafeLogWithSensor(
228             _id, true, _id, "The sensor has failed with a critical issue.");
229         return true;
230     }
231 
232     if (!_available)
233     {
234         outputFailsafeLogWithSensor(_id, true, _id,
235                                     "The sensor is unavailable.");
236         return true;
237     }
238 
239     if (!_functional)
240     {
241         outputFailsafeLogWithSensor(_id, true, _id,
242                                     "The sensor is not functional.");
243         return true;
244     }
245 
246     outputFailsafeLogWithSensor(_id, false, _id, "The sensor has recovered.");
247 
248     return false;
249 }
250 
251 std::string DbusPassive::getFailReason(void) const
252 {
253     if (_badReading)
254     {
255         return "Sensor reading bad";
256     }
257     if (_marginHot)
258     {
259         return "Margin hot";
260     }
261     if (_failed)
262     {
263         return "Sensor threshold asserted";
264     }
265     if (!_available)
266     {
267         return "Sensor unavailable";
268     }
269     if (!_functional)
270     {
271         return "Sensor not functional";
272     }
273     return "Unknown";
274 }
275 
276 void DbusPassive::setFailed(bool value)
277 {
278     _failed = value;
279 }
280 
281 void DbusPassive::setFunctional(bool value)
282 {
283     _functional = value;
284 }
285 
286 void DbusPassive::setAvailable(bool value)
287 {
288     _available = value;
289 }
290 
291 int64_t DbusPassive::getScale(void)
292 {
293     return _scale;
294 }
295 
296 std::string DbusPassive::getID(void)
297 {
298     return _id;
299 }
300 
301 double DbusPassive::getMax(void)
302 {
303     return _max;
304 }
305 
306 double DbusPassive::getMin(void)
307 {
308     return _min;
309 }
310 
311 void DbusPassive::updateValue(double value, bool force)
312 {
313     _badReading = false;
314 
315     // Do not let a NAN, or other floating-point oddity, be used to update
316     // the value, as that indicates the sensor has no valid reading.
317     if (!(std::isfinite(value)))
318     {
319         _badReading = true;
320 
321         // Do not continue with a bad reading, unless caller forcing
322         if (!force)
323         {
324             return;
325         }
326     }
327 
328     value *= std::pow(10.0, _scale);
329 
330     auto unscaled = value;
331     scaleSensorReading(_min, _max, value);
332 
333     if (_typeMargin)
334     {
335         _marginHot = false;
336 
337         // Unlike an absolute temperature sensor,
338         // where 0 degrees C is a good reading,
339         // a value received of 0 (or negative) margin is worrisome,
340         // and should be flagged.
341         // Either it indicates margin not calculated properly,
342         // or somebody forgot to set the margin-zero setpoint,
343         // or the system is really overheating that much.
344         // This is a different condition from _failed
345         // and _badReading, so it merits its own flag.
346         // The sensor has not failed, the reading is good, but the zone
347         // still needs to know that it should go to failsafe mode.
348         if (unscaled <= 0.0)
349         {
350             _marginHot = true;
351         }
352     }
353 
354     setValue(value, unscaled);
355 }
356 
357 int handleSensorValue(sdbusplus::message_t& msg, DbusPassive* owner)
358 {
359     std::string msgSensor;
360     std::map<std::string, std::variant<int64_t, double, bool>> msgData;
361 
362     msg.read(msgSensor, msgData);
363 
364     if (msgSensor == "xyz.openbmc_project.Sensor.Value")
365     {
366         auto valPropMap = msgData.find("Value");
367         if (valPropMap != msgData.end())
368         {
369             double value =
370                 std::visit(VariantToDoubleVisitor(), valPropMap->second);
371 
372             owner->updateValue(value, false);
373         }
374     }
375     else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Critical")
376     {
377         auto criticalAlarmLow = msgData.find("CriticalAlarmLow");
378         auto criticalAlarmHigh = msgData.find("CriticalAlarmHigh");
379         if (criticalAlarmHigh == msgData.end() &&
380             criticalAlarmLow == msgData.end())
381         {
382             return 0;
383         }
384 
385         bool asserted = false;
386         if (criticalAlarmLow != msgData.end())
387         {
388             asserted = std::get<bool>(criticalAlarmLow->second);
389         }
390 
391         // checking both as in theory you could de-assert one threshold and
392         // assert the other at the same moment
393         if (!asserted && criticalAlarmHigh != msgData.end())
394         {
395             asserted = std::get<bool>(criticalAlarmHigh->second);
396         }
397         owner->setFailed(asserted);
398     }
399 #ifdef UNC_FAILSAFE
400     else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Warning")
401     {
402         auto warningAlarmHigh = msgData.find("WarningAlarmHigh");
403         if (warningAlarmHigh == msgData.end())
404         {
405             return 0;
406         }
407 
408         bool asserted = false;
409         if (warningAlarmHigh != msgData.end())
410         {
411             asserted = std::get<bool>(warningAlarmHigh->second);
412         }
413         owner->setFailed(asserted);
414     }
415 #endif
416     else if (msgSensor == "xyz.openbmc_project.State.Decorator.Availability")
417     {
418         auto available = msgData.find("Available");
419         if (available == msgData.end())
420         {
421             return 0;
422         }
423         bool asserted = std::get<bool>(available->second);
424         owner->setAvailable(asserted);
425         if (!asserted)
426         {
427             // A thermal controller will continue its PID calculation and not
428             // trigger a 'failsafe' when some inputs are unavailable.
429             // So, forced to clear the value here to prevent a historical
430             // value to participate in a latter PID calculation.
431             owner->updateValue(std::numeric_limits<double>::quiet_NaN(), true);
432         }
433     }
434     else if (msgSensor ==
435              "xyz.openbmc_project.State.Decorator.OperationalStatus")
436     {
437         auto functional = msgData.find("Functional");
438         if (functional == msgData.end())
439         {
440             return 0;
441         }
442         bool asserted = std::get<bool>(functional->second);
443         owner->setFunctional(asserted);
444     }
445 
446     return 0;
447 }
448 
449 int dbusHandleSignal(sd_bus_message* msg, void* usrData,
450                      [[maybe_unused]] sd_bus_error* err)
451 {
452     auto sdbpMsg = sdbusplus::message_t(msg);
453     DbusPassive* obj = static_cast<DbusPassive*>(usrData);
454 
455     return handleSensorValue(sdbpMsg, obj);
456 }
457 
458 } // namespace pid_control
459