xref: /openbmc/phosphor-pid-control/dbus/dbuspassive.cpp (revision 897f31c77db9178ecfe8130d43a24c6dac73ff3a)
1 /**
2  * Copyright 2017 Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "config.h"
17 
18 #include "dbuspassive.hpp"
19 
20 #include "dbushelper_interface.hpp"
21 #include "dbuspassiveredundancy.hpp"
22 #include "dbusutil.hpp"
23 #include "failsafeloggers/builder.hpp"
24 #include "failsafeloggers/failsafe_logger_utility.hpp"
25 #include "util.hpp"
26 
27 #include <sdbusplus/bus.hpp>
28 
29 #include <chrono>
30 #include <cmath>
31 #include <memory>
32 #include <mutex>
33 #include <string>
34 #include <variant>
35 
36 #include "failsafeloggers/failsafe_logger.cpp"
37 
38 namespace pid_control
39 {
40 
createDbusPassive(sdbusplus::bus_t & bus,const std::string & type,const std::string & id,std::unique_ptr<DbusHelperInterface> helper,const conf::SensorConfig * info,const std::shared_ptr<DbusPassiveRedundancy> & redundancy)41 std::unique_ptr<ReadInterface> DbusPassive::createDbusPassive(
42     sdbusplus::bus_t& bus, const std::string& type, const std::string& id,
43     std::unique_ptr<DbusHelperInterface> helper, const conf::SensorConfig* info,
44     const std::shared_ptr<DbusPassiveRedundancy>& redundancy)
45 {
46     if (helper == nullptr)
47     {
48         return nullptr;
49     }
50     if (!validType(type))
51     {
52         return nullptr;
53     }
54 
55     /* Need to get the scale and initial value */
56     /* service == busname */
57     std::string path;
58     if (info->readPath.empty())
59     {
60         path = getSensorPath(type, id);
61     }
62     else
63     {
64         path = info->readPath;
65     }
66 
67     SensorProperties settings;
68     bool failed;
69     std::string service;
70 
71     try
72     {
73         service = helper->getService(sensorintf, path);
74     }
75     catch (const std::exception& e)
76     {
77 #ifndef HANDLE_MISSING_OBJECT_PATHS
78         return nullptr;
79 #else
80         // CASE1: The sensor is not on DBus, but as it is not in the
81         // MissingIsAcceptable list, the sensor should be built with a failed
82         // state to send the zone to failsafe mode. Everything will recover if
83         // all important sensors are back to DBus. swampd will be informed
84         // through InterfacesAdded signals and the sensors will be built again.
85 
86         // CASE2: The sensor is on D-Bus (getService succeeds) but getProperties
87         // fails (e.g., D-Bus error or property fetch failure). In this case,
88         // handle-missing-object-paths does not apply. The sensor build fails,
89         // and the control loop will keep restarting until getProperties
90         // succeeds.
91 
92         // Only CASE1 may send the zone to failsafe mode if the sensor is not
93         // in MissingIsAcceptable. CASE2 results in continuous restart until
94         // recovery.
95 
96         failed = true;
97         settings.value = std::numeric_limits<double>::quiet_NaN();
98         settings.unit = getSensorUnit(type);
99         settings.available = false;
100         settings.unavailableAsFailed = true;
101         if (info->ignoreDbusMinMax)
102         {
103             settings.min = 0;
104             settings.max = 0;
105         }
106         std::cerr << "DbusPassive: Sensor " << path
107                   << " is missing from D-Bus, build this sensor as failed\n";
108         return std::make_unique<DbusPassive>(
109             bus, type, id, std::move(helper), settings, failed, path,
110             redundancy);
111 #endif
112     }
113 
114     try
115     {
116         helper->getProperties(service, path, &settings);
117         failed = helper->thresholdsAsserted(service, path);
118     }
119     catch (const std::exception& e)
120     {
121         return nullptr;
122     }
123 
124     /* if these values are zero, they're ignored. */
125     if (info->ignoreDbusMinMax)
126     {
127         settings.min = 0;
128         settings.max = 0;
129     }
130 
131     settings.unavailableAsFailed = info->unavailableAsFailed;
132 
133     return std::make_unique<DbusPassive>(bus, type, id, std::move(helper),
134                                          settings, failed, path, redundancy);
135 }
136 
DbusPassive(sdbusplus::bus_t & bus,const std::string & type,const std::string & id,std::unique_ptr<DbusHelperInterface> helper,const SensorProperties & settings,bool failed,const std::string & path,const std::shared_ptr<DbusPassiveRedundancy> & redundancy)137 DbusPassive::DbusPassive(
138     sdbusplus::bus_t& bus, const std::string& type, const std::string& id,
139     std::unique_ptr<DbusHelperInterface> helper,
140     const SensorProperties& settings, bool failed, const std::string& path,
141     const std::shared_ptr<DbusPassiveRedundancy>& redundancy) :
142     ReadInterface(), _signal(bus, getMatch(path), dbusHandleSignal, this),
143     _id(id), _helper(std::move(helper)), _failed(failed), path(path),
144     redundancy(redundancy)
145 
146 {
147     _scale = settings.scale;
148     _min = settings.min * std::pow(10.0, _scale);
149     _max = settings.max * std::pow(10.0, _scale);
150     _available = settings.available;
151     _unavailableAsFailed = settings.unavailableAsFailed;
152 
153     // Cache this type knowledge, to avoid repeated string comparison
154     _typeMargin = (type == "margin");
155     _typeFan = (type == "fan");
156 
157     // Force value to be stored, otherwise member would be uninitialized
158     updateValue(settings.value, true);
159 }
160 
read(void)161 ReadReturn DbusPassive::read(void)
162 {
163     std::lock_guard<std::mutex> guard(_lock);
164 
165     ReadReturn r = {_value, _updated, _unscaled};
166 
167     return r;
168 }
169 
setValue(double value,double unscaled)170 void DbusPassive::setValue(double value, double unscaled)
171 {
172     std::lock_guard<std::mutex> guard(_lock);
173 
174     _value = value;
175     _unscaled = unscaled;
176     _updated = std::chrono::high_resolution_clock::now();
177 }
178 
setValue(double value)179 void DbusPassive::setValue(double value)
180 {
181     // First param is scaled, second param is unscaled, assume same here
182     setValue(value, value);
183 }
184 
getFailed(void) const185 bool DbusPassive::getFailed(void) const
186 {
187     if (redundancy)
188     {
189         const std::set<std::string>& failures = redundancy->getFailed();
190         if (failures.find(path) != failures.end())
191         {
192             outputFailsafeLogWithSensor(_id, true, _id,
193                                         "The sensor path is marked redundant.");
194             return true;
195         }
196     }
197 
198     /*
199      * Unavailable thermal sensors, who are not present or
200      * power-state-not-matching, should not trigger the failSafe mode. For
201      * example, when a system stays at a powered-off state, its CPU Temp
202      * sensors will be unavailable, these unavailable sensors should not be
203      * treated as failed and trigger failSafe.
204      * This is important for systems whose Fans are always on.
205      */
206     if (!_typeFan && !_available && !_unavailableAsFailed)
207     {
208         return false;
209     }
210 
211     // If a reading has came in,
212     // but its value bad in some way (determined by sensor type),
213     // indicate this sensor has failed,
214     // until another value comes in that is no longer bad.
215     // This is different from the overall _failed flag,
216     // which is set and cleared by other causes.
217     if (_badReading)
218     {
219         outputFailsafeLogWithSensor(_id, true, _id,
220                                     "The sensor has bad readings.");
221         return true;
222     }
223 
224     // If a reading has came in, and it is not a bad reading,
225     // but it indicates there is no more thermal margin left,
226     // that is bad, something is wrong with the PID loops,
227     // they are not cooling the system, enable failsafe mode also.
228     if (_marginHot)
229     {
230         outputFailsafeLogWithSensor(_id, true, _id,
231                                     "The sensor has no thermal margin left.");
232         return true;
233     }
234 
235     if (_failed)
236     {
237         outputFailsafeLogWithSensor(
238             _id, true, _id, "The sensor has failed with a critical issue.");
239         return true;
240     }
241 
242     if (!_available)
243     {
244         outputFailsafeLogWithSensor(_id, true, _id,
245                                     "The sensor is unavailable.");
246         return true;
247     }
248 
249     if (!_functional)
250     {
251         outputFailsafeLogWithSensor(_id, true, _id,
252                                     "The sensor is not functional.");
253         return true;
254     }
255 
256     outputFailsafeLogWithSensor(_id, false, _id, "The sensor has recovered.");
257 
258     return false;
259 }
260 
getFailReason(void) const261 std::string DbusPassive::getFailReason(void) const
262 {
263     if (_badReading)
264     {
265         return "Sensor reading bad";
266     }
267     if (_marginHot)
268     {
269         return "Margin hot";
270     }
271     if (_failed)
272     {
273         return "Sensor threshold asserted";
274     }
275     if (!_available)
276     {
277         return "Sensor unavailable";
278     }
279     if (!_functional)
280     {
281         return "Sensor not functional";
282     }
283     return "Unknown";
284 }
285 
setFailed(bool value)286 void DbusPassive::setFailed(bool value)
287 {
288     _failed = value;
289 }
290 
setFunctional(bool value)291 void DbusPassive::setFunctional(bool value)
292 {
293     _functional = value;
294 }
295 
setAvailable(bool value)296 void DbusPassive::setAvailable(bool value)
297 {
298     _available = value;
299 }
300 
getScale(void)301 int64_t DbusPassive::getScale(void)
302 {
303     return _scale;
304 }
305 
getID(void)306 std::string DbusPassive::getID(void)
307 {
308     return _id;
309 }
310 
getMax(void)311 double DbusPassive::getMax(void)
312 {
313     return _max;
314 }
315 
getMin(void)316 double DbusPassive::getMin(void)
317 {
318     return _min;
319 }
320 
updateValue(double value,bool force)321 void DbusPassive::updateValue(double value, bool force)
322 {
323     _badReading = false;
324 
325     // Do not let a NAN, or other floating-point oddity, be used to update
326     // the value, as that indicates the sensor has no valid reading.
327     if (!(std::isfinite(value)))
328     {
329         _badReading = true;
330 
331         // Do not continue with a bad reading, unless caller forcing
332         if (!force)
333         {
334             return;
335         }
336     }
337 
338     value *= std::pow(10.0, _scale);
339 
340     auto unscaled = value;
341     scaleSensorReading(_min, _max, value);
342 
343     if (_typeMargin)
344     {
345         _marginHot = false;
346 
347         // Unlike an absolute temperature sensor,
348         // where 0 degrees C is a good reading,
349         // a value received of 0 (or negative) margin is worrisome,
350         // and should be flagged.
351         // Either it indicates margin not calculated properly,
352         // or somebody forgot to set the margin-zero setpoint,
353         // or the system is really overheating that much.
354         // This is a different condition from _failed
355         // and _badReading, so it merits its own flag.
356         // The sensor has not failed, the reading is good, but the zone
357         // still needs to know that it should go to failsafe mode.
358         if (unscaled <= 0.0)
359         {
360             _marginHot = true;
361         }
362     }
363 
364     setValue(value, unscaled);
365 }
366 
handleSensorValue(sdbusplus::message_t & msg,DbusPassive * owner)367 int handleSensorValue(sdbusplus::message_t& msg, DbusPassive* owner)
368 {
369     std::string msgSensor;
370     std::map<std::string, std::variant<int64_t, double, bool>> msgData;
371 
372     msg.read(msgSensor, msgData);
373 
374     if (msgSensor == "xyz.openbmc_project.Sensor.Value")
375     {
376         auto valPropMap = msgData.find("Value");
377         if (valPropMap != msgData.end())
378         {
379             double value =
380                 std::visit(VariantToDoubleVisitor(), valPropMap->second);
381 
382             owner->updateValue(value, false);
383         }
384     }
385     else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Critical")
386     {
387         auto criticalAlarmLow = msgData.find("CriticalAlarmLow");
388         auto criticalAlarmHigh = msgData.find("CriticalAlarmHigh");
389         if (criticalAlarmHigh == msgData.end() &&
390             criticalAlarmLow == msgData.end())
391         {
392             return 0;
393         }
394 
395         bool asserted = false;
396         if (criticalAlarmLow != msgData.end())
397         {
398             asserted = std::get<bool>(criticalAlarmLow->second);
399         }
400 
401         // checking both as in theory you could de-assert one threshold and
402         // assert the other at the same moment
403         if (!asserted && criticalAlarmHigh != msgData.end())
404         {
405             asserted = std::get<bool>(criticalAlarmHigh->second);
406         }
407         owner->setFailed(asserted);
408     }
409 #ifdef UNC_FAILSAFE
410     else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Warning")
411     {
412         auto warningAlarmHigh = msgData.find("WarningAlarmHigh");
413         if (warningAlarmHigh == msgData.end())
414         {
415             return 0;
416         }
417 
418         bool asserted = false;
419         if (warningAlarmHigh != msgData.end())
420         {
421             asserted = std::get<bool>(warningAlarmHigh->second);
422         }
423         owner->setFailed(asserted);
424     }
425 #endif
426     else if (msgSensor == "xyz.openbmc_project.State.Decorator.Availability")
427     {
428         auto available = msgData.find("Available");
429         if (available == msgData.end())
430         {
431             return 0;
432         }
433         bool asserted = std::get<bool>(available->second);
434         owner->setAvailable(asserted);
435         if (!asserted)
436         {
437             // A thermal controller will continue its PID calculation and not
438             // trigger a 'failsafe' when some inputs are unavailable.
439             // So, forced to clear the value here to prevent a historical
440             // value to participate in a latter PID calculation.
441             owner->updateValue(std::numeric_limits<double>::quiet_NaN(), true);
442         }
443     }
444     else if (msgSensor ==
445              "xyz.openbmc_project.State.Decorator.OperationalStatus")
446     {
447         auto functional = msgData.find("Functional");
448         if (functional == msgData.end())
449         {
450             return 0;
451         }
452         bool asserted = std::get<bool>(functional->second);
453         owner->setFunctional(asserted);
454     }
455 
456     return 0;
457 }
458 
dbusHandleSignal(sd_bus_message * msg,void * usrData,sd_bus_error * err)459 int dbusHandleSignal(sd_bus_message* msg, void* usrData,
460                      [[maybe_unused]] sd_bus_error* err)
461 {
462     auto sdbpMsg = sdbusplus::message_t(msg);
463     DbusPassive* obj = static_cast<DbusPassive*>(usrData);
464 
465     return handleSensorValue(sdbpMsg, obj);
466 }
467 
468 } // namespace pid_control
469