xref: /openbmc/phosphor-pid-control/dbus/dbuspassive.cpp (revision 6df8bb5086b29c43217596b194dda7fbc4e3ec4a)
1 /**
2  * Copyright 2017 Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "config.h"
17 
18 #include "dbuspassive.hpp"
19 
20 #include "dbushelper_interface.hpp"
21 #include "dbuspassiveredundancy.hpp"
22 #include "dbusutil.hpp"
23 #include "failsafeloggers/builder.hpp"
24 #include "failsafeloggers/failsafe_logger_utility.hpp"
25 #include "util.hpp"
26 
27 #include <sdbusplus/bus.hpp>
28 
29 #include <chrono>
30 #include <cmath>
31 #include <memory>
32 #include <mutex>
33 #include <string>
34 #include <variant>
35 
36 #include "failsafeloggers/failsafe_logger.cpp"
37 
38 namespace pid_control
39 {
40 
createDbusPassive(sdbusplus::bus_t & bus,const std::string & type,const std::string & id,std::unique_ptr<DbusHelperInterface> helper,const conf::SensorConfig * info,const std::shared_ptr<DbusPassiveRedundancy> & redundancy)41 std::unique_ptr<ReadInterface> DbusPassive::createDbusPassive(
42     sdbusplus::bus_t& bus, const std::string& type, const std::string& id,
43     std::unique_ptr<DbusHelperInterface> helper, const conf::SensorConfig* info,
44     const std::shared_ptr<DbusPassiveRedundancy>& redundancy)
45 {
46     if (helper == nullptr)
47     {
48         return nullptr;
49     }
50     if (!validType(type))
51     {
52         return nullptr;
53     }
54 
55     /* Need to get the scale and initial value */
56     /* service == busname */
57     std::string path;
58     if (info->readPath.empty())
59     {
60         path = getSensorPath(type, id);
61     }
62     else
63     {
64         path = info->readPath;
65     }
66 
67     SensorProperties settings;
68     bool failed;
69 
70     try
71     {
72         std::string service = helper->getService(sensorintf, path);
73 
74         helper->getProperties(service, path, &settings);
75         failed = helper->thresholdsAsserted(service, path);
76     }
77     catch (const std::exception& e)
78     {
79         return nullptr;
80     }
81 
82     /* if these values are zero, they're ignored. */
83     if (info->ignoreDbusMinMax)
84     {
85         settings.min = 0;
86         settings.max = 0;
87     }
88 
89     settings.unavailableAsFailed = info->unavailableAsFailed;
90 
91     return std::make_unique<DbusPassive>(bus, type, id, std::move(helper),
92                                          settings, failed, path, redundancy);
93 }
94 
DbusPassive(sdbusplus::bus_t & bus,const std::string & type,const std::string & id,std::unique_ptr<DbusHelperInterface> helper,const SensorProperties & settings,bool failed,const std::string & path,const std::shared_ptr<DbusPassiveRedundancy> & redundancy)95 DbusPassive::DbusPassive(
96     sdbusplus::bus_t& bus, const std::string& type, const std::string& id,
97     std::unique_ptr<DbusHelperInterface> helper,
98     const SensorProperties& settings, bool failed, const std::string& path,
99     const std::shared_ptr<DbusPassiveRedundancy>& redundancy) :
100     ReadInterface(), _signal(bus, getMatch(path), dbusHandleSignal, this),
101     _id(id), _helper(std::move(helper)), _failed(failed), path(path),
102     redundancy(redundancy)
103 
104 {
105     _scale = settings.scale;
106     _min = settings.min * std::pow(10.0, _scale);
107     _max = settings.max * std::pow(10.0, _scale);
108     _available = settings.available;
109     _unavailableAsFailed = settings.unavailableAsFailed;
110 
111     // Cache this type knowledge, to avoid repeated string comparison
112     _typeMargin = (type == "margin");
113     _typeFan = (type == "fan");
114 
115     // Force value to be stored, otherwise member would be uninitialized
116     updateValue(settings.value, true);
117 }
118 
read(void)119 ReadReturn DbusPassive::read(void)
120 {
121     std::lock_guard<std::mutex> guard(_lock);
122 
123     ReadReturn r = {_value, _updated, _unscaled};
124 
125     return r;
126 }
127 
setValue(double value,double unscaled)128 void DbusPassive::setValue(double value, double unscaled)
129 {
130     std::lock_guard<std::mutex> guard(_lock);
131 
132     _value = value;
133     _unscaled = unscaled;
134     _updated = std::chrono::high_resolution_clock::now();
135 }
136 
setValue(double value)137 void DbusPassive::setValue(double value)
138 {
139     // First param is scaled, second param is unscaled, assume same here
140     setValue(value, value);
141 }
142 
getFailed(void) const143 bool DbusPassive::getFailed(void) const
144 {
145     if (redundancy)
146     {
147         const std::set<std::string>& failures = redundancy->getFailed();
148         if (failures.find(path) != failures.end())
149         {
150             outputFailsafeLogWithSensor(_id, true, _id,
151                                         "The sensor path is marked redundant.");
152             return true;
153         }
154     }
155 
156     /*
157      * Unavailable thermal sensors, who are not present or
158      * power-state-not-matching, should not trigger the failSafe mode. For
159      * example, when a system stays at a powered-off state, its CPU Temp
160      * sensors will be unavailable, these unavailable sensors should not be
161      * treated as failed and trigger failSafe.
162      * This is important for systems whose Fans are always on.
163      */
164     if (!_typeFan && !_available && !_unavailableAsFailed)
165     {
166         return false;
167     }
168 
169     // If a reading has came in,
170     // but its value bad in some way (determined by sensor type),
171     // indicate this sensor has failed,
172     // until another value comes in that is no longer bad.
173     // This is different from the overall _failed flag,
174     // which is set and cleared by other causes.
175     if (_badReading)
176     {
177         outputFailsafeLogWithSensor(_id, true, _id,
178                                     "The sensor has bad readings.");
179         return true;
180     }
181 
182     // If a reading has came in, and it is not a bad reading,
183     // but it indicates there is no more thermal margin left,
184     // that is bad, something is wrong with the PID loops,
185     // they are not cooling the system, enable failsafe mode also.
186     if (_marginHot)
187     {
188         outputFailsafeLogWithSensor(_id, true, _id,
189                                     "The sensor has no thermal margin left.");
190         return true;
191     }
192 
193     if (_failed)
194     {
195         outputFailsafeLogWithSensor(
196             _id, true, _id, "The sensor has failed with a critical issue.");
197         return true;
198     }
199 
200     if (!_available)
201     {
202         outputFailsafeLogWithSensor(_id, true, _id,
203                                     "The sensor is unavailable.");
204         return true;
205     }
206 
207     if (!_functional)
208     {
209         outputFailsafeLogWithSensor(_id, true, _id,
210                                     "The sensor is not functional.");
211         return true;
212     }
213 
214     outputFailsafeLogWithSensor(_id, false, _id, "The sensor has recovered.");
215 
216     return false;
217 }
218 
setFailed(bool value)219 void DbusPassive::setFailed(bool value)
220 {
221     _failed = value;
222 }
223 
setFunctional(bool value)224 void DbusPassive::setFunctional(bool value)
225 {
226     _functional = value;
227 }
228 
setAvailable(bool value)229 void DbusPassive::setAvailable(bool value)
230 {
231     _available = value;
232 }
233 
getScale(void)234 int64_t DbusPassive::getScale(void)
235 {
236     return _scale;
237 }
238 
getID(void)239 std::string DbusPassive::getID(void)
240 {
241     return _id;
242 }
243 
getMax(void)244 double DbusPassive::getMax(void)
245 {
246     return _max;
247 }
248 
getMin(void)249 double DbusPassive::getMin(void)
250 {
251     return _min;
252 }
253 
updateValue(double value,bool force)254 void DbusPassive::updateValue(double value, bool force)
255 {
256     _badReading = false;
257 
258     // Do not let a NAN, or other floating-point oddity, be used to update
259     // the value, as that indicates the sensor has no valid reading.
260     if (!(std::isfinite(value)))
261     {
262         _badReading = true;
263 
264         // Do not continue with a bad reading, unless caller forcing
265         if (!force)
266         {
267             return;
268         }
269     }
270 
271     value *= std::pow(10.0, _scale);
272 
273     auto unscaled = value;
274     scaleSensorReading(_min, _max, value);
275 
276     if (_typeMargin)
277     {
278         _marginHot = false;
279 
280         // Unlike an absolute temperature sensor,
281         // where 0 degrees C is a good reading,
282         // a value received of 0 (or negative) margin is worrisome,
283         // and should be flagged.
284         // Either it indicates margin not calculated properly,
285         // or somebody forgot to set the margin-zero setpoint,
286         // or the system is really overheating that much.
287         // This is a different condition from _failed
288         // and _badReading, so it merits its own flag.
289         // The sensor has not failed, the reading is good, but the zone
290         // still needs to know that it should go to failsafe mode.
291         if (unscaled <= 0.0)
292         {
293             _marginHot = true;
294         }
295     }
296 
297     setValue(value, unscaled);
298 }
299 
handleSensorValue(sdbusplus::message_t & msg,DbusPassive * owner)300 int handleSensorValue(sdbusplus::message_t& msg, DbusPassive* owner)
301 {
302     std::string msgSensor;
303     std::map<std::string, std::variant<int64_t, double, bool>> msgData;
304 
305     msg.read(msgSensor, msgData);
306 
307     if (msgSensor == "xyz.openbmc_project.Sensor.Value")
308     {
309         auto valPropMap = msgData.find("Value");
310         if (valPropMap != msgData.end())
311         {
312             double value =
313                 std::visit(VariantToDoubleVisitor(), valPropMap->second);
314 
315             owner->updateValue(value, false);
316         }
317     }
318     else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Critical")
319     {
320         auto criticalAlarmLow = msgData.find("CriticalAlarmLow");
321         auto criticalAlarmHigh = msgData.find("CriticalAlarmHigh");
322         if (criticalAlarmHigh == msgData.end() &&
323             criticalAlarmLow == msgData.end())
324         {
325             return 0;
326         }
327 
328         bool asserted = false;
329         if (criticalAlarmLow != msgData.end())
330         {
331             asserted = std::get<bool>(criticalAlarmLow->second);
332         }
333 
334         // checking both as in theory you could de-assert one threshold and
335         // assert the other at the same moment
336         if (!asserted && criticalAlarmHigh != msgData.end())
337         {
338             asserted = std::get<bool>(criticalAlarmHigh->second);
339         }
340         owner->setFailed(asserted);
341     }
342 #ifdef UNC_FAILSAFE
343     else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Warning")
344     {
345         auto warningAlarmHigh = msgData.find("WarningAlarmHigh");
346         if (warningAlarmHigh == msgData.end())
347         {
348             return 0;
349         }
350 
351         bool asserted = false;
352         if (warningAlarmHigh != msgData.end())
353         {
354             asserted = std::get<bool>(warningAlarmHigh->second);
355         }
356         owner->setFailed(asserted);
357     }
358 #endif
359     else if (msgSensor == "xyz.openbmc_project.State.Decorator.Availability")
360     {
361         auto available = msgData.find("Available");
362         if (available == msgData.end())
363         {
364             return 0;
365         }
366         bool asserted = std::get<bool>(available->second);
367         owner->setAvailable(asserted);
368         if (!asserted)
369         {
370             // A thermal controller will continue its PID calculation and not
371             // trigger a 'failsafe' when some inputs are unavailable.
372             // So, forced to clear the value here to prevent a historical
373             // value to participate in a latter PID calculation.
374             owner->updateValue(std::numeric_limits<double>::quiet_NaN(), true);
375         }
376     }
377     else if (msgSensor ==
378              "xyz.openbmc_project.State.Decorator.OperationalStatus")
379     {
380         auto functional = msgData.find("Functional");
381         if (functional == msgData.end())
382         {
383             return 0;
384         }
385         bool asserted = std::get<bool>(functional->second);
386         owner->setFunctional(asserted);
387     }
388 
389     return 0;
390 }
391 
dbusHandleSignal(sd_bus_message * msg,void * usrData,sd_bus_error * err)392 int dbusHandleSignal(sd_bus_message* msg, void* usrData,
393                      [[maybe_unused]] sd_bus_error* err)
394 {
395     auto sdbpMsg = sdbusplus::message_t(msg);
396     DbusPassive* obj = static_cast<DbusPassive*>(usrData);
397 
398     return handleSensorValue(sdbpMsg, obj);
399 }
400 
401 } // namespace pid_control
402