1 /**
2  * Copyright 2017 Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
#include "config.h"

#include "dbuspassive.hpp"

#include "conf.hpp"
#include "dbushelper_interface.hpp"
#include "dbuspassiveredundancy.hpp"
#include "dbusutil.hpp"
#include "failsafeloggers/failsafe_logger_utility.hpp"
#include "interfaces.hpp"
#include "util.hpp"

#include <systemd/sd-bus.h>

#include <sdbusplus/bus.hpp>
#include <sdbusplus/message.hpp>

#include <chrono>
#include <cmath>
#include <cstdint>
#include <exception>
#include <iostream>
#include <limits>
#include <map>
#include <memory>
#include <mutex>
#include <set>
#include <string>
#include <utility>
#include <variant>

#include "failsafeloggers/failsafe_logger.cpp"
47 
48 namespace pid_control
49 {
50 
createDbusPassive(sdbusplus::bus_t & bus,const std::string & type,const std::string & id,std::unique_ptr<DbusHelperInterface> helper,const conf::SensorConfig * info,const std::shared_ptr<DbusPassiveRedundancy> & redundancy)51 std::unique_ptr<ReadInterface> DbusPassive::createDbusPassive(
52     sdbusplus::bus_t& bus, const std::string& type, const std::string& id,
53     std::unique_ptr<DbusHelperInterface> helper, const conf::SensorConfig* info,
54     const std::shared_ptr<DbusPassiveRedundancy>& redundancy)
55 {
56     if (helper == nullptr)
57     {
58         return nullptr;
59     }
60     if (!validType(type))
61     {
62         return nullptr;
63     }
64 
65     /* Need to get the scale and initial value */
66     /* service == busname */
67     std::string path;
68     if (info->readPath.empty())
69     {
70         path = getSensorPath(type, id);
71     }
72     else
73     {
74         path = info->readPath;
75     }
76 
77     SensorProperties settings;
78     bool failed;
79     bool objectMissing = false;
80     std::string service;
81 
82     try
83     {
84         service = helper->getService(sensorintf, path);
85     }
86     catch (const std::exception& e)
87     {
88 #ifndef HANDLE_MISSING_OBJECT_PATHS
89         return nullptr;
90 #else
91         // CASE1: The sensor is not on DBus, but as it is not in the
92         // MissingIsAcceptable list, the sensor should be built with a failed
93         // state to send the zone to failsafe mode. Everything will recover if
94         // all important sensors are back to DBus. swampd will be informed
95         // through InterfacesAdded signals and the sensors will be built again.
96 
97         // CASE2: The sensor is on D-Bus (getService succeeds) but getProperties
98         // fails (e.g., D-Bus error or property fetch failure). In this case,
99         // handle-missing-object-paths does not apply. The sensor build fails,
100         // and the control loop will keep restarting until getProperties
101         // succeeds.
102 
103         // Only CASE1 may send the zone to failsafe mode if the sensor is not
104         // in MissingIsAcceptable. CASE2 results in continuous restart until
105         // recovery.
106 
107         failed = true;
108         objectMissing = true;
109         settings.value = std::numeric_limits<double>::quiet_NaN();
110         settings.unit = getSensorUnit(type);
111         settings.available = false;
112         settings.unavailableAsFailed = true;
113         if (info->ignoreDbusMinMax)
114         {
115             settings.min = 0;
116             settings.max = 0;
117         }
118         std::cerr << "DbusPassive: Sensor " << path
119                   << " is missing from D-Bus, build this sensor as failed\n";
120         return std::make_unique<DbusPassive>(
121             bus, type, id, std::move(helper), settings, failed, objectMissing,
122             path, redundancy);
123 #endif
124     }
125 
126     try
127     {
128         helper->getProperties(service, path, &settings);
129         failed = helper->thresholdsAsserted(service, path);
130     }
131     catch (const std::exception& e)
132     {
133         return nullptr;
134     }
135 
136     /* if these values are zero, they're ignored. */
137     if (info->ignoreDbusMinMax)
138     {
139         settings.min = 0;
140         settings.max = 0;
141     }
142 
143     settings.unavailableAsFailed = info->unavailableAsFailed;
144 
145     return std::make_unique<DbusPassive>(
146         bus, type, id, std::move(helper), settings, failed, objectMissing, path,
147         redundancy);
148 }
149 
DbusPassive(sdbusplus::bus_t & bus,const std::string & type,const std::string & id,std::unique_ptr<DbusHelperInterface> helper,const SensorProperties & settings,bool failed,bool objectMissing,const std::string & path,const std::shared_ptr<DbusPassiveRedundancy> & redundancy)150 DbusPassive::DbusPassive(
151     sdbusplus::bus_t& bus, const std::string& type, const std::string& id,
152     std::unique_ptr<DbusHelperInterface> helper,
153     const SensorProperties& settings, bool failed, bool objectMissing,
154     const std::string& path,
155     const std::shared_ptr<DbusPassiveRedundancy>& redundancy) :
156     ReadInterface(), _signal(bus, getMatch(path), dbusHandleSignal, this),
157     _id(id), _helper(std::move(helper)), _failed(failed),
158     _objectMissing(objectMissing), path(path), redundancy(redundancy)
159 
160 {
161     _scale = settings.scale;
162     _min = settings.min * std::pow(10.0, _scale);
163     _max = settings.max * std::pow(10.0, _scale);
164     _available = settings.available;
165     _unavailableAsFailed = settings.unavailableAsFailed;
166 
167     // Cache this type knowledge, to avoid repeated string comparison
168     _typeMargin = (type == "margin");
169     _typeFan = (type == "fan");
170 
171     // Force value to be stored, otherwise member would be uninitialized
172     updateValue(settings.value, true);
173 }
174 
read(void)175 ReadReturn DbusPassive::read(void)
176 {
177     std::lock_guard<std::mutex> guard(_lock);
178 
179     ReadReturn r = {_value, _updated, _unscaled};
180 
181     return r;
182 }
183 
setValue(double value,double unscaled)184 void DbusPassive::setValue(double value, double unscaled)
185 {
186     std::lock_guard<std::mutex> guard(_lock);
187 
188     _value = value;
189     _unscaled = unscaled;
190     _updated = std::chrono::high_resolution_clock::now();
191 }
192 
setValue(double value)193 void DbusPassive::setValue(double value)
194 {
195     // First param is scaled, second param is unscaled, assume same here
196     setValue(value, value);
197 }
198 
getFailed(void) const199 bool DbusPassive::getFailed(void) const
200 {
201     if (redundancy)
202     {
203         const std::set<std::string>& failures = redundancy->getFailed();
204         if (failures.find(path) != failures.end())
205         {
206             outputFailsafeLogWithSensor(_id, true, _id,
207                                         "The sensor path is marked redundant.");
208             return true;
209         }
210     }
211 
212     /*
213      * If handle-missing-object-paths is enabled, and the expected D-Bus object
214      * path is not exported, this sensor is created to represent that condition.
215      * Indicate this sensor has failed so the zone enters failSafe mode.
216      */
217     if (_objectMissing)
218     {
219         outputFailsafeLogWithSensor(_id, true, _id,
220                                     "The sensor D-Bus object is missing.");
221         return true;
222     }
223 
224     /*
225      * Unavailable thermal sensors, who are not present or
226      * power-state-not-matching, should not trigger the failSafe mode. For
227      * example, when a system stays at a powered-off state, its CPU Temp
228      * sensors will be unavailable, these unavailable sensors should not be
229      * treated as failed and trigger failSafe.
230      * This is important for systems whose Fans are always on.
231      */
232     if (!_typeFan && !_available && !_unavailableAsFailed)
233     {
234         return false;
235     }
236 
237     // If a reading has came in,
238     // but its value bad in some way (determined by sensor type),
239     // indicate this sensor has failed,
240     // until another value comes in that is no longer bad.
241     // This is different from the overall _failed flag,
242     // which is set and cleared by other causes.
243     if (_badReading)
244     {
245         outputFailsafeLogWithSensor(_id, true, _id,
246                                     "The sensor has bad readings.");
247         return true;
248     }
249 
250     // If a reading has came in, and it is not a bad reading,
251     // but it indicates there is no more thermal margin left,
252     // that is bad, something is wrong with the PID loops,
253     // they are not cooling the system, enable failsafe mode also.
254     if (_marginHot)
255     {
256         outputFailsafeLogWithSensor(_id, true, _id,
257                                     "The sensor has no thermal margin left.");
258         return true;
259     }
260 
261     if (_failed)
262     {
263         outputFailsafeLogWithSensor(
264             _id, true, _id, "The sensor has failed with a critical issue.");
265         return true;
266     }
267 
268     if (!_available)
269     {
270         outputFailsafeLogWithSensor(_id, true, _id,
271                                     "The sensor is unavailable.");
272         return true;
273     }
274 
275     if (!_functional)
276     {
277         outputFailsafeLogWithSensor(_id, true, _id,
278                                     "The sensor is not functional.");
279         return true;
280     }
281 
282     outputFailsafeLogWithSensor(_id, false, _id, "The sensor has recovered.");
283 
284     return false;
285 }
286 
getFailReason(void) const287 std::string DbusPassive::getFailReason(void) const
288 {
289     if (_objectMissing)
290     {
291         return "Sensor D-Bus object missing";
292     }
293     if (_badReading)
294     {
295         return "Sensor reading bad";
296     }
297     if (_marginHot)
298     {
299         return "Margin hot";
300     }
301     if (_failed)
302     {
303         return "Sensor threshold asserted";
304     }
305     if (!_available)
306     {
307         return "Sensor unavailable";
308     }
309     if (!_functional)
310     {
311         return "Sensor not functional";
312     }
313     return "Unknown";
314 }
315 
setFailed(bool value)316 void DbusPassive::setFailed(bool value)
317 {
318     _failed = value;
319 }
320 
setFunctional(bool value)321 void DbusPassive::setFunctional(bool value)
322 {
323     _functional = value;
324 }
325 
setAvailable(bool value)326 void DbusPassive::setAvailable(bool value)
327 {
328     _available = value;
329 }
330 
getScale(void)331 int64_t DbusPassive::getScale(void)
332 {
333     return _scale;
334 }
335 
getID(void)336 std::string DbusPassive::getID(void)
337 {
338     return _id;
339 }
340 
getMax(void)341 double DbusPassive::getMax(void)
342 {
343     return _max;
344 }
345 
getMin(void)346 double DbusPassive::getMin(void)
347 {
348     return _min;
349 }
350 
updateValue(double value,bool force)351 void DbusPassive::updateValue(double value, bool force)
352 {
353     _badReading = false;
354 
355     // Do not let a NAN, or other floating-point oddity, be used to update
356     // the value, as that indicates the sensor has no valid reading.
357     if (!(std::isfinite(value)))
358     {
359         _badReading = true;
360 
361         // Do not continue with a bad reading, unless caller forcing
362         if (!force)
363         {
364             return;
365         }
366     }
367 
368     value *= std::pow(10.0, _scale);
369 
370     auto unscaled = value;
371     scaleSensorReading(_min, _max, value);
372 
373     if (_typeMargin)
374     {
375         _marginHot = false;
376 
377         // Unlike an absolute temperature sensor,
378         // where 0 degrees C is a good reading,
379         // a value received of 0 (or negative) margin is worrisome,
380         // and should be flagged.
381         // Either it indicates margin not calculated properly,
382         // or somebody forgot to set the margin-zero setpoint,
383         // or the system is really overheating that much.
384         // This is a different condition from _failed
385         // and _badReading, so it merits its own flag.
386         // The sensor has not failed, the reading is good, but the zone
387         // still needs to know that it should go to failsafe mode.
388         if (unscaled <= 0.0)
389         {
390             _marginHot = true;
391         }
392     }
393 
394     setValue(value, unscaled);
395 }
396 
handleSensorValue(sdbusplus::message_t & msg,DbusPassive * owner)397 int handleSensorValue(sdbusplus::message_t& msg, DbusPassive* owner)
398 {
399     std::string msgSensor;
400     std::map<std::string, std::variant<int64_t, double, bool>> msgData;
401 
402     msg.read(msgSensor, msgData);
403 
404     if (msgSensor == "xyz.openbmc_project.Sensor.Value")
405     {
406         auto valPropMap = msgData.find("Value");
407         if (valPropMap != msgData.end())
408         {
409             double value =
410                 std::visit(VariantToDoubleVisitor(), valPropMap->second);
411 
412             owner->updateValue(value, false);
413         }
414     }
415     else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Critical")
416     {
417         auto criticalAlarmLow = msgData.find("CriticalAlarmLow");
418         auto criticalAlarmHigh = msgData.find("CriticalAlarmHigh");
419         if (criticalAlarmHigh == msgData.end() &&
420             criticalAlarmLow == msgData.end())
421         {
422             return 0;
423         }
424 
425         bool asserted = false;
426         if (criticalAlarmLow != msgData.end())
427         {
428             asserted = std::get<bool>(criticalAlarmLow->second);
429         }
430 
431         // checking both as in theory you could de-assert one threshold and
432         // assert the other at the same moment
433         if (!asserted && criticalAlarmHigh != msgData.end())
434         {
435             asserted = std::get<bool>(criticalAlarmHigh->second);
436         }
437         owner->setFailed(asserted);
438     }
439 #ifdef UNC_FAILSAFE
440     else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Warning")
441     {
442         auto warningAlarmHigh = msgData.find("WarningAlarmHigh");
443         if (warningAlarmHigh == msgData.end())
444         {
445             return 0;
446         }
447 
448         bool asserted = false;
449         if (warningAlarmHigh != msgData.end())
450         {
451             asserted = std::get<bool>(warningAlarmHigh->second);
452         }
453         owner->setFailed(asserted);
454     }
455 #endif
456     else if (msgSensor == "xyz.openbmc_project.State.Decorator.Availability")
457     {
458         auto available = msgData.find("Available");
459         if (available == msgData.end())
460         {
461             return 0;
462         }
463         bool asserted = std::get<bool>(available->second);
464         owner->setAvailable(asserted);
465         if (!asserted)
466         {
467             // A thermal controller will continue its PID calculation and not
468             // trigger a 'failsafe' when some inputs are unavailable.
469             // So, forced to clear the value here to prevent a historical
470             // value to participate in a latter PID calculation.
471             owner->updateValue(std::numeric_limits<double>::quiet_NaN(), true);
472         }
473     }
474     else if (msgSensor ==
475              "xyz.openbmc_project.State.Decorator.OperationalStatus")
476     {
477         auto functional = msgData.find("Functional");
478         if (functional == msgData.end())
479         {
480             return 0;
481         }
482         bool asserted = std::get<bool>(functional->second);
483         owner->setFunctional(asserted);
484     }
485 
486     return 0;
487 }
488 
dbusHandleSignal(sd_bus_message * msg,void * usrData,sd_bus_error * err)489 int dbusHandleSignal(sd_bus_message* msg, void* usrData,
490                      [[maybe_unused]] sd_bus_error* err)
491 {
492     auto sdbpMsg = sdbusplus::message_t(msg);
493     DbusPassive* obj = static_cast<DbusPassive*>(usrData);
494 
495     return handleSensorValue(sdbpMsg, obj);
496 }
497 
498 } // namespace pid_control
499