1 /** 2 * Copyright 2017 Google Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "dbuspassive.hpp" 17 18 #include "dbushelper_interface.hpp" 19 #include "dbuspassiveredundancy.hpp" 20 #include "dbusutil.hpp" 21 #include "util.hpp" 22 23 #include <sdbusplus/bus.hpp> 24 25 #include <chrono> 26 #include <cmath> 27 #include <memory> 28 #include <mutex> 29 #include <string> 30 #include <variant> 31 32 namespace pid_control 33 { 34 35 std::unique_ptr<ReadInterface> DbusPassive::createDbusPassive( 36 sdbusplus::bus::bus& bus, const std::string& type, const std::string& id, 37 std::unique_ptr<DbusHelperInterface> helper, const conf::SensorConfig* info, 38 const std::shared_ptr<DbusPassiveRedundancy>& redundancy) 39 { 40 if (helper == nullptr) 41 { 42 return nullptr; 43 } 44 if (!validType(type)) 45 { 46 return nullptr; 47 } 48 49 /* Need to get the scale and initial value */ 50 /* service == busname */ 51 std::string path = getSensorPath(type, id); 52 53 SensorProperties settings; 54 bool failed; 55 56 try 57 { 58 std::string service = helper->getService(sensorintf, path); 59 60 helper->getProperties(service, path, &settings); 61 failed = helper->thresholdsAsserted(service, path); 62 } 63 catch (const std::exception& e) 64 { 65 return nullptr; 66 } 67 68 /* if these values are zero, they're ignored. */ 69 if (info->ignoreDbusMinMax) 70 { 71 settings.min = 0; 72 settings.max = 0; 73 } 74 75 settings.unavailableAsFailed = info->unavailableAsFailed; 76 77 return std::make_unique<DbusPassive>(bus, type, id, std::move(helper), 78 settings, failed, path, redundancy); 79 } 80 81 DbusPassive::DbusPassive( 82 sdbusplus::bus::bus& bus, const std::string& type, const std::string& id, 83 std::unique_ptr<DbusHelperInterface> helper, 84 const SensorProperties& settings, bool failed, const std::string& path, 85 const std::shared_ptr<DbusPassiveRedundancy>& redundancy) : 86 ReadInterface(), 87 _signal(bus, getMatch(type, id).c_str(), dbusHandleSignal, this), _id(id), 88 _helper(std::move(helper)), _failed(failed), path(path), 89 redundancy(redundancy) 90 91 { 92 _scale = settings.scale; 93 _min = settings.min * std::pow(10.0, _scale); 94 _max = settings.max * std::pow(10.0, _scale); 95 _available = settings.available; 96 _unavailableAsFailed = settings.unavailableAsFailed; 97 98 // Cache this type knowledge, to avoid repeated string comparison 99 _typeMargin = (type == "margin"); 100 _typeFan = (type == "fan"); 101 102 // Force value to be stored, otherwise member would be uninitialized 103 updateValue(settings.value, true); 104 } 105 106 ReadReturn DbusPassive::read(void) 107 { 108 std::lock_guard<std::mutex> guard(_lock); 109 110 ReadReturn r = {_value, _updated}; 111 112 return r; 113 } 114 115 void DbusPassive::setValue(double value) 116 { 117 std::lock_guard<std::mutex> guard(_lock); 118 119 _value = value; 120 _updated = std::chrono::high_resolution_clock::now(); 121 } 122 123 bool DbusPassive::getFailed(void) const 124 { 125 if (redundancy) 126 { 127 const std::set<std::string>& failures = redundancy->getFailed(); 128 if (failures.find(path) != failures.end()) 129 { 130 return true; 131 } 132 } 133 134 /* 135 * Unavailable thermal sensors, who are not present or 136 * power-state-not-matching, should not trigger the failSafe mode. For 137 * example, when a system stays at a powered-off state, its CPU Temp 138 * sensors will be unavailable, these unavailable sensors should not be 139 * treated as failed and trigger failSafe. 140 * This is important for systems whose Fans are always on. 141 */ 142 if (!_typeFan && !_available && !_unavailableAsFailed) 143 { 144 return false; 145 } 146 147 // If a reading has came in, 148 // but its value bad in some way (determined by sensor type), 149 // indicate this sensor has failed, 150 // until another value comes in that is no longer bad. 151 // This is different from the overall _failed flag, 152 // which is set and cleared by other causes. 153 if (_badReading) 154 { 155 return true; 156 } 157 158 // If a reading has came in, and it is not a bad reading, 159 // but it indicates there is no more thermal margin left, 160 // that is bad, something is wrong with the PID loops, 161 // they are not cooling the system, enable failsafe mode also. 162 if (_marginHot) 163 { 164 return true; 165 } 166 167 return _failed || !_available || !_functional; 168 } 169 170 void DbusPassive::setFailed(bool value) 171 { 172 _failed = value; 173 } 174 175 void DbusPassive::setFunctional(bool value) 176 { 177 _functional = value; 178 } 179 180 void DbusPassive::setAvailable(bool value) 181 { 182 _available = value; 183 } 184 185 int64_t DbusPassive::getScale(void) 186 { 187 return _scale; 188 } 189 190 std::string DbusPassive::getID(void) 191 { 192 return _id; 193 } 194 195 double DbusPassive::getMax(void) 196 { 197 return _max; 198 } 199 200 double DbusPassive::getMin(void) 201 { 202 return _min; 203 } 204 205 void DbusPassive::updateValue(double value, bool force) 206 { 207 _badReading = false; 208 209 // Do not let a NAN, or other floating-point oddity, be used to update 210 // the value, as that indicates the sensor has no valid reading. 211 if (!(std::isfinite(value))) 212 { 213 _badReading = true; 214 215 // Do not continue with a bad reading, unless caller forcing 216 if (!force) 217 { 218 return; 219 } 220 } 221 222 value *= std::pow(10.0, _scale); 223 224 auto unscaled = value; 225 scaleSensorReading(_min, _max, value); 226 227 if (_typeMargin) 228 { 229 _marginHot = false; 230 231 // Unlike an absolute temperature sensor, 232 // where 0 degrees C is a good reading, 233 // a value received of 0 (or negative) margin is worrisome, 234 // and should be flagged. 235 // Either it indicates margin not calculated properly, 236 // or somebody forgot to set the margin-zero setpoint, 237 // or the system is really overheating that much. 238 // This is a different condition from _failed 239 // and _badReading, so it merits its own flag. 240 // The sensor has not failed, the reading is good, but the zone 241 // still needs to know that it should go to failsafe mode. 242 if (unscaled <= 0.0) 243 { 244 _marginHot = true; 245 } 246 } 247 248 setValue(value); 249 } 250 251 int handleSensorValue(sdbusplus::message::message& msg, DbusPassive* owner) 252 { 253 std::string msgSensor; 254 std::map<std::string, std::variant<int64_t, double, bool>> msgData; 255 256 msg.read(msgSensor, msgData); 257 258 if (msgSensor == "xyz.openbmc_project.Sensor.Value") 259 { 260 auto valPropMap = msgData.find("Value"); 261 if (valPropMap != msgData.end()) 262 { 263 double value = 264 std::visit(VariantToDoubleVisitor(), valPropMap->second); 265 266 owner->updateValue(value, false); 267 } 268 } 269 else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Critical") 270 { 271 auto criticalAlarmLow = msgData.find("CriticalAlarmLow"); 272 auto criticalAlarmHigh = msgData.find("CriticalAlarmHigh"); 273 if (criticalAlarmHigh == msgData.end() && 274 criticalAlarmLow == msgData.end()) 275 { 276 return 0; 277 } 278 279 bool asserted = false; 280 if (criticalAlarmLow != msgData.end()) 281 { 282 asserted = std::get<bool>(criticalAlarmLow->second); 283 } 284 285 // checking both as in theory you could de-assert one threshold and 286 // assert the other at the same moment 287 if (!asserted && criticalAlarmHigh != msgData.end()) 288 { 289 asserted = std::get<bool>(criticalAlarmHigh->second); 290 } 291 owner->setFailed(asserted); 292 } 293 else if (msgSensor == "xyz.openbmc_project.State.Decorator.Availability") 294 { 295 auto available = msgData.find("Available"); 296 if (available == msgData.end()) 297 { 298 return 0; 299 } 300 bool asserted = std::get<bool>(available->second); 301 owner->setAvailable(asserted); 302 if (!asserted) 303 { 304 // A thermal controller will continue its PID calculation and not 305 // trigger a 'failsafe' when some inputs are unavailable. 306 // So, forced to clear the value here to prevent a historical 307 // value to participate in a latter PID calculation. 308 owner->updateValue(std::numeric_limits<double>::quiet_NaN(), true); 309 } 310 } 311 else if (msgSensor == 312 "xyz.openbmc_project.State.Decorator.OperationalStatus") 313 { 314 auto functional = msgData.find("Functional"); 315 if (functional == msgData.end()) 316 { 317 return 0; 318 } 319 bool asserted = std::get<bool>(functional->second); 320 owner->setFunctional(asserted); 321 } 322 323 return 0; 324 } 325 326 int dbusHandleSignal(sd_bus_message* msg, void* usrData, sd_bus_error* err) 327 { 328 auto sdbpMsg = sdbusplus::message::message(msg); 329 DbusPassive* obj = static_cast<DbusPassive*>(usrData); 330 331 return handleSensorValue(sdbpMsg, obj); 332 } 333 334 } // namespace pid_control 335