1 /** 2 * Copyright 2017 Google Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "dbuspassive.hpp" 17 18 #include "dbushelper_interface.hpp" 19 #include "dbuspassiveredundancy.hpp" 20 #include "dbusutil.hpp" 21 #include "util.hpp" 22 23 #include <sdbusplus/bus.hpp> 24 25 #include <chrono> 26 #include <cmath> 27 #include <memory> 28 #include <mutex> 29 #include <string> 30 #include <variant> 31 32 namespace pid_control 33 { 34 35 std::unique_ptr<ReadInterface> DbusPassive::createDbusPassive( 36 sdbusplus::bus_t& bus, const std::string& type, const std::string& id, 37 std::unique_ptr<DbusHelperInterface> helper, const conf::SensorConfig* info, 38 const std::shared_ptr<DbusPassiveRedundancy>& redundancy) 39 { 40 if (helper == nullptr) 41 { 42 return nullptr; 43 } 44 if (!validType(type)) 45 { 46 return nullptr; 47 } 48 49 /* Need to get the scale and initial value */ 50 /* service == busname */ 51 std::string path; 52 if (info->readPath.empty()) 53 { 54 path = getSensorPath(type, id); 55 } 56 else 57 { 58 path = info->readPath; 59 } 60 61 SensorProperties settings; 62 bool failed; 63 64 try 65 { 66 std::string service = helper->getService(sensorintf, path); 67 68 helper->getProperties(service, path, &settings); 69 failed = helper->thresholdsAsserted(service, path); 70 } 71 catch (const std::exception& e) 72 { 73 return nullptr; 74 } 75 76 /* if these values are zero, they're ignored. */ 77 if (info->ignoreDbusMinMax) 78 { 79 settings.min = 0; 80 settings.max = 0; 81 } 82 83 settings.unavailableAsFailed = info->unavailableAsFailed; 84 85 return std::make_unique<DbusPassive>(bus, type, id, std::move(helper), 86 settings, failed, path, redundancy); 87 } 88 89 DbusPassive::DbusPassive( 90 sdbusplus::bus_t& bus, const std::string& type, const std::string& id, 91 std::unique_ptr<DbusHelperInterface> helper, 92 const SensorProperties& settings, bool failed, const std::string& path, 93 const std::shared_ptr<DbusPassiveRedundancy>& redundancy) : 94 ReadInterface(), 95 _signal(bus, getMatch(path).c_str(), dbusHandleSignal, this), _id(id), 96 _helper(std::move(helper)), _failed(failed), path(path), 97 redundancy(redundancy) 98 99 { 100 _scale = settings.scale; 101 _min = settings.min * std::pow(10.0, _scale); 102 _max = settings.max * std::pow(10.0, _scale); 103 _available = settings.available; 104 _unavailableAsFailed = settings.unavailableAsFailed; 105 106 // Cache this type knowledge, to avoid repeated string comparison 107 _typeMargin = (type == "margin"); 108 _typeFan = (type == "fan"); 109 110 // Force value to be stored, otherwise member would be uninitialized 111 updateValue(settings.value, true); 112 } 113 114 ReadReturn DbusPassive::read(void) 115 { 116 std::lock_guard<std::mutex> guard(_lock); 117 118 ReadReturn r = {_value, _updated}; 119 120 return r; 121 } 122 123 void DbusPassive::setValue(double value) 124 { 125 std::lock_guard<std::mutex> guard(_lock); 126 127 _value = value; 128 _updated = std::chrono::high_resolution_clock::now(); 129 } 130 131 bool DbusPassive::getFailed(void) const 132 { 133 if (redundancy) 134 { 135 const std::set<std::string>& failures = redundancy->getFailed(); 136 if (failures.find(path) != failures.end()) 137 { 138 return true; 139 } 140 } 141 142 /* 143 * Unavailable thermal sensors, who are not present or 144 * power-state-not-matching, should not trigger the failSafe mode. For 145 * example, when a system stays at a powered-off state, its CPU Temp 146 * sensors will be unavailable, these unavailable sensors should not be 147 * treated as failed and trigger failSafe. 148 * This is important for systems whose Fans are always on. 149 */ 150 if (!_typeFan && !_available && !_unavailableAsFailed) 151 { 152 return false; 153 } 154 155 // If a reading has came in, 156 // but its value bad in some way (determined by sensor type), 157 // indicate this sensor has failed, 158 // until another value comes in that is no longer bad. 159 // This is different from the overall _failed flag, 160 // which is set and cleared by other causes. 161 if (_badReading) 162 { 163 return true; 164 } 165 166 // If a reading has came in, and it is not a bad reading, 167 // but it indicates there is no more thermal margin left, 168 // that is bad, something is wrong with the PID loops, 169 // they are not cooling the system, enable failsafe mode also. 170 if (_marginHot) 171 { 172 return true; 173 } 174 175 return _failed || !_available || !_functional; 176 } 177 178 void DbusPassive::setFailed(bool value) 179 { 180 _failed = value; 181 } 182 183 void DbusPassive::setFunctional(bool value) 184 { 185 _functional = value; 186 } 187 188 void DbusPassive::setAvailable(bool value) 189 { 190 _available = value; 191 } 192 193 int64_t DbusPassive::getScale(void) 194 { 195 return _scale; 196 } 197 198 std::string DbusPassive::getID(void) 199 { 200 return _id; 201 } 202 203 double DbusPassive::getMax(void) 204 { 205 return _max; 206 } 207 208 double DbusPassive::getMin(void) 209 { 210 return _min; 211 } 212 213 void DbusPassive::updateValue(double value, bool force) 214 { 215 _badReading = false; 216 217 // Do not let a NAN, or other floating-point oddity, be used to update 218 // the value, as that indicates the sensor has no valid reading. 219 if (!(std::isfinite(value))) 220 { 221 _badReading = true; 222 223 // Do not continue with a bad reading, unless caller forcing 224 if (!force) 225 { 226 return; 227 } 228 } 229 230 value *= std::pow(10.0, _scale); 231 232 auto unscaled = value; 233 scaleSensorReading(_min, _max, value); 234 235 if (_typeMargin) 236 { 237 _marginHot = false; 238 239 // Unlike an absolute temperature sensor, 240 // where 0 degrees C is a good reading, 241 // a value received of 0 (or negative) margin is worrisome, 242 // and should be flagged. 243 // Either it indicates margin not calculated properly, 244 // or somebody forgot to set the margin-zero setpoint, 245 // or the system is really overheating that much. 246 // This is a different condition from _failed 247 // and _badReading, so it merits its own flag. 248 // The sensor has not failed, the reading is good, but the zone 249 // still needs to know that it should go to failsafe mode. 250 if (unscaled <= 0.0) 251 { 252 _marginHot = true; 253 } 254 } 255 256 setValue(value); 257 } 258 259 int handleSensorValue(sdbusplus::message_t& msg, DbusPassive* owner) 260 { 261 std::string msgSensor; 262 std::map<std::string, std::variant<int64_t, double, bool>> msgData; 263 264 msg.read(msgSensor, msgData); 265 266 if (msgSensor == "xyz.openbmc_project.Sensor.Value") 267 { 268 auto valPropMap = msgData.find("Value"); 269 if (valPropMap != msgData.end()) 270 { 271 double value = 272 std::visit(VariantToDoubleVisitor(), valPropMap->second); 273 274 owner->updateValue(value, false); 275 } 276 } 277 else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Critical") 278 { 279 auto criticalAlarmLow = msgData.find("CriticalAlarmLow"); 280 auto criticalAlarmHigh = msgData.find("CriticalAlarmHigh"); 281 if (criticalAlarmHigh == msgData.end() && 282 criticalAlarmLow == msgData.end()) 283 { 284 return 0; 285 } 286 287 bool asserted = false; 288 if (criticalAlarmLow != msgData.end()) 289 { 290 asserted = std::get<bool>(criticalAlarmLow->second); 291 } 292 293 // checking both as in theory you could de-assert one threshold and 294 // assert the other at the same moment 295 if (!asserted && criticalAlarmHigh != msgData.end()) 296 { 297 asserted = std::get<bool>(criticalAlarmHigh->second); 298 } 299 owner->setFailed(asserted); 300 } 301 else if (msgSensor == "xyz.openbmc_project.State.Decorator.Availability") 302 { 303 auto available = msgData.find("Available"); 304 if (available == msgData.end()) 305 { 306 return 0; 307 } 308 bool asserted = std::get<bool>(available->second); 309 owner->setAvailable(asserted); 310 if (!asserted) 311 { 312 // A thermal controller will continue its PID calculation and not 313 // trigger a 'failsafe' when some inputs are unavailable. 314 // So, forced to clear the value here to prevent a historical 315 // value to participate in a latter PID calculation. 316 owner->updateValue(std::numeric_limits<double>::quiet_NaN(), true); 317 } 318 } 319 else if (msgSensor == 320 "xyz.openbmc_project.State.Decorator.OperationalStatus") 321 { 322 auto functional = msgData.find("Functional"); 323 if (functional == msgData.end()) 324 { 325 return 0; 326 } 327 bool asserted = std::get<bool>(functional->second); 328 owner->setFunctional(asserted); 329 } 330 331 return 0; 332 } 333 334 int dbusHandleSignal(sd_bus_message* msg, void* usrData, 335 [[maybe_unused]] sd_bus_error* err) 336 { 337 auto sdbpMsg = sdbusplus::message_t(msg); 338 DbusPassive* obj = static_cast<DbusPassive*>(usrData); 339 340 return handleSensorValue(sdbpMsg, obj); 341 } 342 343 } // namespace pid_control 344