1 /** 2 * Copyright 2017 Google Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "config.h" 17 18 #include "dbuspassive.hpp" 19 20 #include "dbushelper_interface.hpp" 21 #include "dbuspassiveredundancy.hpp" 22 #include "dbusutil.hpp" 23 #include "failsafeloggers/builder.hpp" 24 #include "failsafeloggers/failsafe_logger_utility.hpp" 25 #include "util.hpp" 26 27 #include <sdbusplus/bus.hpp> 28 29 #include <chrono> 30 #include <cmath> 31 #include <memory> 32 #include <mutex> 33 #include <string> 34 #include <variant> 35 36 #include "failsafeloggers/failsafe_logger.cpp" 37 38 namespace pid_control 39 { 40 41 std::unique_ptr<ReadInterface> DbusPassive::createDbusPassive( 42 sdbusplus::bus_t& bus, const std::string& type, const std::string& id, 43 std::unique_ptr<DbusHelperInterface> helper, const conf::SensorConfig* info, 44 const std::shared_ptr<DbusPassiveRedundancy>& redundancy) 45 { 46 if (helper == nullptr) 47 { 48 return nullptr; 49 } 50 if (!validType(type)) 51 { 52 return nullptr; 53 } 54 55 /* Need to get the scale and initial value */ 56 /* service == busname */ 57 std::string path; 58 if (info->readPath.empty()) 59 { 60 path = getSensorPath(type, id); 61 } 62 else 63 { 64 path = info->readPath; 65 } 66 67 SensorProperties settings; 68 bool failed; 69 70 try 71 { 72 std::string service = helper->getService(sensorintf, path); 73 74 helper->getProperties(service, path, &settings); 75 failed = helper->thresholdsAsserted(service, path); 76 } 77 catch (const std::exception& e) 78 { 79 #ifndef HANDLE_MISSING_OBJECT_PATHS 80 return nullptr; 81 #else 82 // CASE1: The sensor is not on DBus, but as it is not in the 83 // MissingIsAcceptable list, the sensor should be built with a failed 84 // state to send the zone to failsafe mode. Everything will recover if 85 // all important sensors are back to DBus. swampd will be informed 86 // through InterfacesAdded signals and the sensors will be built again. 87 88 // CASE2: The sensor is in the MissingIsAcceptable list and it EXISTS on 89 // DBus (which sends it all the way here). However, swampd fails to 90 // initialize its setting here because of some DBus error??? 91 // (getService/getProperties/getThresholdAssertion). Build it as a 92 // failed sensor too. A DBus signal will inform if there's s new 93 // property value to the sensor and will recover its state when the new 94 // value is valid. 95 96 // In both cases, the Sensor::getFailed() and 97 // DbusPidZone::markSensorMissing() APIs will decide whether to add a 98 // failed sensor to the _failSafeSensors list. As _failed=true, 99 // _available=false and _badReading=false (due to updateValue(nan, 100 // true)), both cases will have getFailed()=true at the beginning as 101 // long as _unavailableAsFailed=true; However as CASE2 has the sensor in 102 // MissingIsAcceptable list, only CASE1 will send the zone to failSafe 103 // mode. 104 105 failed = true; 106 settings.value = std::numeric_limits<double>::quiet_NaN(); 107 settings.unit = getSensorUnit(type); 108 settings.available = false; 109 std::cerr << "DbusPassive: Sensor " << path 110 << " is missing from D-Bus, build this sensor as failed\n"; 111 #endif 112 } 113 114 /* if these values are zero, they're ignored. */ 115 if (info->ignoreDbusMinMax) 116 { 117 settings.min = 0; 118 settings.max = 0; 119 } 120 121 settings.unavailableAsFailed = info->unavailableAsFailed; 122 123 return std::make_unique<DbusPassive>(bus, type, id, std::move(helper), 124 settings, failed, path, redundancy); 125 } 126 127 DbusPassive::DbusPassive( 128 sdbusplus::bus_t& bus, const std::string& type, const std::string& id, 129 std::unique_ptr<DbusHelperInterface> helper, 130 const SensorProperties& settings, bool failed, const std::string& path, 131 const std::shared_ptr<DbusPassiveRedundancy>& redundancy) : 132 ReadInterface(), _signal(bus, getMatch(path), dbusHandleSignal, this), 133 _id(id), _helper(std::move(helper)), _failed(failed), path(path), 134 redundancy(redundancy) 135 136 { 137 _scale = settings.scale; 138 _min = settings.min * std::pow(10.0, _scale); 139 _max = settings.max * std::pow(10.0, _scale); 140 _available = settings.available; 141 _unavailableAsFailed = settings.unavailableAsFailed; 142 143 // Cache this type knowledge, to avoid repeated string comparison 144 _typeMargin = (type == "margin"); 145 _typeFan = (type == "fan"); 146 147 // Force value to be stored, otherwise member would be uninitialized 148 updateValue(settings.value, true); 149 } 150 151 ReadReturn DbusPassive::read(void) 152 { 153 std::lock_guard<std::mutex> guard(_lock); 154 155 ReadReturn r = {_value, _updated, _unscaled}; 156 157 return r; 158 } 159 160 void DbusPassive::setValue(double value, double unscaled) 161 { 162 std::lock_guard<std::mutex> guard(_lock); 163 164 _value = value; 165 _unscaled = unscaled; 166 _updated = std::chrono::high_resolution_clock::now(); 167 } 168 169 void DbusPassive::setValue(double value) 170 { 171 // First param is scaled, second param is unscaled, assume same here 172 setValue(value, value); 173 } 174 175 bool DbusPassive::getFailed(void) const 176 { 177 if (redundancy) 178 { 179 const std::set<std::string>& failures = redundancy->getFailed(); 180 if (failures.find(path) != failures.end()) 181 { 182 outputFailsafeLogWithSensor(_id, true, _id, 183 "The sensor path is marked redundant."); 184 return true; 185 } 186 } 187 188 /* 189 * Unavailable thermal sensors, who are not present or 190 * power-state-not-matching, should not trigger the failSafe mode. For 191 * example, when a system stays at a powered-off state, its CPU Temp 192 * sensors will be unavailable, these unavailable sensors should not be 193 * treated as failed and trigger failSafe. 194 * This is important for systems whose Fans are always on. 195 */ 196 if (!_typeFan && !_available && !_unavailableAsFailed) 197 { 198 return false; 199 } 200 201 // If a reading has came in, 202 // but its value bad in some way (determined by sensor type), 203 // indicate this sensor has failed, 204 // until another value comes in that is no longer bad. 205 // This is different from the overall _failed flag, 206 // which is set and cleared by other causes. 207 if (_badReading) 208 { 209 outputFailsafeLogWithSensor(_id, true, _id, 210 "The sensor has bad readings."); 211 return true; 212 } 213 214 // If a reading has came in, and it is not a bad reading, 215 // but it indicates there is no more thermal margin left, 216 // that is bad, something is wrong with the PID loops, 217 // they are not cooling the system, enable failsafe mode also. 218 if (_marginHot) 219 { 220 outputFailsafeLogWithSensor(_id, true, _id, 221 "The sensor has no thermal margin left."); 222 return true; 223 } 224 225 if (_failed) 226 { 227 outputFailsafeLogWithSensor( 228 _id, true, _id, "The sensor has failed with a critical issue."); 229 return true; 230 } 231 232 if (!_available) 233 { 234 outputFailsafeLogWithSensor(_id, true, _id, 235 "The sensor is unavailable."); 236 return true; 237 } 238 239 if (!_functional) 240 { 241 outputFailsafeLogWithSensor(_id, true, _id, 242 "The sensor is not functional."); 243 return true; 244 } 245 246 outputFailsafeLogWithSensor(_id, false, _id, "The sensor has recovered."); 247 248 return false; 249 } 250 251 std::string DbusPassive::getFailReason(void) const 252 { 253 if (_badReading) 254 { 255 return "Sensor reading bad"; 256 } 257 if (_marginHot) 258 { 259 return "Margin hot"; 260 } 261 if (_failed) 262 { 263 return "Sensor threshold asserted"; 264 } 265 if (!_available) 266 { 267 return "Sensor unavailable"; 268 } 269 if (!_functional) 270 { 271 return "Sensor not functional"; 272 } 273 return "Unknown"; 274 } 275 276 void DbusPassive::setFailed(bool value) 277 { 278 _failed = value; 279 } 280 281 void DbusPassive::setFunctional(bool value) 282 { 283 _functional = value; 284 } 285 286 void DbusPassive::setAvailable(bool value) 287 { 288 _available = value; 289 } 290 291 int64_t DbusPassive::getScale(void) 292 { 293 return _scale; 294 } 295 296 std::string DbusPassive::getID(void) 297 { 298 return _id; 299 } 300 301 double DbusPassive::getMax(void) 302 { 303 return _max; 304 } 305 306 double DbusPassive::getMin(void) 307 { 308 return _min; 309 } 310 311 void DbusPassive::updateValue(double value, bool force) 312 { 313 _badReading = false; 314 315 // Do not let a NAN, or other floating-point oddity, be used to update 316 // the value, as that indicates the sensor has no valid reading. 317 if (!(std::isfinite(value))) 318 { 319 _badReading = true; 320 321 // Do not continue with a bad reading, unless caller forcing 322 if (!force) 323 { 324 return; 325 } 326 } 327 328 value *= std::pow(10.0, _scale); 329 330 auto unscaled = value; 331 scaleSensorReading(_min, _max, value); 332 333 if (_typeMargin) 334 { 335 _marginHot = false; 336 337 // Unlike an absolute temperature sensor, 338 // where 0 degrees C is a good reading, 339 // a value received of 0 (or negative) margin is worrisome, 340 // and should be flagged. 341 // Either it indicates margin not calculated properly, 342 // or somebody forgot to set the margin-zero setpoint, 343 // or the system is really overheating that much. 344 // This is a different condition from _failed 345 // and _badReading, so it merits its own flag. 346 // The sensor has not failed, the reading is good, but the zone 347 // still needs to know that it should go to failsafe mode. 348 if (unscaled <= 0.0) 349 { 350 _marginHot = true; 351 } 352 } 353 354 setValue(value, unscaled); 355 } 356 357 int handleSensorValue(sdbusplus::message_t& msg, DbusPassive* owner) 358 { 359 std::string msgSensor; 360 std::map<std::string, std::variant<int64_t, double, bool>> msgData; 361 362 msg.read(msgSensor, msgData); 363 364 if (msgSensor == "xyz.openbmc_project.Sensor.Value") 365 { 366 auto valPropMap = msgData.find("Value"); 367 if (valPropMap != msgData.end()) 368 { 369 double value = 370 std::visit(VariantToDoubleVisitor(), valPropMap->second); 371 372 owner->updateValue(value, false); 373 } 374 } 375 else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Critical") 376 { 377 auto criticalAlarmLow = msgData.find("CriticalAlarmLow"); 378 auto criticalAlarmHigh = msgData.find("CriticalAlarmHigh"); 379 if (criticalAlarmHigh == msgData.end() && 380 criticalAlarmLow == msgData.end()) 381 { 382 return 0; 383 } 384 385 bool asserted = false; 386 if (criticalAlarmLow != msgData.end()) 387 { 388 asserted = std::get<bool>(criticalAlarmLow->second); 389 } 390 391 // checking both as in theory you could de-assert one threshold and 392 // assert the other at the same moment 393 if (!asserted && criticalAlarmHigh != msgData.end()) 394 { 395 asserted = std::get<bool>(criticalAlarmHigh->second); 396 } 397 owner->setFailed(asserted); 398 } 399 #ifdef UNC_FAILSAFE 400 else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Warning") 401 { 402 auto warningAlarmHigh = msgData.find("WarningAlarmHigh"); 403 if (warningAlarmHigh == msgData.end()) 404 { 405 return 0; 406 } 407 408 bool asserted = false; 409 if (warningAlarmHigh != msgData.end()) 410 { 411 asserted = std::get<bool>(warningAlarmHigh->second); 412 } 413 owner->setFailed(asserted); 414 } 415 #endif 416 else if (msgSensor == "xyz.openbmc_project.State.Decorator.Availability") 417 { 418 auto available = msgData.find("Available"); 419 if (available == msgData.end()) 420 { 421 return 0; 422 } 423 bool asserted = std::get<bool>(available->second); 424 owner->setAvailable(asserted); 425 if (!asserted) 426 { 427 // A thermal controller will continue its PID calculation and not 428 // trigger a 'failsafe' when some inputs are unavailable. 429 // So, forced to clear the value here to prevent a historical 430 // value to participate in a latter PID calculation. 431 owner->updateValue(std::numeric_limits<double>::quiet_NaN(), true); 432 } 433 } 434 else if (msgSensor == 435 "xyz.openbmc_project.State.Decorator.OperationalStatus") 436 { 437 auto functional = msgData.find("Functional"); 438 if (functional == msgData.end()) 439 { 440 return 0; 441 } 442 bool asserted = std::get<bool>(functional->second); 443 owner->setFunctional(asserted); 444 } 445 446 return 0; 447 } 448 449 int dbusHandleSignal(sd_bus_message* msg, void* usrData, 450 [[maybe_unused]] sd_bus_error* err) 451 { 452 auto sdbpMsg = sdbusplus::message_t(msg); 453 DbusPassive* obj = static_cast<DbusPassive*>(usrData); 454 455 return handleSensorValue(sdbpMsg, obj); 456 } 457 458 } // namespace pid_control 459