1 /** 2 * Copyright 2017 Google Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "config.h" 17 18 #include "dbuspassive.hpp" 19 20 #include "dbushelper_interface.hpp" 21 #include "dbuspassiveredundancy.hpp" 22 #include "dbusutil.hpp" 23 #include "failsafeloggers/builder.hpp" 24 #include "failsafeloggers/failsafe_logger_utility.hpp" 25 #include "util.hpp" 26 27 #include <sdbusplus/bus.hpp> 28 29 #include <chrono> 30 #include <cmath> 31 #include <memory> 32 #include <mutex> 33 #include <string> 34 #include <variant> 35 36 #include "failsafeloggers/failsafe_logger.cpp" 37 38 namespace pid_control 39 { 40 41 std::unique_ptr<ReadInterface> DbusPassive::createDbusPassive( 42 sdbusplus::bus_t& bus, const std::string& type, const std::string& id, 43 std::unique_ptr<DbusHelperInterface> helper, const conf::SensorConfig* info, 44 const std::shared_ptr<DbusPassiveRedundancy>& redundancy) 45 { 46 if (helper == nullptr) 47 { 48 return nullptr; 49 } 50 if (!validType(type)) 51 { 52 return nullptr; 53 } 54 55 /* Need to get the scale and initial value */ 56 /* service == busname */ 57 std::string path; 58 if (info->readPath.empty()) 59 { 60 path = getSensorPath(type, id); 61 } 62 else 63 { 64 path = info->readPath; 65 } 66 67 SensorProperties settings; 68 bool failed; 69 std::string service; 70 71 try 72 { 73 service = helper->getService(sensorintf, path); 74 } 75 catch (const std::exception& e) 76 { 77 #ifndef HANDLE_MISSING_OBJECT_PATHS 78 return nullptr; 79 #else 80 // CASE1: The sensor is not on DBus, but as it is not in the 81 // MissingIsAcceptable list, the sensor should be built with a failed 82 // state to send the zone to failsafe mode. Everything will recover if 83 // all important sensors are back to DBus. swampd will be informed 84 // through InterfacesAdded signals and the sensors will be built again. 85 86 // CASE2: The sensor is on D-Bus (getService succeeds) but getProperties 87 // fails (e.g., D-Bus error or property fetch failure). In this case, 88 // handle-missing-object-paths does not apply. The sensor build fails, 89 // and the control loop will keep restarting until getProperties 90 // succeeds. 91 92 // Only CASE1 may send the zone to failsafe mode if the sensor is not 93 // in MissingIsAcceptable. CASE2 results in continuous restart until 94 // recovery. 95 96 failed = true; 97 settings.value = std::numeric_limits<double>::quiet_NaN(); 98 settings.unit = getSensorUnit(type); 99 settings.available = false; 100 settings.unavailableAsFailed = true; 101 if (info->ignoreDbusMinMax) 102 { 103 settings.min = 0; 104 settings.max = 0; 105 } 106 std::cerr << "DbusPassive: Sensor " << path 107 << " is missing from D-Bus, build this sensor as failed\n"; 108 return std::make_unique<DbusPassive>( 109 bus, type, id, std::move(helper), settings, failed, path, 110 redundancy); 111 #endif 112 } 113 114 try 115 { 116 helper->getProperties(service, path, &settings); 117 failed = helper->thresholdsAsserted(service, path); 118 } 119 catch (const std::exception& e) 120 { 121 return nullptr; 122 } 123 124 /* if these values are zero, they're ignored. */ 125 if (info->ignoreDbusMinMax) 126 { 127 settings.min = 0; 128 settings.max = 0; 129 } 130 131 settings.unavailableAsFailed = info->unavailableAsFailed; 132 133 return std::make_unique<DbusPassive>(bus, type, id, std::move(helper), 134 settings, failed, path, redundancy); 135 } 136 137 DbusPassive::DbusPassive( 138 sdbusplus::bus_t& bus, const std::string& type, const std::string& id, 139 std::unique_ptr<DbusHelperInterface> helper, 140 const SensorProperties& settings, bool failed, const std::string& path, 141 const std::shared_ptr<DbusPassiveRedundancy>& redundancy) : 142 ReadInterface(), _signal(bus, getMatch(path), dbusHandleSignal, this), 143 _id(id), _helper(std::move(helper)), _failed(failed), path(path), 144 redundancy(redundancy) 145 146 { 147 _scale = settings.scale; 148 _min = settings.min * std::pow(10.0, _scale); 149 _max = settings.max * std::pow(10.0, _scale); 150 _available = settings.available; 151 _unavailableAsFailed = settings.unavailableAsFailed; 152 153 // Cache this type knowledge, to avoid repeated string comparison 154 _typeMargin = (type == "margin"); 155 _typeFan = (type == "fan"); 156 157 // Force value to be stored, otherwise member would be uninitialized 158 updateValue(settings.value, true); 159 } 160 161 ReadReturn DbusPassive::read(void) 162 { 163 std::lock_guard<std::mutex> guard(_lock); 164 165 ReadReturn r = {_value, _updated, _unscaled}; 166 167 return r; 168 } 169 170 void DbusPassive::setValue(double value, double unscaled) 171 { 172 std::lock_guard<std::mutex> guard(_lock); 173 174 _value = value; 175 _unscaled = unscaled; 176 _updated = std::chrono::high_resolution_clock::now(); 177 } 178 179 void DbusPassive::setValue(double value) 180 { 181 // First param is scaled, second param is unscaled, assume same here 182 setValue(value, value); 183 } 184 185 bool DbusPassive::getFailed(void) const 186 { 187 if (redundancy) 188 { 189 const std::set<std::string>& failures = redundancy->getFailed(); 190 if (failures.find(path) != failures.end()) 191 { 192 outputFailsafeLogWithSensor(_id, true, _id, 193 "The sensor path is marked redundant."); 194 return true; 195 } 196 } 197 198 /* 199 * Unavailable thermal sensors, who are not present or 200 * power-state-not-matching, should not trigger the failSafe mode. For 201 * example, when a system stays at a powered-off state, its CPU Temp 202 * sensors will be unavailable, these unavailable sensors should not be 203 * treated as failed and trigger failSafe. 204 * This is important for systems whose Fans are always on. 205 */ 206 if (!_typeFan && !_available && !_unavailableAsFailed) 207 { 208 return false; 209 } 210 211 // If a reading has came in, 212 // but its value bad in some way (determined by sensor type), 213 // indicate this sensor has failed, 214 // until another value comes in that is no longer bad. 215 // This is different from the overall _failed flag, 216 // which is set and cleared by other causes. 217 if (_badReading) 218 { 219 outputFailsafeLogWithSensor(_id, true, _id, 220 "The sensor has bad readings."); 221 return true; 222 } 223 224 // If a reading has came in, and it is not a bad reading, 225 // but it indicates there is no more thermal margin left, 226 // that is bad, something is wrong with the PID loops, 227 // they are not cooling the system, enable failsafe mode also. 228 if (_marginHot) 229 { 230 outputFailsafeLogWithSensor(_id, true, _id, 231 "The sensor has no thermal margin left."); 232 return true; 233 } 234 235 if (_failed) 236 { 237 outputFailsafeLogWithSensor( 238 _id, true, _id, "The sensor has failed with a critical issue."); 239 return true; 240 } 241 242 if (!_available) 243 { 244 outputFailsafeLogWithSensor(_id, true, _id, 245 "The sensor is unavailable."); 246 return true; 247 } 248 249 if (!_functional) 250 { 251 outputFailsafeLogWithSensor(_id, true, _id, 252 "The sensor is not functional."); 253 return true; 254 } 255 256 outputFailsafeLogWithSensor(_id, false, _id, "The sensor has recovered."); 257 258 return false; 259 } 260 261 std::string DbusPassive::getFailReason(void) const 262 { 263 if (_badReading) 264 { 265 return "Sensor reading bad"; 266 } 267 if (_marginHot) 268 { 269 return "Margin hot"; 270 } 271 if (_failed) 272 { 273 return "Sensor threshold asserted"; 274 } 275 if (!_available) 276 { 277 return "Sensor unavailable"; 278 } 279 if (!_functional) 280 { 281 return "Sensor not functional"; 282 } 283 return "Unknown"; 284 } 285 286 void DbusPassive::setFailed(bool value) 287 { 288 _failed = value; 289 } 290 291 void DbusPassive::setFunctional(bool value) 292 { 293 _functional = value; 294 } 295 296 void DbusPassive::setAvailable(bool value) 297 { 298 _available = value; 299 } 300 301 int64_t DbusPassive::getScale(void) 302 { 303 return _scale; 304 } 305 306 std::string DbusPassive::getID(void) 307 { 308 return _id; 309 } 310 311 double DbusPassive::getMax(void) 312 { 313 return _max; 314 } 315 316 double DbusPassive::getMin(void) 317 { 318 return _min; 319 } 320 321 void DbusPassive::updateValue(double value, bool force) 322 { 323 _badReading = false; 324 325 // Do not let a NAN, or other floating-point oddity, be used to update 326 // the value, as that indicates the sensor has no valid reading. 327 if (!(std::isfinite(value))) 328 { 329 _badReading = true; 330 331 // Do not continue with a bad reading, unless caller forcing 332 if (!force) 333 { 334 return; 335 } 336 } 337 338 value *= std::pow(10.0, _scale); 339 340 auto unscaled = value; 341 scaleSensorReading(_min, _max, value); 342 343 if (_typeMargin) 344 { 345 _marginHot = false; 346 347 // Unlike an absolute temperature sensor, 348 // where 0 degrees C is a good reading, 349 // a value received of 0 (or negative) margin is worrisome, 350 // and should be flagged. 351 // Either it indicates margin not calculated properly, 352 // or somebody forgot to set the margin-zero setpoint, 353 // or the system is really overheating that much. 354 // This is a different condition from _failed 355 // and _badReading, so it merits its own flag. 356 // The sensor has not failed, the reading is good, but the zone 357 // still needs to know that it should go to failsafe mode. 358 if (unscaled <= 0.0) 359 { 360 _marginHot = true; 361 } 362 } 363 364 setValue(value, unscaled); 365 } 366 367 int handleSensorValue(sdbusplus::message_t& msg, DbusPassive* owner) 368 { 369 std::string msgSensor; 370 std::map<std::string, std::variant<int64_t, double, bool>> msgData; 371 372 msg.read(msgSensor, msgData); 373 374 if (msgSensor == "xyz.openbmc_project.Sensor.Value") 375 { 376 auto valPropMap = msgData.find("Value"); 377 if (valPropMap != msgData.end()) 378 { 379 double value = 380 std::visit(VariantToDoubleVisitor(), valPropMap->second); 381 382 owner->updateValue(value, false); 383 } 384 } 385 else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Critical") 386 { 387 auto criticalAlarmLow = msgData.find("CriticalAlarmLow"); 388 auto criticalAlarmHigh = msgData.find("CriticalAlarmHigh"); 389 if (criticalAlarmHigh == msgData.end() && 390 criticalAlarmLow == msgData.end()) 391 { 392 return 0; 393 } 394 395 bool asserted = false; 396 if (criticalAlarmLow != msgData.end()) 397 { 398 asserted = std::get<bool>(criticalAlarmLow->second); 399 } 400 401 // checking both as in theory you could de-assert one threshold and 402 // assert the other at the same moment 403 if (!asserted && criticalAlarmHigh != msgData.end()) 404 { 405 asserted = std::get<bool>(criticalAlarmHigh->second); 406 } 407 owner->setFailed(asserted); 408 } 409 #ifdef UNC_FAILSAFE 410 else if (msgSensor == "xyz.openbmc_project.Sensor.Threshold.Warning") 411 { 412 auto warningAlarmHigh = msgData.find("WarningAlarmHigh"); 413 if (warningAlarmHigh == msgData.end()) 414 { 415 return 0; 416 } 417 418 bool asserted = false; 419 if (warningAlarmHigh != msgData.end()) 420 { 421 asserted = std::get<bool>(warningAlarmHigh->second); 422 } 423 owner->setFailed(asserted); 424 } 425 #endif 426 else if (msgSensor == "xyz.openbmc_project.State.Decorator.Availability") 427 { 428 auto available = msgData.find("Available"); 429 if (available == msgData.end()) 430 { 431 return 0; 432 } 433 bool asserted = std::get<bool>(available->second); 434 owner->setAvailable(asserted); 435 if (!asserted) 436 { 437 // A thermal controller will continue its PID calculation and not 438 // trigger a 'failsafe' when some inputs are unavailable. 439 // So, forced to clear the value here to prevent a historical 440 // value to participate in a latter PID calculation. 441 owner->updateValue(std::numeric_limits<double>::quiet_NaN(), true); 442 } 443 } 444 else if (msgSensor == 445 "xyz.openbmc_project.State.Decorator.OperationalStatus") 446 { 447 auto functional = msgData.find("Functional"); 448 if (functional == msgData.end()) 449 { 450 return 0; 451 } 452 bool asserted = std::get<bool>(functional->second); 453 owner->setFunctional(asserted); 454 } 455 456 return 0; 457 } 458 459 int dbusHandleSignal(sd_bus_message* msg, void* usrData, 460 [[maybe_unused]] sd_bus_error* err) 461 { 462 auto sdbpMsg = sdbusplus::message_t(msg); 463 DbusPassive* obj = static_cast<DbusPassive*>(usrData); 464 465 return handleSensorValue(sdbpMsg, obj); 466 } 467 468 } // namespace pid_control 469