1 /** 2 * Copyright © 2020 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "system.hpp" 17 18 #include "fan.hpp" 19 #include "fan_defs.hpp" 20 #include "tach_sensor.hpp" 21 #include "trust_manager.hpp" 22 #include "types.hpp" 23 #ifdef MONITOR_USE_JSON 24 #include "json_parser.hpp" 25 #endif 26 27 #include "config.h" 28 29 #include <nlohmann/json.hpp> 30 #include <phosphor-logging/log.hpp> 31 #include <sdbusplus/bus.hpp> 32 #include <sdeventplus/event.hpp> 33 #include <sdeventplus/source/signal.hpp> 34 35 namespace phosphor::fan::monitor 36 { 37 38 using json = nlohmann::json; 39 using Severity = sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level; 40 41 using namespace phosphor::logging; 42 43 System::System(Mode mode, sdbusplus::bus::bus& bus, 44 const sdeventplus::Event& event) : 45 _mode(mode), 46 _bus(bus), _event(event), 47 _powerState(std::make_unique<PGoodState>( 48 bus, std::bind(std::mem_fn(&System::powerStateChanged), this, 49 std::placeholders::_1))), 50 _thermalAlert(bus, THERMAL_ALERT_OBJPATH) 51 {} 52 53 void System::start() 54 { 55 _started = true; 56 json jsonObj = json::object(); 57 #ifdef MONITOR_USE_JSON 58 auto confFile = 59 fan::JsonConfig::getConfFile(_bus, confAppName, confFileName); 60 jsonObj = fan::JsonConfig::load(confFile); 61 #endif 62 // Retrieve and set trust groups within the trust manager 63 setTrustMgr(getTrustGroups(jsonObj)); 64 // Retrieve fan definitions and create fan objects to be monitored 65 setFans(getFanDefinitions(jsonObj)); 66 setFaultConfig(jsonObj); 67 log<level::INFO>("Configuration loaded"); 68 69 if (_powerState->isPowerOn()) 70 { 71 std::for_each(_powerOffRules.begin(), _powerOffRules.end(), 72 [this](auto& rule) { 73 rule->check(PowerRuleState::runtime, _fanHealth); 74 }); 75 } 76 } 77 78 void System::sighupHandler(sdeventplus::source::Signal&, 79 const struct signalfd_siginfo*) 80 { 81 try 82 { 83 json jsonObj = json::object(); 84 #ifdef MONITOR_USE_JSON 85 jsonObj = getJsonObj(_bus); 86 #endif 87 auto trustGrps = getTrustGroups(jsonObj); 88 auto fanDefs = getFanDefinitions(jsonObj); 89 // Set configured trust groups 90 setTrustMgr(trustGrps); 91 // Clear/set configured fan definitions 92 _fans.clear(); 93 _fanHealth.clear(); 94 setFans(fanDefs); 95 setFaultConfig(jsonObj); 96 log<level::INFO>("Configuration reloaded successfully"); 97 98 if (_powerState->isPowerOn()) 99 { 100 std::for_each(_powerOffRules.begin(), _powerOffRules.end(), 101 [this](auto& rule) { 102 rule->check(PowerRuleState::runtime, _fanHealth); 103 }); 104 } 105 } 106 catch (std::runtime_error& re) 107 { 108 log<level::ERR>("Error reloading config, no config changes made", 109 entry("LOAD_ERROR=%s", re.what())); 110 } 111 } 112 113 const std::vector<CreateGroupFunction> 114 System::getTrustGroups(const json& jsonObj) 115 { 116 #ifdef MONITOR_USE_JSON 117 return getTrustGrps(jsonObj); 118 #else 119 return trustGroups; 120 #endif 121 } 122 123 void System::setTrustMgr(const std::vector<CreateGroupFunction>& groupFuncs) 124 { 125 _trust = std::make_unique<trust::Manager>(groupFuncs); 126 } 127 128 const std::vector<FanDefinition> System::getFanDefinitions(const json& jsonObj) 129 { 130 #ifdef MONITOR_USE_JSON 131 return getFanDefs(jsonObj); 132 #else 133 return fanDefinitions; 134 #endif 135 } 136 137 void System::setFans(const std::vector<FanDefinition>& fanDefs) 138 { 139 for (const auto& fanDef : fanDefs) 140 { 141 // Check if a condition exists on the fan 142 auto condition = std::get<conditionField>(fanDef); 143 if (condition) 144 { 145 // Condition exists, skip adding fan if it fails 146 if (!(*condition)(_bus)) 147 { 148 continue; 149 } 150 } 151 _fans.emplace_back( 152 std::make_unique<Fan>(_mode, _bus, _event, _trust, fanDef, *this)); 153 154 updateFanHealth(*(_fans.back())); 155 } 156 } 157 158 void System::updateFanHealth(const Fan& fan) 159 { 160 std::vector<bool> sensorStatus; 161 for (const auto& sensor : fan.sensors()) 162 { 163 sensorStatus.push_back(sensor->functional()); 164 } 165 166 _fanHealth[fan.getName()] = 167 std::make_tuple(fan.present(), std::move(sensorStatus)); 168 } 169 170 void System::fanStatusChange(const Fan& fan, bool skipRulesCheck) 171 { 172 updateFanHealth(fan); 173 174 if (_powerState->isPowerOn() && !skipRulesCheck) 175 { 176 std::for_each(_powerOffRules.begin(), _powerOffRules.end(), 177 [this](auto& rule) { 178 rule->check(PowerRuleState::runtime, _fanHealth); 179 }); 180 } 181 } 182 183 void System::setFaultConfig(const json& jsonObj) 184 { 185 #ifdef MONITOR_USE_JSON 186 std::shared_ptr<PowerInterfaceBase> powerInterface = 187 std::make_shared<PowerInterface>(_thermalAlert); 188 189 PowerOffAction::PrePowerOffFunc func = 190 std::bind(std::mem_fn(&System::logShutdownError), this); 191 192 _powerOffRules = getPowerOffRules(jsonObj, powerInterface, func); 193 194 _numNonfuncSensorsBeforeError = getNumNonfuncRotorsBeforeError(jsonObj); 195 #endif 196 } 197 198 void System::powerStateChanged(bool powerStateOn) 199 { 200 std::for_each(_fans.begin(), _fans.end(), [powerStateOn](auto& fan) { 201 fan->powerStateChanged(powerStateOn); 202 }); 203 204 if (powerStateOn) 205 { 206 if (!_started) 207 { 208 log<level::ERR>("No conf file found at power on"); 209 throw std::runtime_error("No conf file found at power on"); 210 } 211 212 std::for_each(_powerOffRules.begin(), _powerOffRules.end(), 213 [this](auto& rule) { 214 rule->check(PowerRuleState::atPgood, _fanHealth); 215 }); 216 std::for_each(_powerOffRules.begin(), _powerOffRules.end(), 217 [this](auto& rule) { 218 rule->check(PowerRuleState::runtime, _fanHealth); 219 }); 220 } 221 else 222 { 223 _thermalAlert.enabled(false); 224 225 // Cancel any in-progress power off actions 226 std::for_each(_powerOffRules.begin(), _powerOffRules.end(), 227 [this](auto& rule) { rule->cancel(); }); 228 } 229 } 230 231 void System::sensorErrorTimerExpired(const Fan& fan, const TachSensor& sensor) 232 { 233 std::string fanPath{util::INVENTORY_PATH + fan.getName()}; 234 235 getLogger().log( 236 fmt::format("Creating event log for faulted fan {} sensor {}", fanPath, 237 sensor.name()), 238 Logger::error); 239 240 // In order to know if the event log should have a severity of error or 241 // informational, count the number of existing nonfunctional sensors and 242 // compare it to _numNonfuncSensorsBeforeError. 243 size_t nonfuncSensors = 0; 244 for (const auto& fan : _fans) 245 { 246 for (const auto& s : fan->sensors()) 247 { 248 // Don't count nonfunctional sensors that still have their 249 // error timer running as nonfunctional since they haven't 250 // had event logs created for those errors yet. 251 if (!s->functional() && !s->errorTimerRunning()) 252 { 253 nonfuncSensors++; 254 } 255 } 256 } 257 258 Severity severity = Severity::Error; 259 if (nonfuncSensors < _numNonfuncSensorsBeforeError) 260 { 261 severity = Severity::Informational; 262 } 263 264 auto error = 265 std::make_unique<FanError>("xyz.openbmc_project.Fan.Error.Fault", 266 fanPath, sensor.name(), severity); 267 268 auto sensorData = captureSensorData(); 269 error->commit(sensorData); 270 271 // Save the error so it can be committed again on a power off. 272 _lastError = std::move(error); 273 } 274 275 void System::fanMissingErrorTimerExpired(const Fan& fan) 276 { 277 std::string fanPath{util::INVENTORY_PATH + fan.getName()}; 278 279 getLogger().log( 280 fmt::format("Creating event log for missing fan {}", fanPath), 281 Logger::error); 282 283 auto error = std::make_unique<FanError>( 284 "xyz.openbmc_project.Fan.Error.Missing", fanPath, "", Severity::Error); 285 286 auto sensorData = captureSensorData(); 287 error->commit(sensorData); 288 289 // Save the error so it can be committed again on a power off. 290 _lastError = std::move(error); 291 } 292 293 void System::logShutdownError() 294 { 295 if (_lastError) 296 { 297 getLogger().log("Re-committing previous fan error before power off"); 298 299 // Still use the latest sensor data 300 auto sensorData = captureSensorData(); 301 _lastError->commit(sensorData, true); 302 } 303 } 304 305 json System::captureSensorData() 306 { 307 json data; 308 309 for (const auto& fan : _fans) 310 { 311 for (const auto& sensor : fan->sensors()) 312 { 313 json values; 314 values["present"] = fan->present(); 315 values["functional"] = sensor->functional(); 316 values["tach"] = sensor->getInput(); 317 if (sensor->hasTarget()) 318 { 319 values["target"] = sensor->getTarget(); 320 } 321 322 data["sensors"][sensor->name()] = values; 323 } 324 } 325 326 return data; 327 } 328 329 } // namespace phosphor::fan::monitor 330