1 /** 2 * Copyright © 2020 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "system.hpp" 17 18 #include "fan.hpp" 19 #include "fan_defs.hpp" 20 #include "tach_sensor.hpp" 21 #include "trust_manager.hpp" 22 #include "types.hpp" 23 #ifdef MONITOR_USE_JSON 24 #include "json_parser.hpp" 25 #endif 26 27 #include "config.h" 28 29 #include <nlohmann/json.hpp> 30 #include <phosphor-logging/log.hpp> 31 #include <sdbusplus/bus.hpp> 32 #include <sdeventplus/event.hpp> 33 #include <sdeventplus/source/signal.hpp> 34 35 namespace phosphor::fan::monitor 36 { 37 38 using json = nlohmann::json; 39 using Severity = sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level; 40 41 using namespace phosphor::logging; 42 43 System::System(Mode mode, sdbusplus::bus::bus& bus, 44 const sdeventplus::Event& event) : 45 _mode(mode), 46 _bus(bus), _event(event), 47 _powerState(std::make_unique<PGoodState>( 48 bus, std::bind(std::mem_fn(&System::powerStateChanged), this, 49 std::placeholders::_1))), 50 _thermalAlert(bus, THERMAL_ALERT_OBJPATH) 51 { 52 53 json jsonObj = json::object(); 54 #ifdef MONITOR_USE_JSON 55 jsonObj = getJsonObj(bus); 56 #endif 57 // Retrieve and set trust groups within the trust manager 58 setTrustMgr(getTrustGroups(jsonObj)); 59 // Retrieve fan definitions and create fan objects to be monitored 60 setFans(getFanDefinitions(jsonObj)); 61 setFaultConfig(jsonObj); 62 log<level::INFO>("Configuration loaded"); 63 64 // Since this doesn't run at standby yet, powerStateChanged 65 // will never be called so for now treat start up as the 66 // pgood. When this does run at standby, the 'atPgood' 67 // rules won't need to be checked here. 68 if (_powerState->isPowerOn()) 69 { 70 std::for_each(_powerOffRules.begin(), _powerOffRules.end(), 71 [this](auto& rule) { 72 rule->check(PowerRuleState::atPgood, _fanHealth); 73 }); 74 // Runtime rules still need to be checked since fans may already 75 // be missing that could trigger a runtime rule. 76 std::for_each(_powerOffRules.begin(), _powerOffRules.end(), 77 [this](auto& rule) { 78 rule->check(PowerRuleState::runtime, _fanHealth); 79 }); 80 } 81 } 82 83 void System::sighupHandler(sdeventplus::source::Signal&, 84 const struct signalfd_siginfo*) 85 { 86 try 87 { 88 json jsonObj = json::object(); 89 #ifdef MONITOR_USE_JSON 90 jsonObj = getJsonObj(_bus); 91 #endif 92 auto trustGrps = getTrustGroups(jsonObj); 93 auto fanDefs = getFanDefinitions(jsonObj); 94 // Set configured trust groups 95 setTrustMgr(trustGrps); 96 // Clear/set configured fan definitions 97 _fans.clear(); 98 _fanHealth.clear(); 99 setFans(fanDefs); 100 setFaultConfig(jsonObj); 101 log<level::INFO>("Configuration reloaded successfully"); 102 103 if (_powerState->isPowerOn()) 104 { 105 std::for_each(_powerOffRules.begin(), _powerOffRules.end(), 106 [this](auto& rule) { 107 rule->check(PowerRuleState::runtime, _fanHealth); 108 }); 109 } 110 } 111 catch (std::runtime_error& re) 112 { 113 log<level::ERR>("Error reloading config, no config changes made", 114 entry("LOAD_ERROR=%s", re.what())); 115 } 116 } 117 118 const std::vector<CreateGroupFunction> 119 System::getTrustGroups(const json& jsonObj) 120 { 121 #ifdef MONITOR_USE_JSON 122 return getTrustGrps(jsonObj); 123 #else 124 return trustGroups; 125 #endif 126 } 127 128 void System::setTrustMgr(const std::vector<CreateGroupFunction>& groupFuncs) 129 { 130 _trust = std::make_unique<trust::Manager>(groupFuncs); 131 } 132 133 const std::vector<FanDefinition> System::getFanDefinitions(const json& jsonObj) 134 { 135 #ifdef MONITOR_USE_JSON 136 return getFanDefs(jsonObj); 137 #else 138 return fanDefinitions; 139 #endif 140 } 141 142 void System::setFans(const std::vector<FanDefinition>& fanDefs) 143 { 144 for (const auto& fanDef : fanDefs) 145 { 146 // Check if a condition exists on the fan 147 auto condition = std::get<conditionField>(fanDef); 148 if (condition) 149 { 150 // Condition exists, skip adding fan if it fails 151 if (!(*condition)(_bus)) 152 { 153 continue; 154 } 155 } 156 _fans.emplace_back( 157 std::make_unique<Fan>(_mode, _bus, _event, _trust, fanDef, *this)); 158 159 updateFanHealth(*(_fans.back())); 160 } 161 } 162 163 void System::updateFanHealth(const Fan& fan) 164 { 165 std::vector<bool> sensorStatus; 166 for (const auto& sensor : fan.sensors()) 167 { 168 sensorStatus.push_back(sensor->functional()); 169 } 170 171 _fanHealth[fan.getName()] = 172 std::make_tuple(fan.present(), std::move(sensorStatus)); 173 } 174 175 void System::fanStatusChange(const Fan& fan) 176 { 177 updateFanHealth(fan); 178 179 if (_powerState->isPowerOn()) 180 { 181 std::for_each(_powerOffRules.begin(), _powerOffRules.end(), 182 [this](auto& rule) { 183 rule->check(PowerRuleState::runtime, _fanHealth); 184 }); 185 } 186 } 187 188 void System::setFaultConfig(const json& jsonObj) 189 { 190 #ifdef MONITOR_USE_JSON 191 std::shared_ptr<PowerInterfaceBase> powerInterface = 192 std::make_shared<PowerInterface>(_thermalAlert); 193 194 PowerOffAction::PrePowerOffFunc func = 195 std::bind(std::mem_fn(&System::logShutdownError), this); 196 197 _powerOffRules = getPowerOffRules(jsonObj, powerInterface, func); 198 199 _numNonfuncSensorsBeforeError = getNumNonfuncRotorsBeforeError(jsonObj); 200 #endif 201 } 202 203 void System::powerStateChanged(bool powerStateOn) 204 { 205 if (powerStateOn) 206 { 207 std::for_each(_powerOffRules.begin(), _powerOffRules.end(), 208 [this](auto& rule) { 209 rule->check(PowerRuleState::atPgood, _fanHealth); 210 }); 211 std::for_each(_powerOffRules.begin(), _powerOffRules.end(), 212 [this](auto& rule) { 213 rule->check(PowerRuleState::runtime, _fanHealth); 214 }); 215 } 216 else 217 { 218 _thermalAlert.enabled(false); 219 220 // Cancel any in-progress power off actions 221 std::for_each(_powerOffRules.begin(), _powerOffRules.end(), 222 [this](auto& rule) { rule->cancel(); }); 223 } 224 } 225 226 void System::sensorErrorTimerExpired(const Fan& fan, const TachSensor& sensor) 227 { 228 std::string fanPath{util::INVENTORY_PATH + fan.getName()}; 229 230 getLogger().log( 231 fmt::format("Creating event log for faulted fan {} sensor {}", fanPath, 232 sensor.name()), 233 Logger::error); 234 235 // In order to know if the event log should have a severity of error or 236 // informational, count the number of existing nonfunctional sensors and 237 // compare it to _numNonfuncSensorsBeforeError. 238 size_t nonfuncSensors = 0; 239 for (const auto& fan : _fans) 240 { 241 for (const auto& s : fan->sensors()) 242 { 243 // Don't count nonfunctional sensors that still have their 244 // error timer running as nonfunctional since they haven't 245 // had event logs created for those errors yet. 246 if (!s->functional() && !s->errorTimerRunning()) 247 { 248 nonfuncSensors++; 249 } 250 } 251 } 252 253 Severity severity = Severity::Error; 254 if (nonfuncSensors < _numNonfuncSensorsBeforeError) 255 { 256 severity = Severity::Informational; 257 } 258 259 auto error = 260 std::make_unique<FanError>("xyz.openbmc_project.Fan.Error.Fault", 261 fanPath, sensor.name(), severity); 262 263 auto sensorData = captureSensorData(); 264 error->commit(sensorData); 265 266 // Save the error so it can be committed again on a power off. 267 _lastError = std::move(error); 268 } 269 270 void System::fanMissingErrorTimerExpired(const Fan& fan) 271 { 272 std::string fanPath{util::INVENTORY_PATH + fan.getName()}; 273 274 getLogger().log( 275 fmt::format("Creating event log for missing fan {}", fanPath), 276 Logger::error); 277 278 auto error = std::make_unique<FanError>( 279 "xyz.openbmc_project.Fan.Error.Missing", fanPath, "", Severity::Error); 280 281 auto sensorData = captureSensorData(); 282 error->commit(sensorData); 283 284 // Save the error so it can be committed again on a power off. 285 _lastError = std::move(error); 286 } 287 288 void System::logShutdownError() 289 { 290 if (_lastError) 291 { 292 getLogger().log("Re-committing previous fan error before power off"); 293 294 // Still use the latest sensor data 295 auto sensorData = captureSensorData(); 296 _lastError->commit(sensorData); 297 } 298 } 299 300 json System::captureSensorData() 301 { 302 json data; 303 304 for (const auto& fan : _fans) 305 { 306 for (const auto& sensor : fan->sensors()) 307 { 308 json values; 309 values["present"] = fan->present(); 310 values["functional"] = sensor->functional(); 311 values["tach"] = sensor->getInput(); 312 if (sensor->hasTarget()) 313 { 314 values["target"] = sensor->getTarget(); 315 } 316 317 data["sensors"][sensor->name()] = values; 318 } 319 } 320 321 return data; 322 } 323 324 } // namespace phosphor::fan::monitor 325