1 /**
2  * Copyright © 2020 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "system.hpp"
17 
18 #include "fan.hpp"
19 #include "fan_defs.hpp"
20 #include "tach_sensor.hpp"
21 #include "trust_manager.hpp"
22 #include "types.hpp"
23 #ifdef MONITOR_USE_JSON
24 #include "json_parser.hpp"
25 #endif
26 
27 #include "config.h"
28 
29 #include "hwmon_ffdc.hpp"
30 
31 #include <nlohmann/json.hpp>
32 #include <phosphor-logging/log.hpp>
33 #include <sdbusplus/bus.hpp>
34 #include <sdeventplus/event.hpp>
35 #include <sdeventplus/source/signal.hpp>
36 
37 namespace phosphor::fan::monitor
38 {
39 
40 using json = nlohmann::json;
41 using Severity = sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level;
42 
43 using namespace phosphor::logging;
44 
45 System::System(Mode mode, sdbusplus::bus::bus& bus,
46                const sdeventplus::Event& event) :
47     _mode(mode),
48     _bus(bus), _event(event),
49     _powerState(std::make_unique<PGoodState>(
50         bus, std::bind(std::mem_fn(&System::powerStateChanged), this,
51                        std::placeholders::_1))),
52     _thermalAlert(bus, THERMAL_ALERT_OBJPATH)
53 {}
54 
55 void System::start()
56 {
57     _started = true;
58     json jsonObj = json::object();
59 #ifdef MONITOR_USE_JSON
60     auto confFile =
61         fan::JsonConfig::getConfFile(_bus, confAppName, confFileName);
62     jsonObj = fan::JsonConfig::load(confFile);
63 #endif
64     // Retrieve and set trust groups within the trust manager
65     setTrustMgr(getTrustGroups(jsonObj));
66     // Retrieve fan definitions and create fan objects to be monitored
67     setFans(getFanDefinitions(jsonObj));
68     setFaultConfig(jsonObj);
69     log<level::INFO>("Configuration loaded");
70 
71     if (_powerState->isPowerOn())
72     {
73         std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
74                       [this](auto& rule) {
75                           rule->check(PowerRuleState::runtime, _fanHealth);
76                       });
77     }
78 }
79 
80 void System::sighupHandler(sdeventplus::source::Signal&,
81                            const struct signalfd_siginfo*)
82 {
83     try
84     {
85         json jsonObj = json::object();
86 #ifdef MONITOR_USE_JSON
87         jsonObj = getJsonObj(_bus);
88 #endif
89         auto trustGrps = getTrustGroups(jsonObj);
90         auto fanDefs = getFanDefinitions(jsonObj);
91         // Set configured trust groups
92         setTrustMgr(trustGrps);
93         // Clear/set configured fan definitions
94         _fans.clear();
95         _fanHealth.clear();
96         setFans(fanDefs);
97         setFaultConfig(jsonObj);
98         log<level::INFO>("Configuration reloaded successfully");
99 
100         if (_powerState->isPowerOn())
101         {
102             std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
103                           [this](auto& rule) {
104                               rule->check(PowerRuleState::runtime, _fanHealth);
105                           });
106         }
107     }
108     catch (std::runtime_error& re)
109     {
110         log<level::ERR>("Error reloading config, no config changes made",
111                         entry("LOAD_ERROR=%s", re.what()));
112     }
113 }
114 
115 const std::vector<CreateGroupFunction>
116     System::getTrustGroups(const json& jsonObj)
117 {
118 #ifdef MONITOR_USE_JSON
119     return getTrustGrps(jsonObj);
120 #else
121     return trustGroups;
122 #endif
123 }
124 
125 void System::setTrustMgr(const std::vector<CreateGroupFunction>& groupFuncs)
126 {
127     _trust = std::make_unique<trust::Manager>(groupFuncs);
128 }
129 
130 const std::vector<FanDefinition> System::getFanDefinitions(const json& jsonObj)
131 {
132 #ifdef MONITOR_USE_JSON
133     return getFanDefs(jsonObj);
134 #else
135     return fanDefinitions;
136 #endif
137 }
138 
139 void System::setFans(const std::vector<FanDefinition>& fanDefs)
140 {
141     for (const auto& fanDef : fanDefs)
142     {
143         // Check if a condition exists on the fan
144         auto condition = std::get<conditionField>(fanDef);
145         if (condition)
146         {
147             // Condition exists, skip adding fan if it fails
148             if (!(*condition)(_bus))
149             {
150                 continue;
151             }
152         }
153         _fans.emplace_back(
154             std::make_unique<Fan>(_mode, _bus, _event, _trust, fanDef, *this));
155 
156         updateFanHealth(*(_fans.back()));
157     }
158 }
159 
160 void System::updateFanHealth(const Fan& fan)
161 {
162     std::vector<bool> sensorStatus;
163     for (const auto& sensor : fan.sensors())
164     {
165         sensorStatus.push_back(sensor->functional());
166     }
167 
168     _fanHealth[fan.getName()] =
169         std::make_tuple(fan.present(), std::move(sensorStatus));
170 }
171 
172 void System::fanStatusChange(const Fan& fan, bool skipRulesCheck)
173 {
174     updateFanHealth(fan);
175 
176     if (_powerState->isPowerOn() && !skipRulesCheck)
177     {
178         std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
179                       [this](auto& rule) {
180                           rule->check(PowerRuleState::runtime, _fanHealth);
181                       });
182     }
183 }
184 
185 void System::setFaultConfig(const json& jsonObj)
186 {
187 #ifdef MONITOR_USE_JSON
188     std::shared_ptr<PowerInterfaceBase> powerInterface =
189         std::make_shared<PowerInterface>(_thermalAlert);
190 
191     PowerOffAction::PrePowerOffFunc func =
192         std::bind(std::mem_fn(&System::logShutdownError), this);
193 
194     _powerOffRules = getPowerOffRules(jsonObj, powerInterface, func);
195 
196     _numNonfuncSensorsBeforeError = getNumNonfuncRotorsBeforeError(jsonObj);
197 #endif
198 }
199 
200 void System::powerStateChanged(bool powerStateOn)
201 {
202     std::for_each(_fans.begin(), _fans.end(), [powerStateOn](auto& fan) {
203         fan->powerStateChanged(powerStateOn);
204     });
205 
206     if (powerStateOn)
207     {
208         if (!_started)
209         {
210             log<level::ERR>("No conf file found at power on");
211             throw std::runtime_error("No conf file found at power on");
212         }
213 
214         // If no fan has its sensors on D-Bus, then there is a problem
215         // with the fan controller.  Log an error and shut down.
216         if (std::all_of(_fans.begin(), _fans.end(), [](const auto& fan) {
217                 return fan->numSensorsOnDBusAtPowerOn() == 0;
218             }))
219         {
220             handleOfflineFanController();
221             return;
222         }
223 
224         std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
225                       [this](auto& rule) {
226                           rule->check(PowerRuleState::atPgood, _fanHealth);
227                       });
228         std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
229                       [this](auto& rule) {
230                           rule->check(PowerRuleState::runtime, _fanHealth);
231                       });
232     }
233     else
234     {
235         _thermalAlert.enabled(false);
236 
237         // Cancel any in-progress power off actions
238         std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
239                       [this](auto& rule) { rule->cancel(); });
240     }
241 }
242 
243 void System::sensorErrorTimerExpired(const Fan& fan, const TachSensor& sensor)
244 {
245     std::string fanPath{util::INVENTORY_PATH + fan.getName()};
246 
247     getLogger().log(
248         fmt::format("Creating event log for faulted fan {} sensor {}", fanPath,
249                     sensor.name()),
250         Logger::error);
251 
252     // In order to know if the event log should have a severity of error or
253     // informational, count the number of existing nonfunctional sensors and
254     // compare it to _numNonfuncSensorsBeforeError.
255     size_t nonfuncSensors = 0;
256     for (const auto& fan : _fans)
257     {
258         for (const auto& s : fan->sensors())
259         {
260             // Don't count nonfunctional sensors that still have their
261             // error timer running as nonfunctional since they haven't
262             // had event logs created for those errors yet.
263             if (!s->functional() && !s->errorTimerRunning())
264             {
265                 nonfuncSensors++;
266             }
267         }
268     }
269 
270     Severity severity = Severity::Error;
271     if (nonfuncSensors < _numNonfuncSensorsBeforeError)
272     {
273         severity = Severity::Informational;
274     }
275 
276     auto error =
277         std::make_unique<FanError>("xyz.openbmc_project.Fan.Error.Fault",
278                                    fanPath, sensor.name(), severity);
279 
280     auto sensorData = captureSensorData();
281     error->commit(sensorData);
282 
283     // Save the error so it can be committed again on a power off.
284     _lastError = std::move(error);
285 }
286 
287 void System::fanMissingErrorTimerExpired(const Fan& fan)
288 {
289     std::string fanPath{util::INVENTORY_PATH + fan.getName()};
290 
291     getLogger().log(
292         fmt::format("Creating event log for missing fan {}", fanPath),
293         Logger::error);
294 
295     auto error = std::make_unique<FanError>(
296         "xyz.openbmc_project.Fan.Error.Missing", fanPath, "", Severity::Error);
297 
298     auto sensorData = captureSensorData();
299     error->commit(sensorData);
300 
301     // Save the error so it can be committed again on a power off.
302     _lastError = std::move(error);
303 }
304 
305 void System::logShutdownError()
306 {
307     if (_lastError)
308     {
309         getLogger().log("Re-committing previous fan error before power off");
310 
311         // Still use the latest sensor data
312         auto sensorData = captureSensorData();
313         _lastError->commit(sensorData, true);
314     }
315 }
316 
317 json System::captureSensorData()
318 {
319     json data;
320 
321     for (const auto& fan : _fans)
322     {
323         for (const auto& sensor : fan->sensors())
324         {
325             json values;
326             values["present"] = fan->present();
327             values["functional"] = sensor->functional();
328             values["tach"] = sensor->getInput();
329             if (sensor->hasTarget())
330             {
331                 values["target"] = sensor->getTarget();
332             }
333 
334             data["sensors"][sensor->name()] = values;
335         }
336     }
337 
338     return data;
339 }
340 
341 void System::handleOfflineFanController()
342 {
343     getLogger().log("The fan controller appears to be offline.  Shutting down.",
344                     Logger::error);
345 
346     auto ffdc = collectHwmonFFDC();
347 
348     FanError error{"xyz.openbmc_project.Fan.Error.FanControllerOffline",
349                    Severity::Critical};
350     error.commit(ffdc, true);
351 
352     PowerInterface::executeHardPowerOff();
353 }
354 
355 } // namespace phosphor::fan::monitor
356