1 /**
2  * Copyright © 2021 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "system.hpp"
17 
18 #include "fan.hpp"
19 #include "fan_defs.hpp"
20 #include "tach_sensor.hpp"
21 #include "trust_manager.hpp"
22 #include "types.hpp"
23 #include "utility.hpp"
24 #ifdef MONITOR_USE_JSON
25 #include "json_parser.hpp"
26 #endif
27 
28 #include "config.h"
29 
30 #include "hwmon_ffdc.hpp"
31 
32 #include <nlohmann/json.hpp>
33 #include <phosphor-logging/log.hpp>
34 #include <sdbusplus/bus.hpp>
35 #include <sdeventplus/event.hpp>
36 #include <sdeventplus/source/signal.hpp>
37 
38 namespace phosphor::fan::monitor
39 {
40 
41 using json = nlohmann::json;
42 using Severity = sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level;
43 
44 using namespace phosphor::logging;
45 
46 System::System(Mode mode, sdbusplus::bus::bus& bus,
47                const sdeventplus::Event& event) :
48     _mode(mode),
49     _bus(bus), _event(event),
50     _powerState(std::make_unique<PGoodState>(
51         bus, std::bind(std::mem_fn(&System::powerStateChanged), this,
52                        std::placeholders::_1))),
53     _thermalAlert(bus, THERMAL_ALERT_OBJPATH)
54 {}
55 
56 void System::start()
57 {
58     _started = true;
59     json jsonObj = json::object();
60 #ifdef MONITOR_USE_JSON
61     auto confFile =
62         fan::JsonConfig::getConfFile(_bus, confAppName, confFileName);
63     jsonObj = fan::JsonConfig::load(confFile);
64 #endif
65     // Retrieve and set trust groups within the trust manager
66     setTrustMgr(getTrustGroups(jsonObj));
67     // Retrieve fan definitions and create fan objects to be monitored
68     setFans(getFanDefinitions(jsonObj));
69     setFaultConfig(jsonObj);
70     log<level::INFO>("Configuration loaded");
71 
72     if (_powerState->isPowerOn())
73     {
74         std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
75                       [this](auto& rule) {
76                           rule->check(PowerRuleState::runtime, _fanHealth);
77                       });
78     }
79 
80     if (_sensorMatch.empty())
81     {
82         subscribeSensorsToServices();
83     }
84 }
85 
86 void System::subscribeSensorsToServices()
87 {
88     namespace match = sdbusplus::bus::match;
89 
90     SensorMapType sensorMap;
91 
92     // build a list of all interfaces, always including the value interface
93     // using set automatically guards against duplicates
94     std::set<std::string> unique_interfaces{util::FAN_SENSOR_VALUE_INTF};
95 
96     for (const auto& fan : _fans)
97     {
98         for (const auto& sensor : fan->sensors())
99         {
100             unique_interfaces.insert(sensor->getInterface());
101         }
102     }
103     // convert them to vector to pass into getSubTreeRaw
104     std::vector<std::string> interfaces(unique_interfaces.begin(),
105                                         unique_interfaces.end());
106 
107     try
108     {
109         // get service information for all service names that are
110         // hosting these interfaces
111         auto serviceObjects = util::SDBusPlus::getSubTreeRaw(
112             _bus, FAN_SENSOR_PATH, interfaces, 0);
113 
114         for (const auto& fan : _fans)
115         {
116             // For every sensor in each fan
117             for (const auto& sensor : fan->sensors())
118             {
119                 const auto itServ = serviceObjects.find(sensor->name());
120 
121                 if (serviceObjects.end() == itServ || itServ->second.empty())
122                 {
123                     getLogger().log(
124                         fmt::format("Fan sensor entry {} not found in D-Bus",
125                                     sensor->name()),
126                         Logger::error);
127                     continue;
128                 }
129 
130                 for (const auto& [serviceName, unused] : itServ->second)
131                 {
132                     // associate service name with sensor
133                     sensorMap[serviceName].insert(sensor);
134                 }
135             }
136         }
137 
138         // only create 1 match per service
139         for (const auto& [serviceName, unused] : sensorMap)
140         {
141             // map its service name to the sensor
142             _sensorMatch.emplace_back(std::make_unique<match::match>(
143                 _bus, match::rules::nameOwnerChanged(serviceName),
144                 std::bind(&System::tachSignalOffline, this,
145                           std::placeholders::_1, sensorMap)));
146         }
147     }
148     catch (const util::DBusError&)
149     {
150         // catch exception from getSubTreeRaw() when fan sensor paths don't
151         // exist yet
152     }
153 }
154 
155 void System::sighupHandler(sdeventplus::source::Signal&,
156                            const struct signalfd_siginfo*)
157 {
158     try
159     {
160         json jsonObj = json::object();
161 #ifdef MONITOR_USE_JSON
162         jsonObj = getJsonObj(_bus);
163 #endif
164         auto trustGrps = getTrustGroups(jsonObj);
165         auto fanDefs = getFanDefinitions(jsonObj);
166         // Set configured trust groups
167         setTrustMgr(trustGrps);
168         // Clear/set configured fan definitions
169         _fans.clear();
170         _fanHealth.clear();
171         setFans(fanDefs);
172         setFaultConfig(jsonObj);
173         log<level::INFO>("Configuration reloaded successfully");
174 
175         if (_powerState->isPowerOn())
176         {
177             std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
178                           [this](auto& rule) {
179                               rule->check(PowerRuleState::runtime, _fanHealth);
180                           });
181         }
182 
183         _sensorMatch.clear();
184         subscribeSensorsToServices();
185     }
186     catch (std::runtime_error& re)
187     {
188         log<level::ERR>("Error reloading config, no config changes made",
189                         entry("LOAD_ERROR=%s", re.what()));
190     }
191 }
192 
193 const std::vector<CreateGroupFunction>
194     System::getTrustGroups(const json& jsonObj)
195 {
196 #ifdef MONITOR_USE_JSON
197     return getTrustGrps(jsonObj);
198 #else
199     return trustGroups;
200 #endif
201 }
202 
203 void System::setTrustMgr(const std::vector<CreateGroupFunction>& groupFuncs)
204 {
205     _trust = std::make_unique<trust::Manager>(groupFuncs);
206 }
207 
208 const std::vector<FanDefinition> System::getFanDefinitions(const json& jsonObj)
209 {
210 #ifdef MONITOR_USE_JSON
211     return getFanDefs(jsonObj);
212 #else
213     return fanDefinitions;
214 #endif
215 }
216 
217 void System::setFans(const std::vector<FanDefinition>& fanDefs)
218 {
219     for (const auto& fanDef : fanDefs)
220     {
221         // Check if a condition exists on the fan
222         auto condition = std::get<conditionField>(fanDef);
223         if (condition)
224         {
225             // Condition exists, skip adding fan if it fails
226             if (!(*condition)(_bus))
227             {
228                 continue;
229             }
230         }
231         _fans.emplace_back(
232             std::make_unique<Fan>(_mode, _bus, _event, _trust, fanDef, *this));
233 
234         updateFanHealth(*(_fans.back()));
235     }
236 }
237 
238 // callback indicating a service went [on|off]line.
239 // Determine on/offline status, set all sensors for that service
240 // to new state
241 //
242 void System::tachSignalOffline(sdbusplus::message::message& msg,
243                                SensorMapType const& sensorMap)
244 {
245     std::string serviceName, oldOwner, newOwner;
246 
247     msg.read(serviceName);
248     msg.read(oldOwner);
249     msg.read(newOwner);
250 
251     // true if sensor server came back online, false -> went offline
252     bool hasOwner = !newOwner.empty() && oldOwner.empty();
253 
254     std::string stateStr(hasOwner ? "online" : "offline");
255     getLogger().log(fmt::format("Changing sensors for service {} to {}",
256                                 serviceName, stateStr),
257                     Logger::info);
258 
259     auto sensorItr(sensorMap.find(serviceName));
260 
261     if (sensorItr != sensorMap.end())
262     {
263         // set all sensors' owner state to not-owned
264         for (auto& sensor : sensorItr->second)
265         {
266             sensor->setOwner(hasOwner);
267             sensor->getFan().process(*sensor);
268         }
269     }
270 }
271 
272 void System::updateFanHealth(const Fan& fan)
273 {
274     std::vector<bool> sensorStatus;
275     for (const auto& sensor : fan.sensors())
276     {
277         sensorStatus.push_back(sensor->functional());
278     }
279 
280     _fanHealth[fan.getName()] =
281         std::make_tuple(fan.present(), std::move(sensorStatus));
282 }
283 
284 void System::fanStatusChange(const Fan& fan, bool skipRulesCheck)
285 {
286     updateFanHealth(fan);
287 
288     if (_powerState->isPowerOn() && !skipRulesCheck)
289     {
290         std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
291                       [this](auto& rule) {
292                           rule->check(PowerRuleState::runtime, _fanHealth);
293                       });
294     }
295 }
296 
297 void System::setFaultConfig(const json& jsonObj)
298 {
299 #ifdef MONITOR_USE_JSON
300     std::shared_ptr<PowerInterfaceBase> powerInterface =
301         std::make_shared<PowerInterface>(_thermalAlert);
302 
303     PowerOffAction::PrePowerOffFunc func =
304         std::bind(std::mem_fn(&System::logShutdownError), this);
305 
306     _powerOffRules = getPowerOffRules(jsonObj, powerInterface, func);
307 
308     _numNonfuncSensorsBeforeError = getNumNonfuncRotorsBeforeError(jsonObj);
309 #endif
310 }
311 
312 void System::powerStateChanged(bool powerStateOn)
313 {
314     std::for_each(_fans.begin(), _fans.end(), [powerStateOn](auto& fan) {
315         fan->powerStateChanged(powerStateOn);
316     });
317 
318     if (powerStateOn)
319     {
320         if (!_started)
321         {
322             log<level::ERR>("No conf file found at power on");
323             throw std::runtime_error("No conf file found at power on");
324         }
325 
326         // If no fan has its sensors on D-Bus, then there is a problem
327         // with the fan controller.  Log an error and shut down.
328         if (std::all_of(_fans.begin(), _fans.end(), [](const auto& fan) {
329                 return fan->numSensorsOnDBusAtPowerOn() == 0;
330             }))
331         {
332             handleOfflineFanController();
333             return;
334         }
335 
336         if (_sensorMatch.empty())
337         {
338             subscribeSensorsToServices();
339         }
340 
341         std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
342                       [this](auto& rule) {
343                           rule->check(PowerRuleState::atPgood, _fanHealth);
344                       });
345         std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
346                       [this](auto& rule) {
347                           rule->check(PowerRuleState::runtime, _fanHealth);
348                       });
349     }
350     else
351     {
352         _thermalAlert.enabled(false);
353 
354         // Cancel any in-progress power off actions
355         std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
356                       [this](auto& rule) { rule->cancel(); });
357     }
358 }
359 
360 void System::sensorErrorTimerExpired(const Fan& fan, const TachSensor& sensor)
361 {
362     std::string fanPath{util::INVENTORY_PATH + fan.getName()};
363 
364     getLogger().log(
365         fmt::format("Creating event log for faulted fan {} sensor {}", fanPath,
366                     sensor.name()),
367         Logger::error);
368 
369     // In order to know if the event log should have a severity of error or
370     // informational, count the number of existing nonfunctional sensors and
371     // compare it to _numNonfuncSensorsBeforeError.
372     size_t nonfuncSensors = 0;
373     for (const auto& fan : _fans)
374     {
375         for (const auto& s : fan->sensors())
376         {
377             // Don't count nonfunctional sensors that still have their
378             // error timer running as nonfunctional since they haven't
379             // had event logs created for those errors yet.
380             if (!s->functional() && !s->errorTimerRunning())
381             {
382                 nonfuncSensors++;
383             }
384         }
385     }
386 
387     Severity severity = Severity::Error;
388     if (nonfuncSensors < _numNonfuncSensorsBeforeError)
389     {
390         severity = Severity::Informational;
391     }
392 
393     auto error =
394         std::make_unique<FanError>("xyz.openbmc_project.Fan.Error.Fault",
395                                    fanPath, sensor.name(), severity);
396 
397     auto sensorData = captureSensorData();
398     error->commit(sensorData);
399 
400     // Save the error so it can be committed again on a power off.
401     _lastError = std::move(error);
402 }
403 
404 void System::fanMissingErrorTimerExpired(const Fan& fan)
405 {
406     std::string fanPath{util::INVENTORY_PATH + fan.getName()};
407 
408     getLogger().log(
409         fmt::format("Creating event log for missing fan {}", fanPath),
410         Logger::error);
411 
412     auto error = std::make_unique<FanError>(
413         "xyz.openbmc_project.Fan.Error.Missing", fanPath, "", Severity::Error);
414 
415     auto sensorData = captureSensorData();
416     error->commit(sensorData);
417 
418     // Save the error so it can be committed again on a power off.
419     _lastError = std::move(error);
420 }
421 
422 void System::logShutdownError()
423 {
424     if (_lastError)
425     {
426         getLogger().log("Re-committing previous fan error before power off");
427 
428         // Still use the latest sensor data
429         auto sensorData = captureSensorData();
430         _lastError->commit(sensorData, true);
431     }
432 }
433 
434 json System::captureSensorData()
435 {
436     json data;
437 
438     for (const auto& fan : _fans)
439     {
440         for (const auto& sensor : fan->sensors())
441         {
442             json values;
443             values["present"] = fan->present();
444             values["functional"] = sensor->functional();
445             values["tach"] = sensor->getInput();
446             if (sensor->hasTarget())
447             {
448                 values["target"] = sensor->getTarget();
449             }
450 
451             data["sensors"][sensor->name()] = values;
452         }
453     }
454 
455     return data;
456 }
457 
458 void System::handleOfflineFanController()
459 {
460     getLogger().log("The fan controller appears to be offline.  Shutting down.",
461                     Logger::error);
462 
463     auto ffdc = collectHwmonFFDC();
464 
465     FanError error{"xyz.openbmc_project.Fan.Error.FanControllerOffline",
466                    Severity::Critical};
467     error.commit(ffdc, true);
468 
469     PowerInterface::executeHardPowerOff();
470 }
471 
472 } // namespace phosphor::fan::monitor
473