1 /**
2  * Copyright © 2021 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "system.hpp"
17 
18 #include "fan.hpp"
19 #include "fan_defs.hpp"
20 #include "tach_sensor.hpp"
21 #include "trust_manager.hpp"
22 #include "types.hpp"
23 #include "utility.hpp"
24 #ifdef MONITOR_USE_JSON
25 #include "json_parser.hpp"
26 #endif
27 
28 #include "config.h"
29 
30 #include "hwmon_ffdc.hpp"
31 
32 #include <nlohmann/json.hpp>
33 #include <phosphor-logging/log.hpp>
34 #include <sdbusplus/bus.hpp>
35 #include <sdeventplus/event.hpp>
36 #include <sdeventplus/source/signal.hpp>
37 
38 namespace phosphor::fan::monitor
39 {
40 
41 using json = nlohmann::json;
42 using Severity = sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level;
43 
44 using namespace phosphor::logging;
45 
46 System::System(Mode mode, sdbusplus::bus::bus& bus,
47                const sdeventplus::Event& event) :
48     _mode(mode),
49     _bus(bus), _event(event),
50     _powerState(std::make_unique<PGoodState>(
51         bus, std::bind(std::mem_fn(&System::powerStateChanged), this,
52                        std::placeholders::_1))),
53     _thermalAlert(bus, THERMAL_ALERT_OBJPATH)
54 {}
55 
56 void System::start()
57 {
58     _started = true;
59     json jsonObj = json::object();
60 #ifdef MONITOR_USE_JSON
61     auto confFile =
62         fan::JsonConfig::getConfFile(_bus, confAppName, confFileName);
63     jsonObj = fan::JsonConfig::load(confFile);
64 #endif
65     // Retrieve and set trust groups within the trust manager
66     setTrustMgr(getTrustGroups(jsonObj));
67     // Retrieve fan definitions and create fan objects to be monitored
68     setFans(getFanDefinitions(jsonObj));
69     setFaultConfig(jsonObj);
70     log<level::INFO>("Configuration loaded");
71 
72     if (_powerState->isPowerOn())
73     {
74         std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
75                       [this](auto& rule) {
76                           rule->check(PowerRuleState::runtime, _fanHealth);
77                       });
78     }
79 
80     auto sensorMap = buildNameOwnerChangedMap();
81 
82     namespace match = sdbusplus::bus::match;
83 
84     // for each service, register a callback handler for nameOwnerChanged
85     for (const auto& service_itr : sensorMap)
86     {
87         _sensorMatch.push_back(std::make_unique<match::match>(
88             _bus, match::rules::nameOwnerChanged(service_itr.first),
89             std::bind(&System::tachSignalOffline, this, std::placeholders::_1,
90                       sensorMap)));
91     }
92 }
93 
94 SensorMapType System::buildNameOwnerChangedMap() const
95 {
96     SensorMapType sensorMap;
97 
98     // build a list of all interfaces, always including the value interface
99     // using set automatically guards against duplicates
100     std::set<std::string> unique_interfaces{util::FAN_SENSOR_VALUE_INTF};
101 
102     for (const auto& fan : _fans)
103     {
104         for (const auto& sensor : fan->sensors())
105         {
106             unique_interfaces.insert(sensor->getInterface());
107         }
108     }
109     // convert them to vector to pass into getSubTreeRaw
110     std::vector<std::string> interfaces(unique_interfaces.begin(),
111                                         unique_interfaces.end());
112 
113     // get service information for all service names that are
114     // hosting these interfaces
115     auto serviceObjects =
116         util::SDBusPlus::getSubTreeRaw(_bus, FAN_SENSOR_PATH, interfaces, 0);
117 
118     for (const auto& fan : _fans)
119     {
120         // For every sensor in each fan
121         for (const auto& sensor : fan->sensors())
122         {
123             const auto itServ = serviceObjects.find(sensor->name());
124 
125             if (serviceObjects.end() == itServ || itServ->second.empty())
126             {
127                 getLogger().log(
128                     fmt::format("Fan sensor entry {} not found in D-Bus",
129                                 sensor->name()),
130                     Logger::error);
131                 continue;
132             }
133 
134             for (const auto& [serviceName, unused] : itServ->second)
135             {
136                 // map its service name to the sensor
137                 sensorMap[serviceName].insert(sensor);
138             }
139         }
140     }
141 
142     return sensorMap;
143 }
144 
145 void System::sighupHandler(sdeventplus::source::Signal&,
146                            const struct signalfd_siginfo*)
147 {
148     try
149     {
150         json jsonObj = json::object();
151 #ifdef MONITOR_USE_JSON
152         jsonObj = getJsonObj(_bus);
153 #endif
154         auto trustGrps = getTrustGroups(jsonObj);
155         auto fanDefs = getFanDefinitions(jsonObj);
156         // Set configured trust groups
157         setTrustMgr(trustGrps);
158         // Clear/set configured fan definitions
159         _fans.clear();
160         _fanHealth.clear();
161         setFans(fanDefs);
162         setFaultConfig(jsonObj);
163         log<level::INFO>("Configuration reloaded successfully");
164 
165         if (_powerState->isPowerOn())
166         {
167             std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
168                           [this](auto& rule) {
169                               rule->check(PowerRuleState::runtime, _fanHealth);
170                           });
171         }
172     }
173     catch (std::runtime_error& re)
174     {
175         log<level::ERR>("Error reloading config, no config changes made",
176                         entry("LOAD_ERROR=%s", re.what()));
177     }
178 }
179 
180 const std::vector<CreateGroupFunction>
181     System::getTrustGroups(const json& jsonObj)
182 {
183 #ifdef MONITOR_USE_JSON
184     return getTrustGrps(jsonObj);
185 #else
186     return trustGroups;
187 #endif
188 }
189 
190 void System::setTrustMgr(const std::vector<CreateGroupFunction>& groupFuncs)
191 {
192     _trust = std::make_unique<trust::Manager>(groupFuncs);
193 }
194 
195 const std::vector<FanDefinition> System::getFanDefinitions(const json& jsonObj)
196 {
197 #ifdef MONITOR_USE_JSON
198     return getFanDefs(jsonObj);
199 #else
200     return fanDefinitions;
201 #endif
202 }
203 
204 void System::setFans(const std::vector<FanDefinition>& fanDefs)
205 {
206     for (const auto& fanDef : fanDefs)
207     {
208         // Check if a condition exists on the fan
209         auto condition = std::get<conditionField>(fanDef);
210         if (condition)
211         {
212             // Condition exists, skip adding fan if it fails
213             if (!(*condition)(_bus))
214             {
215                 continue;
216             }
217         }
218         _fans.emplace_back(
219             std::make_unique<Fan>(_mode, _bus, _event, _trust, fanDef, *this));
220 
221         updateFanHealth(*(_fans.back()));
222     }
223 }
224 
225 // callback indicating a service went [on|off]line.
226 // Determine on/offline status, set all sensors for that service
227 // to new state
228 //
229 void System::tachSignalOffline(sdbusplus::message::message& msg,
230                                SensorMapType const& sensorMap)
231 {
232     std::string serviceName, oldOwner, newOwner;
233 
234     msg.read(serviceName);
235     msg.read(oldOwner);
236     msg.read(newOwner);
237 
238     // true if sensor server came back online, false -> went offline
239     bool hasOwner = !newOwner.empty() && oldOwner.empty();
240 
241     std::string stateStr(hasOwner ? "online" : "offline");
242     getLogger().log(fmt::format("Changing sensors for service {} to {}",
243                                 serviceName, stateStr),
244                     Logger::info);
245 
246     auto sensorItr(sensorMap.find(serviceName));
247 
248     if (sensorItr != sensorMap.end())
249     {
250         // set all sensors' owner state to not-owned
251         for (auto& sensor : sensorItr->second)
252         {
253             sensor->setOwner(hasOwner);
254             sensor->getFan().process(*sensor);
255         }
256     }
257 }
258 
259 void System::updateFanHealth(const Fan& fan)
260 {
261     std::vector<bool> sensorStatus;
262     for (const auto& sensor : fan.sensors())
263     {
264         sensorStatus.push_back(sensor->functional());
265     }
266 
267     _fanHealth[fan.getName()] =
268         std::make_tuple(fan.present(), std::move(sensorStatus));
269 }
270 
271 void System::fanStatusChange(const Fan& fan, bool skipRulesCheck)
272 {
273     updateFanHealth(fan);
274 
275     if (_powerState->isPowerOn() && !skipRulesCheck)
276     {
277         std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
278                       [this](auto& rule) {
279                           rule->check(PowerRuleState::runtime, _fanHealth);
280                       });
281     }
282 }
283 
284 void System::setFaultConfig(const json& jsonObj)
285 {
286 #ifdef MONITOR_USE_JSON
287     std::shared_ptr<PowerInterfaceBase> powerInterface =
288         std::make_shared<PowerInterface>(_thermalAlert);
289 
290     PowerOffAction::PrePowerOffFunc func =
291         std::bind(std::mem_fn(&System::logShutdownError), this);
292 
293     _powerOffRules = getPowerOffRules(jsonObj, powerInterface, func);
294 
295     _numNonfuncSensorsBeforeError = getNumNonfuncRotorsBeforeError(jsonObj);
296 #endif
297 }
298 
299 void System::powerStateChanged(bool powerStateOn)
300 {
301     std::for_each(_fans.begin(), _fans.end(), [powerStateOn](auto& fan) {
302         fan->powerStateChanged(powerStateOn);
303     });
304 
305     if (powerStateOn)
306     {
307         if (!_started)
308         {
309             log<level::ERR>("No conf file found at power on");
310             throw std::runtime_error("No conf file found at power on");
311         }
312 
313         // If no fan has its sensors on D-Bus, then there is a problem
314         // with the fan controller.  Log an error and shut down.
315         if (std::all_of(_fans.begin(), _fans.end(), [](const auto& fan) {
316                 return fan->numSensorsOnDBusAtPowerOn() == 0;
317             }))
318         {
319             handleOfflineFanController();
320             return;
321         }
322 
323         std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
324                       [this](auto& rule) {
325                           rule->check(PowerRuleState::atPgood, _fanHealth);
326                       });
327         std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
328                       [this](auto& rule) {
329                           rule->check(PowerRuleState::runtime, _fanHealth);
330                       });
331     }
332     else
333     {
334         _thermalAlert.enabled(false);
335 
336         // Cancel any in-progress power off actions
337         std::for_each(_powerOffRules.begin(), _powerOffRules.end(),
338                       [this](auto& rule) { rule->cancel(); });
339     }
340 }
341 
342 void System::sensorErrorTimerExpired(const Fan& fan, const TachSensor& sensor)
343 {
344     std::string fanPath{util::INVENTORY_PATH + fan.getName()};
345 
346     getLogger().log(
347         fmt::format("Creating event log for faulted fan {} sensor {}", fanPath,
348                     sensor.name()),
349         Logger::error);
350 
351     // In order to know if the event log should have a severity of error or
352     // informational, count the number of existing nonfunctional sensors and
353     // compare it to _numNonfuncSensorsBeforeError.
354     size_t nonfuncSensors = 0;
355     for (const auto& fan : _fans)
356     {
357         for (const auto& s : fan->sensors())
358         {
359             // Don't count nonfunctional sensors that still have their
360             // error timer running as nonfunctional since they haven't
361             // had event logs created for those errors yet.
362             if (!s->functional() && !s->errorTimerRunning())
363             {
364                 nonfuncSensors++;
365             }
366         }
367     }
368 
369     Severity severity = Severity::Error;
370     if (nonfuncSensors < _numNonfuncSensorsBeforeError)
371     {
372         severity = Severity::Informational;
373     }
374 
375     auto error =
376         std::make_unique<FanError>("xyz.openbmc_project.Fan.Error.Fault",
377                                    fanPath, sensor.name(), severity);
378 
379     auto sensorData = captureSensorData();
380     error->commit(sensorData);
381 
382     // Save the error so it can be committed again on a power off.
383     _lastError = std::move(error);
384 }
385 
386 void System::fanMissingErrorTimerExpired(const Fan& fan)
387 {
388     std::string fanPath{util::INVENTORY_PATH + fan.getName()};
389 
390     getLogger().log(
391         fmt::format("Creating event log for missing fan {}", fanPath),
392         Logger::error);
393 
394     auto error = std::make_unique<FanError>(
395         "xyz.openbmc_project.Fan.Error.Missing", fanPath, "", Severity::Error);
396 
397     auto sensorData = captureSensorData();
398     error->commit(sensorData);
399 
400     // Save the error so it can be committed again on a power off.
401     _lastError = std::move(error);
402 }
403 
404 void System::logShutdownError()
405 {
406     if (_lastError)
407     {
408         getLogger().log("Re-committing previous fan error before power off");
409 
410         // Still use the latest sensor data
411         auto sensorData = captureSensorData();
412         _lastError->commit(sensorData, true);
413     }
414 }
415 
416 json System::captureSensorData()
417 {
418     json data;
419 
420     for (const auto& fan : _fans)
421     {
422         for (const auto& sensor : fan->sensors())
423         {
424             json values;
425             values["present"] = fan->present();
426             values["functional"] = sensor->functional();
427             values["tach"] = sensor->getInput();
428             if (sensor->hasTarget())
429             {
430                 values["target"] = sensor->getTarget();
431             }
432 
433             data["sensors"][sensor->name()] = values;
434         }
435     }
436 
437     return data;
438 }
439 
440 void System::handleOfflineFanController()
441 {
442     getLogger().log("The fan controller appears to be offline.  Shutting down.",
443                     Logger::error);
444 
445     auto ffdc = collectHwmonFFDC();
446 
447     FanError error{"xyz.openbmc_project.Fan.Error.FanControllerOffline",
448                    Severity::Critical};
449     error.commit(ffdc, true);
450 
451     PowerInterface::executeHardPowerOff();
452 }
453 
454 } // namespace phosphor::fan::monitor
455