1 /** 2 * Copyright © 2020 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #pragma once 17 18 #include "fan.hpp" 19 #include "fan_error.hpp" 20 #include "power_off_rule.hpp" 21 #include "power_state.hpp" 22 #include "tach_sensor.hpp" 23 #include "trust_manager.hpp" 24 #include "types.hpp" 25 26 #include <nlohmann/json.hpp> 27 #include <sdbusplus/bus.hpp> 28 #include <sdeventplus/event.hpp> 29 #include <sdeventplus/source/signal.hpp> 30 31 #include <memory> 32 #include <optional> 33 #include <vector> 34 35 namespace phosphor::fan::monitor 36 { 37 38 using json = nlohmann::json; 39 40 // Mapping from service name to sensor 41 using SensorMapType = 42 std::map<std::string, std::set<std::shared_ptr<TachSensor>>>; 43 44 class System 45 { 46 public: 47 System() = delete; 48 ~System() = default; 49 System(const System&) = delete; 50 System(System&&) = delete; 51 System& operator=(const System&) = delete; 52 System& operator=(System&&) = delete; 53 54 /** 55 * Constructor 56 * 57 * @param[in] mode - mode of fan monitor 58 * @param[in] bus - sdbusplus bus object 59 * @param[in] event - event loop reference 60 */ 61 System(Mode mode, sdbusplus::bus_t& bus, const sdeventplus::Event& event); 62 63 /** 64 * @brief Callback function to handle receiving a HUP signal to reload the 65 * JSON configuration. 66 */ 67 void sighupHandler(sdeventplus::source::Signal&, 68 const struct signalfd_siginfo*); 69 70 /** 71 * @brief Called from the fan when it changes either 72 * present or functional status to update the 73 * fan health map. 74 * 75 * @param[in] fan - The fan that changed 76 * @param[in] skipRulesCheck - If the rules checks should be done now. 77 */ 78 void fanStatusChange(const Fan& fan, bool skipRulesCheck = false); 79 80 /** 81 * @brief Called when a fan sensor's error timer expires, which 82 * happens when the sensor has been nonfunctional for a 83 * certain amount of time. An event log will be created. 84 * 85 * @param[in] fan - The parent fan of the sensor 86 * @param[in] sensor - The faulted sensor 87 */ 88 void sensorErrorTimerExpired(const Fan& fan, const TachSensor& sensor); 89 90 /** 91 * @brief Called when the timer that starts when a fan is missing 92 * has expired so an event log needs to be created. 93 * 94 * @param[in] fan - The missing fan. 95 */ 96 void fanMissingErrorTimerExpired(const Fan& fan); 97 98 /** 99 * @brief Called by the power off actions to log an error when there is 100 * a power off due to fan problems. 101 * 102 * The error it logs is just the last fan error that occurred. 103 */ 104 void logShutdownError(); 105 106 /** 107 * @brief Returns true if power is on 108 */ 109 bool isPowerOn() const 110 { 111 return _powerState->isPowerOn(); 112 } 113 114 /** 115 * @brief tests the presence of Inventory and calls load() if present, else 116 * waits for Inventory asynchronously and has a callback to load() when 117 * present 118 */ 119 void start(); 120 121 /** 122 * @brief Parses and populates the fan monitor trust groups and list of fans 123 */ 124 void load(); 125 126 private: 127 /** 128 * @brief Callback from D-Bus when Inventory service comes online 129 * 130 * @param[in] msg - Service details. 131 */ 132 void inventoryOnlineCb(sdbusplus::message_t& msg); 133 134 /** 135 * @brief Create a BMC Dump 136 */ 137 void createBmcDump() const; 138 139 /* The mode of fan monitor */ 140 Mode _mode; 141 142 /* The sdbusplus bus object */ 143 sdbusplus::bus_t& _bus; 144 145 /* The event loop reference */ 146 const sdeventplus::Event& _event; 147 148 /* Trust manager of trust groups */ 149 std::unique_ptr<phosphor::fan::trust::Manager> _trust; 150 151 /* match object to detect Inventory service */ 152 std::unique_ptr<sdbusplus::bus::match_t> _inventoryMatch; 153 154 /* List of fan objects to monitor */ 155 std::vector<std::unique_ptr<Fan>> _fans; 156 157 /** 158 * @brief The latest health of all the fans 159 */ 160 FanHealth _fanHealth; 161 162 /** 163 * @brief The object to watch the power state 164 */ 165 std::unique_ptr<PowerState> _powerState; 166 167 /** 168 * @brief The power off rules, for shutting down the system 169 * due to fan failures. 170 */ 171 std::vector<std::unique_ptr<PowerOffRule>> _powerOffRules; 172 173 /** 174 * @brief The number of concurrently nonfunctional fan sensors 175 * there must be for an event log created due to a 176 * nonfunctional fan sensor to have an Error severity as 177 * opposed to an Informational one. 178 */ 179 std::optional<size_t> _numNonfuncSensorsBeforeError; 180 181 /** 182 * @brief The most recently committed fan error. 183 */ 184 std::unique_ptr<FanError> _lastError; 185 186 /** 187 * @brief The thermal alert D-Bus object 188 */ 189 ThermalAlertObject _thermalAlert; 190 191 /** 192 * @brief The tach sensors D-Bus match objects 193 */ 194 std::vector<std::unique_ptr<sdbusplus::bus::match_t>> _sensorMatch; 195 196 /** 197 * @brief true if config files have been loaded 198 */ 199 bool _loaded = false; 200 201 /** 202 * @brief Captures tach sensor data as JSON for use in 203 * fan fault and fan missing event logs. 204 * 205 * @return json - The JSON data 206 */ 207 json captureSensorData(); 208 209 /** 210 * @brief creates a subscription (service->sensor) to take sensors 211 * on/offline when D-Bus starts/stops updating values 212 * 213 */ 214 void subscribeSensorsToServices(); 215 216 /** 217 * @brief Retrieve the configured trust groups 218 * 219 * @param[in] jsonObj - JSON object to parse from 220 * 221 * @return List of functions applied on trust groups 222 */ 223 const std::vector<CreateGroupFunction> getTrustGroups(const json& jsonObj); 224 225 /** 226 * @brief Set the trust manager's list of trust group functions 227 * 228 * @param[in] groupFuncs - list of trust group functions 229 */ 230 void setTrustMgr(const std::vector<CreateGroupFunction>& groupFuncs); 231 232 /** 233 * @brief Retrieve the configured fan definitions 234 * 235 * @param[in] jsonObj - JSON object to parse from 236 * 237 * @return List of fan definition data on the fans configured 238 */ 239 const std::vector<FanDefinition> getFanDefinitions(const json& jsonObj); 240 241 /** 242 * @brief Set the list of fans to be monitored 243 * 244 * @param[in] fanDefs - list of fan definitions to create fans monitored 245 */ 246 void setFans(const std::vector<FanDefinition>& fanDefs); 247 248 /** 249 * @brief Updates the fan health map entry for the fan passed in 250 * 251 * @param[in] fan - The fan to update the health map with 252 */ 253 void updateFanHealth(const Fan& fan); 254 255 /** 256 * @brief callback when a tach sensor signal goes offline 257 * 258 * @param[in] msg - D-Bus message containing details (inc. service name) 259 * 260 * @param[in] sensorMap - map providing sensor access for each service 261 */ 262 void tachSignalOffline(sdbusplus::message_t& msg, 263 const SensorMapType& sensorMap); 264 265 /** 266 * @brief The function that runs when the power state changes 267 * 268 * @param[in] powerStateOn - If power is now on or not 269 */ 270 void powerStateChanged(bool powerStateOn); 271 272 /** 273 * @brief Reads the fault configuration from the JSON config 274 * file, such as the power off rule configuration. 275 * 276 * @param[in] jsonObj - JSON object to parse from 277 */ 278 void setFaultConfig(const json& jsonObj); 279 280 /** 281 * @brief Log an error and shut down due to an offline fan controller 282 */ 283 void handleOfflineFanController(); 284 }; 285 286 } // namespace phosphor::fan::monitor 287