1 /** 2 * Copyright © 2020 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #pragma once 17 18 #include "fan.hpp" 19 #include "fan_error.hpp" 20 #include "power_off_rule.hpp" 21 #include "power_state.hpp" 22 #include "tach_sensor.hpp" 23 #include "trust_manager.hpp" 24 #include "types.hpp" 25 26 #include <nlohmann/json.hpp> 27 #include <sdbusplus/bus.hpp> 28 #include <sdeventplus/event.hpp> 29 #include <sdeventplus/source/signal.hpp> 30 31 #include <memory> 32 #include <optional> 33 #include <vector> 34 35 namespace phosphor::fan::monitor 36 { 37 38 using json = nlohmann::json; 39 40 class System 41 { 42 public: 43 System() = delete; 44 ~System() = default; 45 System(const System&) = delete; 46 System(System&&) = delete; 47 System& operator=(const System&) = delete; 48 System& operator=(System&&) = delete; 49 50 /** 51 * Constructor 52 * 53 * @param[in] mode - mode of fan monitor 54 * @param[in] bus - sdbusplus bus object 55 * @param[in] event - event loop reference 56 */ 57 System(Mode mode, sdbusplus::bus::bus& bus, 58 const sdeventplus::Event& event); 59 60 /** 61 * @brief Callback function to handle receiving a HUP signal to reload the 62 * JSON configuration. 63 */ 64 void sighupHandler(sdeventplus::source::Signal&, 65 const struct signalfd_siginfo*); 66 67 /** 68 * @brief Called from the fan when it changes either 69 * present or functional status to update the 70 * fan health map. 71 * 72 * @param[in] fan - The fan that changed 73 * @param[in] skipRulesCheck - If the rules checks should be done now. 74 */ 75 void fanStatusChange(const Fan& fan, bool skipRulesCheck = false); 76 77 /** 78 * @brief Called when a fan sensor's error timer expires, which 79 * happens when the sensor has been nonfunctional for a 80 * certain amount of time. An event log will be created. 81 * 82 * @param[in] fan - The parent fan of the sensor 83 * @param[in] sensor - The faulted sensor 84 */ 85 void sensorErrorTimerExpired(const Fan& fan, const TachSensor& sensor); 86 87 /** 88 * @brief Called when the timer that starts when a fan is missing 89 * has expired so an event log needs to be created. 90 * 91 * @param[in] fan - The missing fan. 92 */ 93 void fanMissingErrorTimerExpired(const Fan& fan); 94 95 /** 96 * @brief Called by the power off actions to log an error when there is 97 * a power off due to fan problems. 98 * 99 * The error it logs is just the last fan error that occurred. 100 */ 101 void logShutdownError(); 102 103 /** 104 * @brief Returns true if power is on 105 */ 106 bool isPowerOn() const 107 { 108 return _powerState->isPowerOn(); 109 } 110 111 /** 112 * @brief Parses and populates the fan monitor 113 * trust groups and list of fans 114 * 115 * @param[in] confFile - The config file path 116 */ 117 void start( 118 #ifdef MONITOR_USE_JSON 119 const std::string& confFile 120 #endif 121 ); 122 123 private: 124 /* The mode of fan monitor */ 125 Mode _mode; 126 127 /* The sdbusplus bus object */ 128 sdbusplus::bus::bus& _bus; 129 130 /* The event loop reference */ 131 const sdeventplus::Event& _event; 132 133 /* Trust manager of trust groups */ 134 std::unique_ptr<phosphor::fan::trust::Manager> _trust; 135 136 /* List of fan objects to monitor */ 137 std::vector<std::unique_ptr<Fan>> _fans; 138 139 /** 140 * @brief The latest health of all the fans 141 */ 142 FanHealth _fanHealth; 143 144 /** 145 * @brief The object to watch the power state 146 */ 147 std::unique_ptr<PowerState> _powerState; 148 149 /** 150 * @brief The power off rules, for shutting down the system 151 * due to fan failures. 152 */ 153 std::vector<std::unique_ptr<PowerOffRule>> _powerOffRules; 154 155 /** 156 * @brief The number of concurrently nonfunctional fan sensors 157 * there must be for an event log created due to a 158 * nonfunctional fan sensor to have an Error severity as 159 * opposed to an Informational one. 160 */ 161 std::optional<size_t> _numNonfuncSensorsBeforeError; 162 163 /** 164 * @brief The most recently committed fan error. 165 */ 166 std::unique_ptr<FanError> _lastError; 167 168 /** 169 * @brief The thermal alert D-Bus object 170 */ 171 ThermalAlertObject _thermalAlert; 172 173 /** 174 * @brief If start() has been called 175 */ 176 bool _started = false; 177 178 /** 179 * @brief Captures tach sensor data as JSON for use in 180 * fan fault and fan missing event logs. 181 * 182 * @return json - The JSON data 183 */ 184 json captureSensorData(); 185 186 /** 187 * @brief Retrieve the configured trust groups 188 * 189 * @param[in] jsonObj - JSON object to parse from 190 * 191 * @return List of functions applied on trust groups 192 */ 193 const std::vector<CreateGroupFunction> getTrustGroups(const json& jsonObj); 194 195 /** 196 * @brief Set the trust manager's list of trust group functions 197 * 198 * @param[in] groupFuncs - list of trust group functions 199 */ 200 void setTrustMgr(const std::vector<CreateGroupFunction>& groupFuncs); 201 202 /** 203 * @brief Retrieve the configured fan definitions 204 * 205 * @param[in] jsonObj - JSON object to parse from 206 * 207 * @return List of fan definition data on the fans configured 208 */ 209 const std::vector<FanDefinition> getFanDefinitions(const json& jsonObj); 210 211 /** 212 * @brief Set the list of fans to be monitored 213 * 214 * @param[in] fanDefs - list of fan definitions to create fans monitored 215 */ 216 void setFans(const std::vector<FanDefinition>& fanDefs); 217 218 /** 219 * @brief Updates the fan health map entry for the fan passed in 220 * 221 * @param[in] fan - The fan to update the health map with 222 */ 223 void updateFanHealth(const Fan& fan); 224 225 /** 226 * @brief The function that runs when the power state changes 227 * 228 * @param[in] powerStateOn - If power is now on or not 229 */ 230 void powerStateChanged(bool powerStateOn); 231 232 /** 233 * @brief Reads the fault configuration from the JSON config 234 * file, such as the power off rule configuration. 235 * 236 * @param[in] jsonObj - JSON object to parse from 237 */ 238 void setFaultConfig(const json& jsonObj); 239 }; 240 241 } // namespace phosphor::fan::monitor 242