1 /** 2 * Copyright © 2021 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #pragma once 17 18 #include "action.hpp" 19 #include "action_environment.hpp" 20 #include "error_history.hpp" 21 #include "phase_fault.hpp" 22 #include "services.hpp" 23 24 #include <memory> 25 #include <string> 26 #include <utility> 27 #include <vector> 28 29 namespace phosphor::power::regulators 30 { 31 32 // Forward declarations to avoid circular dependencies 33 class Chassis; 34 class Device; 35 class System; 36 37 /** 38 * @class PhaseFaultDetection 39 * 40 * Detects and logs redundant phase faults in a voltage regulator. 41 * 42 * A voltage regulator is sometimes called a "phase controller" because it 43 * controls one or more phases that perform the actual voltage regulation. 44 * 45 * A regulator may have redundant phases. If a redundant phase fails, the 46 * regulator will continue to provide the desired output voltage. However, a 47 * phase fault error should be logged warning the user that the regulator has 48 * lost redundancy. 49 * 50 * The technique used to detect a phase fault varies depending on the regulator 51 * hardware. Often a bit is checked in a status register. The status register 52 * could exist in the regulator or in a related I/O expander. 53 * 54 * Phase fault detection is executed repeatedly based on a timer. A phase fault 55 * must be detected two consecutive times before an error is logged. This 56 * provides "de-glitching" to ignore transient hardware problems. 57 * 58 * Phase faults are detected by executing actions. 59 */ 60 class PhaseFaultDetection 61 { 62 public: 63 // Specify which compiler-generated methods we want 64 PhaseFaultDetection() = delete; 65 PhaseFaultDetection(const PhaseFaultDetection&) = delete; 66 PhaseFaultDetection(PhaseFaultDetection&&) = delete; 67 PhaseFaultDetection& operator=(const PhaseFaultDetection&) = delete; 68 PhaseFaultDetection& operator=(PhaseFaultDetection&&) = delete; 69 ~PhaseFaultDetection() = default; 70 71 /** 72 * Constructor. 73 * 74 * @param actions Actions that detect phase faults in the regulator. 75 * @param deviceID Unique ID of the device to use when detecting phase 76 * faults. If not specified, the regulator will be used. 77 */ PhaseFaultDetection(std::vector<std::unique_ptr<Action>> actions,const std::string & deviceID="")78 explicit PhaseFaultDetection(std::vector<std::unique_ptr<Action>> actions, 79 const std::string& deviceID = "") : 80 actions{std::move(actions)}, deviceID{deviceID} 81 {} 82 83 /** 84 * Clears all error history. 85 * 86 * All data on previously logged errors will be deleted. If errors occur 87 * again in the future they will be logged again. 88 * 89 * This method is normally called when the system is being powered on. 90 */ clearErrorHistory()91 void clearErrorHistory() 92 { 93 errorHistory.clear(); 94 actionErrorCount = 0; 95 nFaultCount = 0; 96 nPlus1FaultCount = 0; 97 } 98 99 /** 100 * Executes the actions that detect phase faults in the regulator. 101 * 102 * If the required number of consecutive phase faults are detected, an error 103 * is logged. 104 * 105 * @param services system services like error logging and the journal 106 * @param system system that contains the chassis 107 * @param chassis chassis that contains the regulator device 108 * @param regulator voltage regulator device 109 */ 110 void execute(Services& services, System& system, Chassis& chassis, 111 Device& regulator); 112 113 /** 114 * Returns the actions that detect phase faults in the regulator. 115 * 116 * @return actions 117 */ getActions() const118 const std::vector<std::unique_ptr<Action>>& getActions() const 119 { 120 return actions; 121 } 122 123 /** 124 * Returns the unique ID of the device to use when detecting phase 125 * faults. 126 * 127 * If the value is "", the regulator will be used. 128 * 129 * @return device ID 130 */ getDeviceID() const131 const std::string& getDeviceID() const 132 { 133 return deviceID; 134 } 135 136 private: 137 /** 138 * Checks if the specified phase fault type was detected. 139 * 140 * If the fault type was detected, increments the counter tracking 141 * consecutive faults. If the required number of consecutive faults have 142 * been detected, logs a phase fault error. 143 * 144 * The ActionEnvironment contains the set of phase fault types that were 145 * detected (if any). 146 * 147 * @param faultType phase fault type to check 148 * @param services system services like error logging and the journal 149 * @param regulator voltage regulator device 150 * @param environment action execution environment 151 */ 152 void checkForPhaseFault(PhaseFaultType faultType, Services& services, 153 Device& regulator, ActionEnvironment& environment); 154 155 /** 156 * Logs an error for the specified phase fault type. 157 * 158 * @param faultType phase fault type that occurred 159 * @param services system services like error logging and the journal 160 * @param regulator voltage regulator device 161 * @param environment action execution environment 162 */ 163 void logPhaseFault(PhaseFaultType faultType, Services& services, 164 Device& regulator, ActionEnvironment& environment); 165 166 /** 167 * Actions that detect phase faults in the regulator. 168 */ 169 std::vector<std::unique_ptr<Action>> actions{}; 170 171 /** 172 * Unique ID of the device to use when detecting phase faults. 173 * 174 * Sometimes a separate device, such as an I/O expander, is accessed to 175 * obtain the phase fault status for a regulator. 176 * 177 * If the value is "", the regulator will be used. 178 */ 179 const std::string deviceID{}; 180 181 /** 182 * History of which error types have been logged. 183 * 184 * Since phase fault detection runs repeatedly based on a timer, each error 185 * type is only logged once. 186 */ 187 ErrorHistory errorHistory{}; 188 189 /** 190 * Number of errors that have occurred while executing actions, resulting in 191 * an exception. 192 */ 193 unsigned short actionErrorCount{0}; 194 195 /** 196 * Number of consecutive N phase faults that have been detected. 197 */ 198 unsigned short nFaultCount{0}; 199 200 /** 201 * Number of consecutive N+1 phase faults that have been detected. 202 */ 203 unsigned short nPlus1FaultCount{0}; 204 }; 205 206 } // namespace phosphor::power::regulators 207