1 /** 2 * Copyright © 2021 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #pragma once 17 18 #include "action.hpp" 19 #include "action_environment.hpp" 20 #include "error_history.hpp" 21 #include "phase_fault.hpp" 22 #include "services.hpp" 23 24 #include <memory> 25 #include <string> 26 #include <utility> 27 #include <vector> 28 29 namespace phosphor::power::regulators 30 { 31 32 // Forward declarations to avoid circular dependencies 33 class Chassis; 34 class Device; 35 class System; 36 37 /** 38 * @class PhaseFaultDetection 39 * 40 * Detects and logs redundant phase faults in a voltage regulator. 41 * 42 * A voltage regulator is sometimes called a "phase controller" because it 43 * controls one or more phases that perform the actual voltage regulation. 44 * 45 * A regulator may have redundant phases. If a redundant phase fails, the 46 * regulator will continue to provide the desired output voltage. However, a 47 * phase fault error should be logged warning the user that the regulator has 48 * lost redundancy. 49 * 50 * The technique used to detect a phase fault varies depending on the regulator 51 * hardware. Often a bit is checked in a status register. The status register 52 * could exist in the regulator or in a related I/O expander. 53 * 54 * Phase fault detection is executed repeatedly based on a timer. A phase fault 55 * must be detected two consecutive times before an error is logged. This 56 * provides "de-glitching" to ignore transient hardware problems. 57 * 58 * Phase faults are detected by executing actions. 59 */ 60 class PhaseFaultDetection 61 { 62 public: 63 // Specify which compiler-generated methods we want 64 PhaseFaultDetection() = delete; 65 PhaseFaultDetection(const PhaseFaultDetection&) = delete; 66 PhaseFaultDetection(PhaseFaultDetection&&) = delete; 67 PhaseFaultDetection& operator=(const PhaseFaultDetection&) = delete; 68 PhaseFaultDetection& operator=(PhaseFaultDetection&&) = delete; 69 ~PhaseFaultDetection() = default; 70 71 /** 72 * Constructor. 73 * 74 * @param actions Actions that detect phase faults in the regulator. 75 * @param deviceID Unique ID of the device to use when detecting phase 76 * faults. If not specified, the regulator will be used. 77 */ 78 explicit PhaseFaultDetection(std::vector<std::unique_ptr<Action>> actions, 79 const std::string& deviceID = "") : 80 actions{std::move(actions)}, 81 deviceID{deviceID} 82 {} 83 84 /** 85 * Clears all error history. 86 * 87 * All data on previously logged errors will be deleted. If errors occur 88 * again in the future they will be logged again. 89 * 90 * This method is normally called when the system is being powered on. 91 */ 92 void clearErrorHistory() 93 { 94 errorHistory.clear(); 95 actionErrorCount = 0; 96 nFaultCount = 0; 97 nPlus1FaultCount = 0; 98 } 99 100 /** 101 * Executes the actions that detect phase faults in the regulator. 102 * 103 * If the required number of consecutive phase faults are detected, an error 104 * is logged. 105 * 106 * @param services system services like error logging and the journal 107 * @param system system that contains the chassis 108 * @param chassis chassis that contains the regulator device 109 * @param regulator voltage regulator device 110 */ 111 void execute(Services& services, System& system, Chassis& chassis, 112 Device& regulator); 113 114 /** 115 * Returns the actions that detect phase faults in the regulator. 116 * 117 * @return actions 118 */ 119 const std::vector<std::unique_ptr<Action>>& getActions() const 120 { 121 return actions; 122 } 123 124 /** 125 * Returns the unique ID of the device to use when detecting phase 126 * faults. 127 * 128 * If the value is "", the regulator will be used. 129 * 130 * @return device ID 131 */ 132 const std::string& getDeviceID() const 133 { 134 return deviceID; 135 } 136 137 private: 138 /** 139 * Checks if the specified phase fault type was detected. 140 * 141 * If the fault type was detected, increments the counter tracking 142 * consecutive faults. If the required number of consecutive faults have 143 * been detected, logs a phase fault error. 144 * 145 * The ActionEnvironment contains the set of phase fault types that were 146 * detected (if any). 147 * 148 * @param faultType phase fault type to check 149 * @param services system services like error logging and the journal 150 * @param regulator voltage regulator device 151 * @param environment action execution environment 152 */ 153 void checkForPhaseFault(PhaseFaultType faultType, Services& services, 154 Device& regulator, ActionEnvironment& environment); 155 156 /** 157 * Logs an error for the specified phase fault type. 158 * 159 * @param faultType phase fault type that occurred 160 * @param services system services like error logging and the journal 161 * @param regulator voltage regulator device 162 * @param environment action execution environment 163 */ 164 void logPhaseFault(PhaseFaultType faultType, Services& services, 165 Device& regulator, ActionEnvironment& environment); 166 167 /** 168 * Actions that detect phase faults in the regulator. 169 */ 170 std::vector<std::unique_ptr<Action>> actions{}; 171 172 /** 173 * Unique ID of the device to use when detecting phase faults. 174 * 175 * Sometimes a separate device, such as an I/O expander, is accessed to 176 * obtain the phase fault status for a regulator. 177 * 178 * If the value is "", the regulator will be used. 179 */ 180 const std::string deviceID{}; 181 182 /** 183 * History of which error types have been logged. 184 * 185 * Since phase fault detection runs repeatedly based on a timer, each error 186 * type is only logged once. 187 */ 188 ErrorHistory errorHistory{}; 189 190 /** 191 * Number of errors that have occurred while executing actions, resulting in 192 * an exception. 193 */ 194 unsigned short actionErrorCount{0}; 195 196 /** 197 * Number of consecutive N phase faults that have been detected. 198 */ 199 unsigned short nFaultCount{0}; 200 201 /** 202 * Number of consecutive N+1 phase faults that have been detected. 203 */ 204 unsigned short nPlus1FaultCount{0}; 205 }; 206 207 } // namespace phosphor::power::regulators 208