// xref: /openbmc/phosphor-power/phosphor-regulators/src/phase_fault_detection.hpp (revision f54021972b91be5058b50e9046bb0dd5a3b22a80)
/**
 * Copyright © 2021 IBM Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 #pragma once
17 
18 #include "action.hpp"
19 #include "action_environment.hpp"
20 #include "error_history.hpp"
21 #include "phase_fault.hpp"
22 #include "services.hpp"
23 
24 #include <memory>
25 #include <string>
26 #include <utility>
27 #include <vector>
28 
29 namespace phosphor::power::regulators
30 {
31 
32 // Forward declarations to avoid circular dependencies
33 class Chassis;
34 class Device;
35 class System;
36 
37 /**
38  * @class PhaseFaultDetection
39  *
40  * Detects and logs redundant phase faults in a voltage regulator.
41  *
42  * A voltage regulator is sometimes called a "phase controller" because it
43  * controls one or more phases that perform the actual voltage regulation.
44  *
45  * A regulator may have redundant phases.  If a redundant phase fails, the
46  * regulator will continue to provide the desired output voltage.  However, a
47  * phase fault error should be logged warning the user that the regulator has
48  * lost redundancy.
49  *
50  * The technique used to detect a phase fault varies depending on the regulator
51  * hardware.  Often a bit is checked in a status register.  The status register
52  * could exist in the regulator or in a related I/O expander.
53  *
54  * Phase fault detection is executed repeatedly based on a timer.  A phase fault
55  * must be detected two consecutive times before an error is logged.  This
56  * provides "de-glitching" to ignore transient hardware problems.
57  *
58  * Phase faults are detected by executing actions.
59  */
60 class PhaseFaultDetection
61 {
62   public:
63     // Specify which compiler-generated methods we want
64     PhaseFaultDetection() = delete;
65     PhaseFaultDetection(const PhaseFaultDetection&) = delete;
66     PhaseFaultDetection(PhaseFaultDetection&&) = delete;
67     PhaseFaultDetection& operator=(const PhaseFaultDetection&) = delete;
68     PhaseFaultDetection& operator=(PhaseFaultDetection&&) = delete;
69     ~PhaseFaultDetection() = default;
70 
71     /**
72      * Constructor.
73      *
74      * @param actions Actions that detect phase faults in the regulator.
75      * @param deviceID Unique ID of the device to use when detecting phase
76      *                 faults.  If not specified, the regulator will be used.
77      */
PhaseFaultDetection(std::vector<std::unique_ptr<Action>> actions,const std::string & deviceID="")78     explicit PhaseFaultDetection(std::vector<std::unique_ptr<Action>> actions,
79                                  const std::string& deviceID = "") :
80         actions{std::move(actions)}, deviceID{deviceID}
81     {}
82 
83     /**
84      * Clears all error history.
85      *
86      * All data on previously logged errors will be deleted.  If errors occur
87      * again in the future they will be logged again.
88      *
89      * This method is normally called when the system is being powered on.
90      */
clearErrorHistory()91     void clearErrorHistory()
92     {
93         errorHistory.clear();
94         actionErrorCount = 0;
95         nFaultCount = 0;
96         nPlus1FaultCount = 0;
97     }
98 
99     /**
100      * Executes the actions that detect phase faults in the regulator.
101      *
102      * If the required number of consecutive phase faults are detected, an error
103      * is logged.
104      *
105      * @param services system services like error logging and the journal
106      * @param system system that contains the chassis
107      * @param chassis chassis that contains the regulator device
108      * @param regulator voltage regulator device
109      */
110     void execute(Services& services, System& system, Chassis& chassis,
111                  Device& regulator);
112 
113     /**
114      * Returns the actions that detect phase faults in the regulator.
115      *
116      * @return actions
117      */
getActions() const118     const std::vector<std::unique_ptr<Action>>& getActions() const
119     {
120         return actions;
121     }
122 
123     /**
124      * Returns the unique ID of the device to use when detecting phase
125      * faults.
126      *
127      * If the value is "", the regulator will be used.
128      *
129      * @return device ID
130      */
getDeviceID() const131     const std::string& getDeviceID() const
132     {
133         return deviceID;
134     }
135 
136   private:
137     /**
138      * Checks if the specified phase fault type was detected.
139      *
140      * If the fault type was detected, increments the counter tracking
141      * consecutive faults.  If the required number of consecutive faults have
142      * been detected, logs a phase fault error.
143      *
144      * The ActionEnvironment contains the set of phase fault types that were
145      * detected (if any).
146      *
147      * @param faultType phase fault type to check
148      * @param services system services like error logging and the journal
149      * @param regulator voltage regulator device
150      * @param environment action execution environment
151      */
152     void checkForPhaseFault(PhaseFaultType faultType, Services& services,
153                             Device& regulator, ActionEnvironment& environment);
154 
155     /**
156      * Logs an error for the specified phase fault type.
157      *
158      * @param faultType phase fault type that occurred
159      * @param services system services like error logging and the journal
160      * @param regulator voltage regulator device
161      * @param environment action execution environment
162      */
163     void logPhaseFault(PhaseFaultType faultType, Services& services,
164                        Device& regulator, ActionEnvironment& environment);
165 
166     /**
167      * Actions that detect phase faults in the regulator.
168      */
169     std::vector<std::unique_ptr<Action>> actions{};
170 
171     /**
172      * Unique ID of the device to use when detecting phase faults.
173      *
174      * Sometimes a separate device, such as an I/O expander, is accessed to
175      * obtain the phase fault status for a regulator.
176      *
177      * If the value is "", the regulator will be used.
178      */
179     const std::string deviceID{};
180 
181     /**
182      * History of which error types have been logged.
183      *
184      * Since phase fault detection runs repeatedly based on a timer, each error
185      * type is only logged once.
186      */
187     ErrorHistory errorHistory{};
188 
189     /**
190      * Number of errors that have occurred while executing actions, resulting in
191      * an exception.
192      */
193     unsigned short actionErrorCount{0};
194 
195     /**
196      * Number of consecutive N phase faults that have been detected.
197      */
198     unsigned short nFaultCount{0};
199 
200     /**
201      * Number of consecutive N+1 phase faults that have been detected.
202      */
203     unsigned short nPlus1FaultCount{0};
204 };
205 
206 } // namespace phosphor::power::regulators
207