1 /**
2  * Copyright © 2021 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include "action.hpp"
19 #include "action_environment.hpp"
20 #include "error_history.hpp"
21 #include "phase_fault.hpp"
22 #include "services.hpp"
23 
24 #include <memory>
25 #include <string>
26 #include <utility>
27 #include <vector>
28 
29 namespace phosphor::power::regulators
30 {
31 
// Forward declarations to avoid circular dependencies.  These classes are only
// used as reference parameters in the member function declarations below, so
// incomplete types are sufficient in this header.
class Chassis;
class Device;
class System;
36 
37 /**
38  * @class PhaseFaultDetection
39  *
40  * Detects and logs redundant phase faults in a voltage regulator.
41  *
42  * A voltage regulator is sometimes called a "phase controller" because it
43  * controls one or more phases that perform the actual voltage regulation.
44  *
45  * A regulator may have redundant phases.  If a redundant phase fails, the
46  * regulator will continue to provide the desired output voltage.  However, a
47  * phase fault error should be logged warning the user that the regulator has
48  * lost redundancy.
49  *
50  * The technique used to detect a phase fault varies depending on the regulator
51  * hardware.  Often a bit is checked in a status register.  The status register
52  * could exist in the regulator or in a related I/O expander.
53  *
54  * Phase fault detection is executed repeatedly based on a timer.  A phase fault
55  * must be detected two consecutive times before an error is logged.  This
56  * provides "de-glitching" to ignore transient hardware problems.
57  *
58  * Phase faults are detected by executing actions.
59  */
60 class PhaseFaultDetection
61 {
62   public:
63     // Specify which compiler-generated methods we want
64     PhaseFaultDetection() = delete;
65     PhaseFaultDetection(const PhaseFaultDetection&) = delete;
66     PhaseFaultDetection(PhaseFaultDetection&&) = delete;
67     PhaseFaultDetection& operator=(const PhaseFaultDetection&) = delete;
68     PhaseFaultDetection& operator=(PhaseFaultDetection&&) = delete;
69     ~PhaseFaultDetection() = default;
70 
71     /**
72      * Constructor.
73      *
74      * @param actions Actions that detect phase faults in the regulator.
75      * @param deviceID Unique ID of the device to use when detecting phase
76      *                 faults.  If not specified, the regulator will be used.
77      */
78     explicit PhaseFaultDetection(std::vector<std::unique_ptr<Action>> actions,
79                                  const std::string& deviceID = "") :
80         actions{std::move(actions)},
81         deviceID{deviceID}
82     {}
83 
84     /**
85      * Clears all error history.
86      *
87      * All data on previously logged errors will be deleted.  If errors occur
88      * again in the future they will be logged again.
89      *
90      * This method is normally called when the system is being powered on.
91      */
92     void clearErrorHistory()
93     {
94         errorHistory.clear();
95         actionErrorCount = 0;
96         nFaultCount = 0;
97         nPlus1FaultCount = 0;
98     }
99 
100     /**
101      * Executes the actions that detect phase faults in the regulator.
102      *
103      * If the required number of consecutive phase faults are detected, an error
104      * is logged.
105      *
106      * @param services system services like error logging and the journal
107      * @param system system that contains the chassis
108      * @param chassis chassis that contains the regulator device
109      * @param regulator voltage regulator device
110      */
111     void execute(Services& services, System& system, Chassis& chassis,
112                  Device& regulator);
113 
114     /**
115      * Returns the actions that detect phase faults in the regulator.
116      *
117      * @return actions
118      */
119     const std::vector<std::unique_ptr<Action>>& getActions() const
120     {
121         return actions;
122     }
123 
124     /**
125      * Returns the unique ID of the device to use when detecting phase
126      * faults.
127      *
128      * If the value is "", the regulator will be used.
129      *
130      * @return device ID
131      */
132     const std::string& getDeviceID() const
133     {
134         return deviceID;
135     }
136 
137   private:
138     /**
139      * Checks if the specified phase fault type was detected.
140      *
141      * If the fault type was detected, increments the counter tracking
142      * consecutive faults.  If the required number of consecutive faults have
143      * been detected, logs a phase fault error.
144      *
145      * The ActionEnvironment contains the set of phase fault types that were
146      * detected (if any).
147      *
148      * @param faultType phase fault type to check
149      * @param services system services like error logging and the journal
150      * @param regulator voltage regulator device
151      * @param environment action execution environment
152      */
153     void checkForPhaseFault(PhaseFaultType faultType, Services& services,
154                             Device& regulator, ActionEnvironment& environment);
155 
156     /**
157      * Logs an error for the specified phase fault type.
158      *
159      * @param faultType phase fault type that occurred
160      * @param services system services like error logging and the journal
161      * @param regulator voltage regulator device
162      * @param environment action execution environment
163      */
164     void logPhaseFault(PhaseFaultType faultType, Services& services,
165                        Device& regulator, ActionEnvironment& environment);
166 
167     /**
168      * Actions that detect phase faults in the regulator.
169      */
170     std::vector<std::unique_ptr<Action>> actions{};
171 
172     /**
173      * Unique ID of the device to use when detecting phase faults.
174      *
175      * Sometimes a separate device, such as an I/O expander, is accessed to
176      * obtain the phase fault status for a regulator.
177      *
178      * If the value is "", the regulator will be used.
179      */
180     const std::string deviceID{};
181 
182     /**
183      * History of which error types have been logged.
184      *
185      * Since phase fault detection runs repeatedly based on a timer, each error
186      * type is only logged once.
187      */
188     ErrorHistory errorHistory{};
189 
190     /**
191      * Number of errors that have occurred while executing actions, resulting in
192      * an exception.
193      */
194     unsigned short actionErrorCount{0};
195 
196     /**
197      * Number of consecutive N phase faults that have been detected.
198      */
199     unsigned short nFaultCount{0};
200 
201     /**
202      * Number of consecutive N+1 phase faults that have been detected.
203      */
204     unsigned short nPlus1FaultCount{0};
205 };
206 
207 } // namespace phosphor::power::regulators
208