1 /** 2 * Copyright © 2021 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "phase_fault_detection.hpp" 18 19 #include "action_utils.hpp" 20 #include "chassis.hpp" 21 #include "device.hpp" 22 #include "error_logging.hpp" 23 #include "error_logging_utils.hpp" 24 #include "exception_utils.hpp" 25 #include "journal.hpp" 26 #include "system.hpp" 27 28 #include <exception> 29 #include <map> 30 31 namespace phosphor::power::regulators 32 { 33 34 /** 35 * Maximum number of action errors to write to the journal. 36 */ 37 constexpr unsigned short maxActionErrorCount{3}; 38 39 /** 40 * Number of consecutive phase faults required to log an error. This provides 41 * "de-glitching" to ignore transient hardware problems. 42 */ 43 constexpr unsigned short requiredConsecutiveFaults{2}; 44 45 void PhaseFaultDetection::execute(Services& services, System& system, 46 Chassis& /*chassis*/, Device& regulator) 47 { 48 try 49 { 50 // Find the device ID to use. If the deviceID data member is empty, use 51 // the ID of the specified regulator. 52 const std::string& effectiveDeviceID = 53 deviceID.empty() ? regulator.getID() : deviceID; 54 55 // Create ActionEnvironment 56 ActionEnvironment environment{system.getIDMap(), effectiveDeviceID, 57 services}; 58 59 // Execute the actions to detect phase faults 60 action_utils::execute(actions, environment); 61 62 // Check for any N or N+1 phase faults that were detected 63 checkForPhaseFault(PhaseFaultType::n, services, regulator, environment); 64 checkForPhaseFault(PhaseFaultType::n_plus_1, services, regulator, 65 environment); 66 } 67 catch (const std::exception& e) 68 { 69 // Log error messages in journal if we haven't hit the max 70 if (actionErrorCount < maxActionErrorCount) 71 { 72 ++actionErrorCount; 73 services.getJournal().logError(exception_utils::getMessages(e)); 74 services.getJournal().logError( 75 "Unable to detect phase faults in regulator " + 76 regulator.getID()); 77 } 78 79 // Create error log entry if this type hasn't already been logged 80 error_logging_utils::logError(std::current_exception(), 81 Entry::Level::Warning, services, 82 errorHistory); 83 } 84 } 85 86 void PhaseFaultDetection::checkForPhaseFault(PhaseFaultType faultType, 87 Services& services, 88 Device& regulator, 89 ActionEnvironment& environment) 90 { 91 // Find ErrorType that corresponds to PhaseFaultType; used by ErrorHistory 92 ErrorType errorType = toErrorType(faultType); 93 94 // If this error has not been logged yet 95 if (!errorHistory.wasLogged(errorType)) 96 { 97 // Create reference to consecutive fault count data member 98 unsigned short& faultCount = 99 (faultType == PhaseFaultType::n) ? nFaultCount : nPlus1FaultCount; 100 101 // Check if the phase fault was detected 102 if (environment.getPhaseFaults().count(faultType) == 0) 103 { 104 // Phase fault not detected; reset consecutive fault count 105 faultCount = 0; 106 } 107 else 108 { 109 // Phase fault detected; increment consecutive fault count 110 ++faultCount; 111 112 // Log error message in journal 113 services.getJournal().logError( 114 toString(faultType) + " phase fault detected in regulator " + 115 regulator.getID() + ": count=" + std::to_string(faultCount)); 116 117 // If the required number of consecutive faults have been detected 118 if (faultCount >= requiredConsecutiveFaults) 119 { 120 // Log phase fault error and update ErrorHistory 121 logPhaseFault(faultType, services, regulator, environment); 122 errorHistory.setWasLogged(errorType, true); 123 } 124 } 125 } 126 } 127 128 void PhaseFaultDetection::logPhaseFault(PhaseFaultType faultType, 129 Services& services, Device& regulator, 130 ActionEnvironment& environment) 131 { 132 ErrorLogging& errorLogging = services.getErrorLogging(); 133 Entry::Level severity = (faultType == PhaseFaultType::n) 134 ? Entry::Level::Warning 135 : Entry::Level::Informational; 136 Journal& journal = services.getJournal(); 137 const std::string& inventoryPath = regulator.getFRU(); 138 const std::map<std::string, std::string>& additionalData = 139 environment.getAdditionalErrorData(); 140 errorLogging.logPhaseFault(severity, journal, faultType, inventoryPath, 141 additionalData); 142 } 143 144 } // namespace phosphor::power::regulators 145