1 /** 2 * Copyright © 2021 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "phase_fault_detection.hpp" 18 19 #include "action_utils.hpp" 20 #include "chassis.hpp" 21 #include "device.hpp" 22 #include "error_logging.hpp" 23 #include "error_logging_utils.hpp" 24 #include "exception_utils.hpp" 25 #include "journal.hpp" 26 #include "system.hpp" 27 28 #include <exception> 29 #include <map> 30 31 namespace phosphor::power::regulators 32 { 33 34 /** 35 * Maximum number of action errors to write to the journal. 36 */ 37 constexpr unsigned short maxActionErrorCount{3}; 38 39 /** 40 * Number of consecutive phase faults required to log an error. This provides 41 * "de-glitching" to ignore transient hardware problems. 42 */ 43 constexpr unsigned short requiredConsecutiveFaults{2}; 44 45 void PhaseFaultDetection::execute(Services& services, System& system, 46 Chassis& /*chassis*/, Device& regulator) 47 { 48 try 49 { 50 // Find the device ID to use. If the deviceID data member is empty, use 51 // the ID of the specified regulator. 52 const std::string& effectiveDeviceID = 53 deviceID.empty() ? regulator.getID() : deviceID; 54 55 // Create ActionEnvironment 56 ActionEnvironment environment{system.getIDMap(), effectiveDeviceID, 57 services}; 58 59 // Execute the actions to detect phase faults 60 action_utils::execute(actions, environment); 61 62 // Check for any N or N+1 phase faults that were detected 63 checkForPhaseFault(PhaseFaultType::n, services, regulator, environment); 64 checkForPhaseFault(PhaseFaultType::n_plus_1, services, regulator, 65 environment); 66 } 67 catch (const std::exception& e) 68 { 69 // Log error messages in journal if we haven't hit the max 70 if (actionErrorCount < maxActionErrorCount) 71 { 72 ++actionErrorCount; 73 services.getJournal().logError(exception_utils::getMessages(e)); 74 services.getJournal().logError( 75 "Unable to detect phase faults in regulator " + 76 regulator.getID()); 77 } 78 79 // Create error log entry if this type hasn't already been logged 80 error_logging_utils::logError(std::current_exception(), 81 Entry::Level::Warning, services, 82 errorHistory); 83 } 84 } 85 86 void PhaseFaultDetection::checkForPhaseFault( 87 PhaseFaultType faultType, Services& services, Device& regulator, 88 ActionEnvironment& environment) 89 { 90 // Find ErrorType that corresponds to PhaseFaultType; used by ErrorHistory 91 ErrorType errorType = toErrorType(faultType); 92 93 // If this error has not been logged yet 94 if (!errorHistory.wasLogged(errorType)) 95 { 96 // Create reference to consecutive fault count data member 97 unsigned short& faultCount = 98 (faultType == PhaseFaultType::n) ? nFaultCount : nPlus1FaultCount; 99 100 // Check if the phase fault was detected 101 if (environment.getPhaseFaults().count(faultType) == 0) 102 { 103 // Phase fault not detected; reset consecutive fault count 104 faultCount = 0; 105 } 106 else 107 { 108 // Phase fault detected; increment consecutive fault count 109 ++faultCount; 110 111 // Log error message in journal 112 services.getJournal().logError( 113 toString(faultType) + " phase fault detected in regulator " + 114 regulator.getID() + ": count=" + std::to_string(faultCount)); 115 116 // If the required number of consecutive faults have been detected 117 if (faultCount >= requiredConsecutiveFaults) 118 { 119 // Log phase fault error and update ErrorHistory 120 logPhaseFault(faultType, services, regulator, environment); 121 errorHistory.setWasLogged(errorType, true); 122 } 123 } 124 } 125 } 126 127 void PhaseFaultDetection::logPhaseFault(PhaseFaultType faultType, 128 Services& services, Device& regulator, 129 ActionEnvironment& environment) 130 { 131 ErrorLogging& errorLogging = services.getErrorLogging(); 132 Entry::Level severity = (faultType == PhaseFaultType::n) 133 ? Entry::Level::Warning 134 : Entry::Level::Informational; 135 Journal& journal = services.getJournal(); 136 const std::string& inventoryPath = regulator.getFRU(); 137 const std::map<std::string, std::string>& additionalData = 138 environment.getAdditionalErrorData(); 139 errorLogging.logPhaseFault(severity, journal, faultType, inventoryPath, 140 additionalData); 141 } 142 143 } // namespace phosphor::power::regulators 144