1 /** 2 * Copyright © 2017 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include <phosphor-logging/log.hpp> 17 #include <phosphor-logging/elog.hpp> 18 #include <org/open_power/Witherspoon/Fault/error.hpp> 19 #include <xyz/openbmc_project/Common/Device/error.hpp> 20 #include "elog-errors.hpp" 21 #include "names_values.hpp" 22 #include "power_supply.hpp" 23 #include "pmbus.hpp" 24 #include "utility.hpp" 25 26 using namespace phosphor::logging; 27 using namespace sdbusplus::org::open_power::Witherspoon::Fault::Error; 28 using namespace sdbusplus::xyz::openbmc_project::Common::Device::Error; 29 30 namespace witherspoon 31 { 32 namespace power 33 { 34 namespace psu 35 { 36 37 constexpr auto INVENTORY_OBJ_PATH = "/xyz/openbmc_project/inventory"; 38 constexpr auto INVENTORY_INTERFACE = "xyz.openbmc_project.Inventory.Item"; 39 constexpr auto PRESENT_PROP = "Present"; 40 constexpr auto POWER_OBJ_PATH = "/org/openbmc/control/power0"; 41 constexpr auto POWER_INTERFACE = "org.openbmc.control.Power"; 42 43 PowerSupply::PowerSupply(const std::string& name, size_t inst, 44 const std::string& objpath, 45 const std::string& invpath, 46 sdbusplus::bus::bus& bus, 47 event::Event& e, 48 std::chrono::seconds& t, 49 std::chrono::seconds& p) 50 : Device(name, inst), monitorPath(objpath), pmbusIntf(objpath), 51 inventoryPath(invpath), bus(bus), event(e), presentInterval(p), 52 presentTimer(e, [this]() 53 { 54 this->present = true; 55 }), 56 powerOnInterval(t), 57 powerOnTimer(e, [this]() 58 { 59 this->powerOn = true; 60 }) 61 { 62 using namespace sdbusplus::bus; 63 auto present_obj_path = INVENTORY_OBJ_PATH + inventoryPath; 64 presentMatch = std::make_unique<match_t>(bus, 65 match::rules::propertiesChanged( 66 present_obj_path, 67 INVENTORY_INTERFACE), 68 [this](auto& msg) 69 { 70 this->inventoryChanged(msg); 71 }); 72 // Get initial presence state. 73 updatePresence(); 74 75 // Subscribe to power state changes 76 powerOnMatch = std::make_unique<match_t>(bus, 77 match::rules::propertiesChanged( 78 POWER_OBJ_PATH, 79 POWER_INTERFACE), 80 [this](auto& msg) 81 { 82 this->powerStateChanged(msg); 83 }); 84 // Get initial power state. 85 updatePowerState(); 86 } 87 88 void PowerSupply::captureCmd(util::NamesValues& nv, const std::string& cmd, 89 witherspoon::pmbus::Type type) 90 { 91 if (pmbusIntf.exists(cmd, type)) 92 { 93 try 94 { 95 auto val = pmbusIntf.read(cmd, type); 96 nv.add(cmd, val); 97 } 98 catch (std::exception& e) 99 { 100 log<level::INFO>("Unable to capture metadata", entry("CMD=%s", 101 cmd)); 102 } 103 } 104 } 105 106 void PowerSupply::analyze() 107 { 108 using namespace witherspoon::pmbus; 109 110 try 111 { 112 if (present) 113 { 114 std::uint16_t statusWord = 0; 115 116 // Read the 2 byte STATUS_WORD value to check for faults. 117 statusWord = pmbusIntf.read(STATUS_WORD, Type::Debug); 118 119 //TODO: 3 consecutive reads should be performed. 120 // If 3 consecutive reads are seen, log the fault. 121 // Driver gives cached value, read once a second. 122 // increment for fault on, decrement for fault off, to deglitch. 123 // If count reaches 3, we have fault. If count reaches 0, fault is 124 // cleared. 125 126 checkInputFault(statusWord); 127 128 if (powerOn && !inputFault) 129 { 130 checkFanFault(statusWord); 131 checkTemperatureFault(statusWord); 132 checkOutputOvervoltageFault(statusWord); 133 checkCurrentOutOverCurrentFault(statusWord); 134 checkPGOrUnitOffFault(statusWord); 135 } 136 } 137 } 138 catch (ReadFailure& e) 139 { 140 if (!readFailLogged) 141 { 142 commit<ReadFailure>(); 143 readFailLogged = true; 144 } 145 } 146 147 return; 148 } 149 150 void PowerSupply::inventoryChanged(sdbusplus::message::message& msg) 151 { 152 std::string msgSensor; 153 std::map<std::string, sdbusplus::message::variant<uint32_t, bool>> msgData; 154 msg.read(msgSensor, msgData); 155 156 // Check if it was the Present property that changed. 157 auto valPropMap = msgData.find(PRESENT_PROP); 158 if (valPropMap != msgData.end()) 159 { 160 present = sdbusplus::message::variant_ns::get<bool>(valPropMap->second); 161 162 if (present) 163 { 164 clearFaults(); 165 presentTimer.start(presentInterval, Timer::TimerType::oneshot); 166 } 167 else 168 { 169 presentTimer.stop(); 170 } 171 } 172 173 return; 174 } 175 176 void PowerSupply::updatePresence() 177 { 178 // Use getProperty utility function to get presence status. 179 std::string path = INVENTORY_OBJ_PATH + inventoryPath; 180 std::string service = "xyz.openbmc_project.Inventory.Manager"; 181 182 util::getProperty(INVENTORY_INTERFACE, PRESENT_PROP, path,service, bus, 183 this->present); 184 } 185 186 void PowerSupply::powerStateChanged(sdbusplus::message::message& msg) 187 { 188 int32_t state = 0; 189 std::string msgSensor; 190 std::map<std::string, sdbusplus::message::variant<int32_t, int32_t>> 191 msgData; 192 msg.read(msgSensor, msgData); 193 194 // Check if it was the Present property that changed. 195 auto valPropMap = msgData.find("state"); 196 if (valPropMap != msgData.end()) 197 { 198 state = sdbusplus::message::variant_ns::get<int32_t>(valPropMap->second); 199 200 // Power is on when state=1. Set the fault logged variables to false 201 // and start the power on timer when the state changes to 1. 202 if (state) 203 { 204 clearFaults(); 205 powerOnTimer.start(powerOnInterval, Timer::TimerType::oneshot); 206 } 207 else 208 { 209 powerOnTimer.stop(); 210 powerOn = false; 211 } 212 } 213 214 } 215 216 void PowerSupply::updatePowerState() 217 { 218 // When state = 1, system is powered on 219 int32_t state = 0; 220 221 try 222 { 223 auto service = util::getService(POWER_OBJ_PATH, 224 POWER_INTERFACE, 225 bus); 226 227 // Use getProperty utility function to get power state. 228 util::getProperty<int32_t>(POWER_INTERFACE, 229 "state", 230 POWER_OBJ_PATH, 231 service, 232 bus, 233 state); 234 235 if (state) 236 { 237 powerOn = true; 238 } 239 else 240 { 241 powerOn = false; 242 } 243 } 244 catch (std::exception& e) 245 { 246 log<level::INFO>("Failed to get power state. Assuming it is off."); 247 powerOn = false; 248 } 249 250 } 251 252 void PowerSupply::checkInputFault(const uint16_t statusWord) 253 { 254 using namespace witherspoon::pmbus; 255 256 std::uint8_t statusInput = 0; 257 258 if (!inputFault && ((statusWord & status_word::INPUT_FAULT_WARN) || 259 (statusWord & status_word::VIN_UV_FAULT))) 260 { 261 inputFault = true; 262 263 util::NamesValues nv; 264 nv.add("STATUS_WORD", statusWord); 265 captureCmd(nv, STATUS_INPUT, Type::Debug); 266 267 using metadata = org::open_power::Witherspoon::Fault:: 268 PowerSupplyInputFault; 269 270 report<PowerSupplyInputFault>( 271 metadata::RAW_STATUS(nv.get().c_str()), 272 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 273 } 274 else 275 { 276 if ((inputFault) && 277 !(statusWord & status_word::INPUT_FAULT_WARN) && 278 !(statusWord & status_word::VIN_UV_FAULT)) 279 { 280 inputFault = false; 281 282 statusInput = pmbusIntf.read(STATUS_INPUT, Type::Debug); 283 284 log<level::INFO>("INPUT_FAULT_WARN cleared", 285 entry("POWERSUPPLY=%s", inventoryPath.c_str()), 286 entry("STATUS_WORD=0x%04X", statusWord), 287 entry("STATUS_INPUT=0x%02X", statusInput)); 288 289 if (powerOn) 290 { 291 // The power supply will not be immediately powered on after 292 // the input power is restored. 293 powerOn = false; 294 // Start up the timer that will set the state to indicate we 295 // are ready for the powered on fault checks. 296 powerOnTimer.start(powerOnInterval, Timer::TimerType::oneshot); 297 } 298 } 299 } 300 } 301 302 void PowerSupply::checkPGOrUnitOffFault(const uint16_t statusWord) 303 { 304 using namespace witherspoon::pmbus; 305 306 if (powerOnFault < FAULT_COUNT) 307 { 308 // Check PG# and UNIT_IS_OFF 309 if ((statusWord & status_word::POWER_GOOD_NEGATED) || 310 (statusWord & status_word::UNIT_IS_OFF)) 311 { 312 log<level::INFO>("PGOOD or UNIT_IS_OFF bit bad", 313 entry("STATUS_WORD=0x%04X", statusWord)); 314 powerOnFault++; 315 } 316 else 317 { 318 if (powerOnFault > 0) 319 { 320 log<level::INFO>("PGOOD and UNIT_IS_OFF bits good"); 321 powerOnFault = 0; 322 } 323 } 324 325 if (powerOnFault >= FAULT_COUNT) 326 { 327 util::NamesValues nv; 328 nv.add("STATUS_WORD", statusWord); 329 captureCmd(nv, STATUS_INPUT, Type::Debug); 330 auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0); 331 captureCmd(nv, status0Vout, Type::Debug); 332 captureCmd(nv, STATUS_IOUT, Type::Debug); 333 captureCmd(nv, STATUS_MFR, Type::Debug); 334 335 using metadata = org::open_power::Witherspoon::Fault:: 336 PowerSupplyShouldBeOn; 337 338 // A power supply is OFF (or pgood low) but should be on. 339 report<PowerSupplyShouldBeOn>( 340 metadata::RAW_STATUS(nv.get().c_str()), 341 metadata::CALLOUT_INVENTORY_PATH( 342 inventoryPath.c_str())); 343 } 344 } 345 346 } 347 348 void PowerSupply::checkCurrentOutOverCurrentFault(const uint16_t statusWord) 349 { 350 using namespace witherspoon::pmbus; 351 352 // Check for an output overcurrent fault. 353 if ((statusWord & status_word::IOUT_OC_FAULT) && 354 !outputOCFault) 355 { 356 util::NamesValues nv; 357 nv.add("STATUS_WORD", statusWord); 358 captureCmd(nv, STATUS_INPUT, Type::Debug); 359 auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0); 360 captureCmd(nv, status0Vout, Type::Debug); 361 captureCmd(nv, STATUS_IOUT, Type::Debug); 362 captureCmd(nv, STATUS_MFR, Type::Debug); 363 364 using metadata = org::open_power::Witherspoon::Fault:: 365 PowerSupplyOutputOvercurrent; 366 367 report<PowerSupplyOutputOvercurrent>(metadata::RAW_STATUS( 368 nv.get().c_str()), 369 metadata::CALLOUT_INVENTORY_PATH( 370 inventoryPath.c_str())); 371 372 outputOCFault = true; 373 } 374 } 375 376 void PowerSupply::checkOutputOvervoltageFault(const uint16_t statusWord) 377 { 378 using namespace witherspoon::pmbus; 379 380 // Check for an output overvoltage fault. 381 if ((statusWord & status_word::VOUT_OV_FAULT) && 382 !outputOVFault) 383 { 384 util::NamesValues nv; 385 nv.add("STATUS_WORD", statusWord); 386 captureCmd(nv, STATUS_INPUT, Type::Debug); 387 auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0); 388 captureCmd(nv, status0Vout, Type::Debug); 389 captureCmd(nv, STATUS_IOUT, Type::Debug); 390 captureCmd(nv, STATUS_MFR, Type::Debug); 391 392 using metadata = org::open_power::Witherspoon::Fault:: 393 PowerSupplyOutputOvervoltage; 394 395 report<PowerSupplyOutputOvervoltage>(metadata::RAW_STATUS( 396 nv.get().c_str()), 397 metadata::CALLOUT_INVENTORY_PATH( 398 inventoryPath.c_str())); 399 400 outputOVFault = true; 401 } 402 } 403 404 void PowerSupply::checkFanFault(const uint16_t statusWord) 405 { 406 using namespace witherspoon::pmbus; 407 408 // Check for a fan fault or warning condition 409 if ((statusWord & status_word::FAN_FAULT) && 410 !fanFault) 411 { 412 util::NamesValues nv; 413 nv.add("STATUS_WORD", statusWord); 414 captureCmd(nv, STATUS_MFR, Type::Debug); 415 captureCmd(nv, STATUS_TEMPERATURE, Type::Debug); 416 captureCmd(nv, STATUS_FANS_1_2, Type::Debug); 417 418 using metadata = org::open_power::Witherspoon::Fault:: 419 PowerSupplyFanFault; 420 421 report<PowerSupplyFanFault>( 422 metadata::RAW_STATUS(nv.get().c_str()), 423 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 424 425 fanFault = true; 426 } 427 } 428 429 void PowerSupply::checkTemperatureFault(const uint16_t statusWord) 430 { 431 using namespace witherspoon::pmbus; 432 433 // Due to how the PMBus core device driver sends a clear faults command 434 // the bit in STATUS_WORD will likely be cleared when we attempt to examine 435 // it for a Thermal Fault or Warning. So, check the STATUS_WORD and the 436 // STATUS_TEMPERATURE bits. If either indicates a fault, proceed with 437 // logging the over-temperature condition. 438 std::uint8_t statusTemperature = 0; 439 statusTemperature = pmbusIntf.read(STATUS_TEMPERATURE, Type::Debug); 440 if (((statusWord & status_word::TEMPERATURE_FAULT_WARN) || 441 (statusTemperature & status_temperature::OT_FAULT)) && 442 !temperatureFault) 443 { 444 // The power supply has had an over-temperature condition. 445 // This may not result in a shutdown if experienced for a short 446 // duration. 447 // This should not occur under normal conditions. 448 // The power supply may be faulty, or the paired supply may be putting 449 // out less current. 450 // Capture command responses with potentially relevant information, 451 // and call out the power supply reporting the condition. 452 util::NamesValues nv; 453 nv.add("STATUS_WORD", statusWord); 454 captureCmd(nv, STATUS_MFR, Type::Debug); 455 captureCmd(nv, STATUS_IOUT, Type::Debug); 456 nv.add("STATUS_TEMPERATURE", statusTemperature); 457 captureCmd(nv, STATUS_FANS_1_2, Type::Debug); 458 459 using metadata = org::open_power::Witherspoon::Fault:: 460 PowerSupplyTemperatureFault; 461 462 report<PowerSupplyTemperatureFault>( 463 metadata::RAW_STATUS(nv.get().c_str()), 464 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 465 466 temperatureFault = true; 467 } 468 } 469 470 void PowerSupply::clearFaults() 471 { 472 readFailLogged = false; 473 inputFault = false; 474 powerOnFault = 0; 475 outputOCFault = false; 476 outputOVFault = false; 477 fanFault = false; 478 temperatureFault = false; 479 480 return; 481 } 482 483 } 484 } 485 } 486