1 /** 2 * Copyright © 2017 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include <phosphor-logging/log.hpp> 17 #include <phosphor-logging/elog.hpp> 18 #include <org/open_power/Witherspoon/Fault/error.hpp> 19 #include <xyz/openbmc_project/Common/Device/error.hpp> 20 #include "elog-errors.hpp" 21 #include "names_values.hpp" 22 #include "power_supply.hpp" 23 #include "pmbus.hpp" 24 #include "utility.hpp" 25 26 using namespace phosphor::logging; 27 using namespace sdbusplus::org::open_power::Witherspoon::Fault::Error; 28 using namespace sdbusplus::xyz::openbmc_project::Common::Device::Error; 29 30 namespace witherspoon 31 { 32 namespace power 33 { 34 namespace psu 35 { 36 37 constexpr auto INVENTORY_OBJ_PATH = "/xyz/openbmc_project/inventory"; 38 constexpr auto INVENTORY_INTERFACE = "xyz.openbmc_project.Inventory.Item"; 39 constexpr auto PRESENT_PROP = "Present"; 40 constexpr auto POWER_OBJ_PATH = "/org/openbmc/control/power0"; 41 constexpr auto POWER_INTERFACE = "org.openbmc.control.Power"; 42 43 PowerSupply::PowerSupply(const std::string& name, size_t inst, 44 const std::string& objpath, 45 const std::string& invpath, 46 sdbusplus::bus::bus& bus, 47 event::Event& e, 48 std::chrono::seconds& t) 49 : Device(name, inst), monitorPath(objpath), pmbusIntf(objpath), 50 inventoryPath(invpath), bus(bus), event(e), powerOnInterval(t), 51 powerOnTimer(e, [this]() 52 { 53 this->powerOn = true; 54 }) 55 { 56 using namespace sdbusplus::bus; 57 auto present_obj_path = INVENTORY_OBJ_PATH + inventoryPath; 58 presentMatch = std::make_unique<match_t>(bus, 59 match::rules::propertiesChanged( 60 present_obj_path, 61 INVENTORY_INTERFACE), 62 [this](auto& msg) 63 { 64 this->inventoryChanged(msg); 65 }); 66 // Get initial presence state. 67 updatePresence(); 68 69 // Subscribe to power state changes 70 powerOnMatch = std::make_unique<match_t>(bus, 71 match::rules::propertiesChanged( 72 POWER_OBJ_PATH, 73 POWER_INTERFACE), 74 [this](auto& msg) 75 { 76 this->powerStateChanged(msg); 77 }); 78 // Get initial power state. 79 updatePowerState(); 80 } 81 82 void PowerSupply::captureCmd(util::NamesValues& nv, const std::string& cmd, 83 witherspoon::pmbus::Type type) 84 { 85 if (pmbusIntf.exists(cmd, type)) 86 { 87 try 88 { 89 auto val = pmbusIntf.read(cmd, type); 90 nv.add(cmd, val); 91 } 92 catch (std::exception& e) 93 { 94 log<level::INFO>("Unable to capture metadata", entry("CMD=%s", 95 cmd)); 96 } 97 } 98 } 99 100 void PowerSupply::analyze() 101 { 102 using namespace witherspoon::pmbus; 103 104 try 105 { 106 if (present) 107 { 108 std::uint16_t statusWord = 0; 109 110 // Read the 2 byte STATUS_WORD value to check for faults. 111 statusWord = pmbusIntf.read(STATUS_WORD, Type::Debug); 112 113 //TODO: 3 consecutive reads should be performed. 114 // If 3 consecutive reads are seen, log the fault. 115 // Driver gives cached value, read once a second. 116 // increment for fault on, decrement for fault off, to deglitch. 117 // If count reaches 3, we have fault. If count reaches 0, fault is 118 // cleared. 119 120 checkInputFault(statusWord); 121 122 if (powerOn) 123 { 124 checkFanFault(statusWord); 125 checkTemperatureFault(statusWord); 126 checkOutputOvervoltageFault(statusWord); 127 checkCurrentOutOverCurrentFault(statusWord); 128 checkPGOrUnitOffFault(statusWord); 129 } 130 } 131 } 132 catch (ReadFailure& e) 133 { 134 if (!readFailLogged) 135 { 136 commit<ReadFailure>(); 137 readFailLogged = true; 138 } 139 } 140 141 return; 142 } 143 144 void PowerSupply::inventoryChanged(sdbusplus::message::message& msg) 145 { 146 std::string msgSensor; 147 std::map<std::string, sdbusplus::message::variant<uint32_t, bool>> msgData; 148 msg.read(msgSensor, msgData); 149 150 // Check if it was the Present property that changed. 151 auto valPropMap = msgData.find(PRESENT_PROP); 152 if (valPropMap != msgData.end()) 153 { 154 present = sdbusplus::message::variant_ns::get<bool>(valPropMap->second); 155 156 if (present) 157 { 158 readFailLogged = false; 159 vinUVFault = false; 160 inputFault = false; 161 outputOCFault = false; 162 outputOVFault = false; 163 fanFault = false; 164 temperatureFault = false; 165 } 166 } 167 168 return; 169 } 170 171 void PowerSupply::updatePresence() 172 { 173 // Use getProperty utility function to get presence status. 174 std::string path = INVENTORY_OBJ_PATH + inventoryPath; 175 std::string service = "xyz.openbmc_project.Inventory.Manager"; 176 177 util::getProperty(INVENTORY_INTERFACE, PRESENT_PROP, path,service, bus, 178 this->present); 179 } 180 181 void PowerSupply::powerStateChanged(sdbusplus::message::message& msg) 182 { 183 int32_t state = 0; 184 std::string msgSensor; 185 std::map<std::string, sdbusplus::message::variant<int32_t, int32_t>> 186 msgData; 187 msg.read(msgSensor, msgData); 188 189 // Check if it was the Present property that changed. 190 auto valPropMap = msgData.find("state"); 191 if (valPropMap != msgData.end()) 192 { 193 state = sdbusplus::message::variant_ns::get<int32_t>(valPropMap->second); 194 195 // Power is on when state=1. Set the fault logged variables to false 196 // and start the power on timer when the state changes to 1. 197 if (state) 198 { 199 readFailLogged = false; 200 vinUVFault = false; 201 inputFault = false; 202 powerOnFault = 0; 203 outputOCFault = false; 204 outputOVFault = false; 205 fanFault = false; 206 temperatureFault = false; 207 powerOnTimer.start(powerOnInterval, Timer::TimerType::oneshot); 208 } 209 else 210 { 211 powerOnTimer.stop(); 212 powerOn = false; 213 } 214 } 215 216 } 217 218 void PowerSupply::updatePowerState() 219 { 220 // When state = 1, system is powered on 221 int32_t state = 0; 222 223 try 224 { 225 auto service = util::getService(POWER_OBJ_PATH, 226 POWER_INTERFACE, 227 bus); 228 229 // Use getProperty utility function to get power state. 230 util::getProperty<int32_t>(POWER_INTERFACE, 231 "state", 232 POWER_OBJ_PATH, 233 service, 234 bus, 235 state); 236 237 if (state) 238 { 239 powerOn = true; 240 } 241 else 242 { 243 powerOn = false; 244 } 245 } 246 catch (std::exception& e) 247 { 248 log<level::INFO>("Failed to get power state. Assuming it is off."); 249 powerOn = false; 250 } 251 252 } 253 254 void PowerSupply::checkInputFault(const uint16_t statusWord) 255 { 256 using namespace witherspoon::pmbus; 257 258 std::uint8_t statusInput = 0; 259 260 if ((statusWord & status_word::VIN_UV_FAULT) && !vinUVFault) 261 { 262 vinUVFault = true; 263 264 util::NamesValues nv; 265 nv.add("STATUS_WORD", statusWord); 266 267 using metadata = org::open_power::Witherspoon::Fault:: 268 PowerSupplyUnderVoltageFault; 269 270 report<PowerSupplyUnderVoltageFault>(metadata::RAW_STATUS( 271 nv.get().c_str())); 272 } 273 else 274 { 275 if (vinUVFault) 276 { 277 vinUVFault = false; 278 log<level::INFO>("VIN_UV_FAULT cleared", 279 entry("POWERSUPPLY=%s", 280 inventoryPath.c_str())); 281 } 282 } 283 284 if ((statusWord & status_word::INPUT_FAULT_WARN) && !inputFault) 285 { 286 inputFault = true; 287 288 util::NamesValues nv; 289 nv.add("STATUS_WORD", statusWord); 290 captureCmd(nv, STATUS_INPUT, Type::Debug); 291 292 using metadata = org::open_power::Witherspoon::Fault:: 293 PowerSupplyInputFault; 294 295 report<PowerSupplyInputFault>( 296 metadata::RAW_STATUS(nv.get().c_str())); 297 } 298 else 299 { 300 if ((inputFault) && 301 !(statusWord & status_word::INPUT_FAULT_WARN)) 302 { 303 inputFault = false; 304 statusInput = pmbusIntf.read(STATUS_INPUT, Type::Debug); 305 306 log<level::INFO>("INPUT_FAULT_WARN cleared", 307 entry("POWERSUPPLY=%s", inventoryPath.c_str()), 308 entry("STATUS_WORD=0x%04X", statusWord), 309 entry("STATUS_INPUT=0x%02X", statusInput)); 310 } 311 } 312 } 313 314 void PowerSupply::checkPGOrUnitOffFault(const uint16_t statusWord) 315 { 316 using namespace witherspoon::pmbus; 317 318 if (powerOnFault < FAULT_COUNT) 319 { 320 // Check PG# and UNIT_IS_OFF 321 if ((statusWord & status_word::POWER_GOOD_NEGATED) || 322 (statusWord & status_word::UNIT_IS_OFF)) 323 { 324 log<level::INFO>("PGOOD or UNIT_IS_OFF bit bad", 325 entry("STATUS_WORD=0x%04X", statusWord)); 326 powerOnFault++; 327 } 328 else 329 { 330 if (powerOnFault > 0) 331 { 332 log<level::INFO>("PGOOD and UNIT_IS_OFF bits good"); 333 powerOnFault = 0; 334 } 335 } 336 337 if (powerOnFault >= FAULT_COUNT) 338 { 339 util::NamesValues nv; 340 nv.add("STATUS_WORD", statusWord); 341 captureCmd(nv, STATUS_INPUT, Type::Debug); 342 auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0); 343 captureCmd(nv, status0Vout, Type::Debug); 344 captureCmd(nv, STATUS_IOUT, Type::Debug); 345 captureCmd(nv, STATUS_MFR, Type::Debug); 346 347 using metadata = org::open_power::Witherspoon::Fault:: 348 PowerSupplyShouldBeOn; 349 350 // A power supply is OFF (or pgood low) but should be on. 351 report<PowerSupplyShouldBeOn>( 352 metadata::RAW_STATUS(nv.get().c_str()), 353 metadata::CALLOUT_INVENTORY_PATH( 354 inventoryPath.c_str())); 355 } 356 } 357 358 } 359 360 void PowerSupply::checkCurrentOutOverCurrentFault(const uint16_t statusWord) 361 { 362 using namespace witherspoon::pmbus; 363 364 // Check for an output overcurrent fault. 365 if ((statusWord & status_word::IOUT_OC_FAULT) && 366 !outputOCFault) 367 { 368 util::NamesValues nv; 369 nv.add("STATUS_WORD", statusWord); 370 captureCmd(nv, STATUS_INPUT, Type::Debug); 371 auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0); 372 captureCmd(nv, status0Vout, Type::Debug); 373 captureCmd(nv, STATUS_IOUT, Type::Debug); 374 captureCmd(nv, STATUS_MFR, Type::Debug); 375 376 using metadata = org::open_power::Witherspoon::Fault:: 377 PowerSupplyOutputOvercurrent; 378 379 report<PowerSupplyOutputOvercurrent>(metadata::RAW_STATUS( 380 nv.get().c_str()), 381 metadata::CALLOUT_INVENTORY_PATH( 382 inventoryPath.c_str())); 383 384 outputOCFault = true; 385 } 386 } 387 388 void PowerSupply::checkOutputOvervoltageFault(const uint16_t statusWord) 389 { 390 using namespace witherspoon::pmbus; 391 392 // Check for an output overvoltage fault. 393 if ((statusWord & status_word::VOUT_OV_FAULT) && 394 !outputOVFault) 395 { 396 util::NamesValues nv; 397 nv.add("STATUS_WORD", statusWord); 398 captureCmd(nv, STATUS_INPUT, Type::Debug); 399 auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0); 400 captureCmd(nv, status0Vout, Type::Debug); 401 captureCmd(nv, STATUS_IOUT, Type::Debug); 402 captureCmd(nv, STATUS_MFR, Type::Debug); 403 404 using metadata = org::open_power::Witherspoon::Fault:: 405 PowerSupplyOutputOvervoltage; 406 407 report<PowerSupplyOutputOvervoltage>(metadata::RAW_STATUS( 408 nv.get().c_str()), 409 metadata::CALLOUT_INVENTORY_PATH( 410 inventoryPath.c_str())); 411 412 outputOVFault = true; 413 } 414 } 415 416 void PowerSupply::checkFanFault(const uint16_t statusWord) 417 { 418 using namespace witherspoon::pmbus; 419 420 // Check for a fan fault or warning condition 421 if ((statusWord & status_word::FAN_FAULT) && 422 !fanFault) 423 { 424 util::NamesValues nv; 425 nv.add("STATUS_WORD", statusWord); 426 captureCmd(nv, STATUS_MFR, Type::Debug); 427 captureCmd(nv, STATUS_TEMPERATURE, Type::Debug); 428 captureCmd(nv, STATUS_FANS_1_2, Type::Debug); 429 430 using metadata = org::open_power::Witherspoon::Fault:: 431 PowerSupplyFanFault; 432 433 report<PowerSupplyFanFault>( 434 metadata::RAW_STATUS(nv.get().c_str()), 435 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 436 437 fanFault = true; 438 } 439 } 440 441 void PowerSupply::checkTemperatureFault(const uint16_t statusWord) 442 { 443 using namespace witherspoon::pmbus; 444 445 // Due to how the PMBus core device driver sends a clear faults command 446 // the bit in STATUS_WORD will likely be cleared when we attempt to examine 447 // it for a Thermal Fault or Warning. So, check the STATUS_WORD and the 448 // STATUS_TEMPERATURE bits. If either indicates a fault, proceed with 449 // logging the over-temperature condition. 450 std::uint8_t statusTemperature = 0; 451 statusTemperature = pmbusIntf.read(STATUS_TEMPERATURE, Type::Debug); 452 if (((statusWord & status_word::TEMPERATURE_FAULT_WARN) || 453 (statusTemperature & status_temperature::OT_FAULT)) && 454 !temperatureFault) 455 { 456 // The power supply has had an over-temperature condition. 457 // This may not result in a shutdown if experienced for a short 458 // duration. 459 // This should not occur under normal conditions. 460 // The power supply may be faulty, or the paired supply may be putting 461 // out less current. 462 // Capture command responses with potentially relevant information, 463 // and call out the power supply reporting the condition. 464 util::NamesValues nv; 465 nv.add("STATUS_WORD", statusWord); 466 captureCmd(nv, STATUS_MFR, Type::Debug); 467 captureCmd(nv, STATUS_IOUT, Type::Debug); 468 nv.add("STATUS_TEMPERATURE", statusTemperature); 469 captureCmd(nv, STATUS_FANS_1_2, Type::Debug); 470 471 using metadata = org::open_power::Witherspoon::Fault:: 472 PowerSupplyTemperatureFault; 473 474 report<PowerSupplyTemperatureFault>( 475 metadata::RAW_STATUS(nv.get().c_str()), 476 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 477 478 temperatureFault = true; 479 } 480 } 481 482 void PowerSupply::clearFaults() 483 { 484 //TODO - Clear faults at pre-poweron. openbmc/openbmc#1736 485 return; 486 } 487 488 } 489 } 490 } 491