1 /** 2 * Copyright © 2017 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "config.h" 17 18 #include "power_supply.hpp" 19 20 #include "elog-errors.hpp" 21 #include "gpio.hpp" 22 #include "names_values.hpp" 23 #include "pmbus.hpp" 24 #include "types.hpp" 25 #include "utility.hpp" 26 27 #include <org/open_power/Witherspoon/Fault/error.hpp> 28 #include <phosphor-logging/log.hpp> 29 #include <xyz/openbmc_project/Common/Device/error.hpp> 30 31 #include <functional> 32 33 namespace phosphor 34 { 35 namespace power 36 { 37 namespace psu 38 { 39 40 using namespace phosphor::logging; 41 using namespace sdbusplus::org::open_power::Witherspoon::Fault::Error; 42 using namespace sdbusplus::xyz::openbmc_project::Common::Device::Error; 43 44 PowerSupply::PowerSupply(const std::string& name, size_t inst, 45 const std::string& objpath, const std::string& invpath, 46 sdbusplus::bus::bus& bus, const sdeventplus::Event& e, 47 std::chrono::seconds& t, std::chrono::seconds& p) : 48 Device(name, inst), 49 monitorPath(objpath), pmbusIntf(objpath), 50 inventoryPath(INVENTORY_OBJ_PATH + invpath), bus(bus), presentInterval(p), 51 presentTimer(e, std::bind([this]() { 52 // The hwmon path may have changed. 53 pmbusIntf.findHwmonDir(); 54 this->present = true; 55 56 // Sync the INPUT_HISTORY data for all PSs 57 syncHistory(); 58 59 // Update the inventory for the new device 60 updateInventory(); 61 })), 62 powerOnInterval(t), 63 powerOnTimer(e, std::bind([this]() { this->powerOn = true; })) 64 { 65 getAccessType(); 66 67 using namespace sdbusplus::bus; 68 using namespace phosphor::pmbus; 69 std::uint16_t statusWord = 0; 70 try 71 { 72 // Read the 2 byte STATUS_WORD value to check for faults. 73 statusWord = pmbusIntf.read(STATUS_WORD, Type::Debug); 74 if (!((statusWord & status_word::INPUT_FAULT_WARN) || 75 (statusWord & status_word::VIN_UV_FAULT))) 76 { 77 resolveError(inventoryPath, 78 std::string(PowerSupplyInputFault::errName)); 79 } 80 } 81 catch (ReadFailure& e) 82 { 83 log<level::INFO>("Unable to read the 2 byte STATUS_WORD value to check " 84 "for power-supply input faults."); 85 } 86 presentMatch = std::make_unique<match_t>( 87 bus, match::rules::propertiesChanged(inventoryPath, INVENTORY_IFACE), 88 [this](auto& msg) { this->inventoryChanged(msg); }); 89 // Get initial presence state. 90 updatePresence(); 91 92 // Write the SN, PN, etc to the inventory 93 updateInventory(); 94 95 // Subscribe to power state changes 96 powerOnMatch = std::make_unique<match_t>( 97 bus, match::rules::propertiesChanged(POWER_OBJ_PATH, POWER_IFACE), 98 [this](auto& msg) { this->powerStateChanged(msg); }); 99 // Get initial power state. 100 updatePowerState(); 101 } 102 103 void PowerSupply::getAccessType() 104 { 105 using namespace phosphor::power::util; 106 fruJson = loadJSONFromFile(PSU_JSON_PATH); 107 if (fruJson == nullptr) 108 { 109 log<level::ERR>("InternalFailure when parsing the JSON file"); 110 return; 111 } 112 inventoryPMBusAccessType = getPMBusAccessType(fruJson); 113 } 114 115 void PowerSupply::captureCmd(util::NamesValues& nv, const std::string& cmd, 116 phosphor::pmbus::Type type) 117 { 118 if (pmbusIntf.exists(cmd, type)) 119 { 120 try 121 { 122 auto val = pmbusIntf.read(cmd, type); 123 nv.add(cmd, val); 124 } 125 catch (std::exception& e) 126 { 127 log<level::INFO>("Unable to capture metadata", 128 entry("CMD=%s", cmd.c_str())); 129 } 130 } 131 } 132 133 void PowerSupply::analyze() 134 { 135 using namespace phosphor::pmbus; 136 137 try 138 { 139 if (present) 140 { 141 std::uint16_t statusWord = 0; 142 143 // Read the 2 byte STATUS_WORD value to check for faults. 144 statusWord = pmbusIntf.read(STATUS_WORD, Type::Debug); 145 readFail = 0; 146 147 checkInputFault(statusWord); 148 149 if (powerOn && (inputFault == 0) && !faultFound) 150 { 151 checkFanFault(statusWord); 152 checkTemperatureFault(statusWord); 153 checkOutputOvervoltageFault(statusWord); 154 checkCurrentOutOverCurrentFault(statusWord); 155 checkPGOrUnitOffFault(statusWord); 156 } 157 158 updateHistory(); 159 } 160 } 161 catch (ReadFailure& e) 162 { 163 if (readFail < FAULT_COUNT) 164 { 165 readFail++; 166 } 167 168 if (!readFailLogged && readFail >= FAULT_COUNT) 169 { 170 commit<ReadFailure>(); 171 readFailLogged = true; 172 } 173 } 174 175 return; 176 } 177 178 void PowerSupply::inventoryChanged(sdbusplus::message::message& msg) 179 { 180 std::string msgSensor; 181 std::map<std::string, sdbusplus::message::variant<uint32_t, bool>> msgData; 182 msg.read(msgSensor, msgData); 183 184 // Check if it was the Present property that changed. 185 auto valPropMap = msgData.find(PRESENT_PROP); 186 if (valPropMap != msgData.end()) 187 { 188 if (sdbusplus::message::variant_ns::get<bool>(valPropMap->second)) 189 { 190 clearFaults(); 191 presentTimer.restartOnce(presentInterval); 192 } 193 else 194 { 195 present = false; 196 presentTimer.setEnabled(false); 197 198 // Clear out the now outdated inventory properties 199 updateInventory(); 200 } 201 } 202 203 return; 204 } 205 206 void PowerSupply::updatePresence() 207 { 208 // Use getProperty utility function to get presence status. 209 std::string service = "xyz.openbmc_project.Inventory.Manager"; 210 util::getProperty(INVENTORY_IFACE, PRESENT_PROP, inventoryPath, service, 211 bus, this->present); 212 } 213 214 void PowerSupply::powerStateChanged(sdbusplus::message::message& msg) 215 { 216 int32_t state = 0; 217 std::string msgSensor; 218 std::map<std::string, sdbusplus::message::variant<int32_t>> msgData; 219 msg.read(msgSensor, msgData); 220 221 // Check if it was the Present property that changed. 222 auto valPropMap = msgData.find("state"); 223 if (valPropMap != msgData.end()) 224 { 225 state = 226 sdbusplus::message::variant_ns::get<int32_t>(valPropMap->second); 227 228 // Power is on when state=1. Set the fault logged variables to false 229 // and start the power on timer when the state changes to 1. 230 if (state) 231 { 232 clearFaults(); 233 powerOnTimer.restartOnce(powerOnInterval); 234 } 235 else 236 { 237 powerOnTimer.setEnabled(false); 238 powerOn = false; 239 } 240 } 241 } 242 243 void PowerSupply::updatePowerState() 244 { 245 powerOn = util::isPoweredOn(bus); 246 } 247 248 void PowerSupply::checkInputFault(const uint16_t statusWord) 249 { 250 using namespace phosphor::pmbus; 251 252 if ((inputFault < FAULT_COUNT) && 253 ((statusWord & status_word::INPUT_FAULT_WARN) || 254 (statusWord & status_word::VIN_UV_FAULT))) 255 { 256 if (inputFault == 0) 257 { 258 log<level::INFO>("INPUT or VIN_UV fault", 259 entry("STATUS_WORD=0x%04X", statusWord)); 260 } 261 262 inputFault++; 263 } 264 else 265 { 266 if ((inputFault > 0) && !(statusWord & status_word::INPUT_FAULT_WARN) && 267 !(statusWord & status_word::VIN_UV_FAULT)) 268 { 269 inputFault = 0; 270 faultFound = false; 271 // When an input fault occurs, the power supply cannot be on. 272 // However, the check for the case where the power supply should be 273 // on will stop when there is a fault found. 274 // Clear the powerOnFault when the inputFault is cleared to reset 275 // the powerOnFault de-glitching. 276 powerOnFault = 0; 277 278 log<level::INFO>("INPUT_FAULT_WARN cleared", 279 entry("POWERSUPPLY=%s", inventoryPath.c_str())); 280 281 resolveError(inventoryPath, 282 std::string(PowerSupplyInputFault::errName)); 283 284 if (powerOn) 285 { 286 // The power supply will not be immediately powered on after 287 // the input power is restored. 288 powerOn = false; 289 // Start up the timer that will set the state to indicate we 290 // are ready for the powered on fault checks. 291 powerOnTimer.restartOnce(powerOnInterval); 292 } 293 } 294 } 295 296 if (!faultFound && (inputFault >= FAULT_COUNT)) 297 { 298 // If the power is on, report the fault in an error log entry. 299 if (powerOn) 300 { 301 util::NamesValues nv; 302 nv.add("STATUS_WORD", statusWord); 303 captureCmd(nv, STATUS_INPUT, Type::Debug); 304 305 using metadata = 306 org::open_power::Witherspoon::Fault::PowerSupplyInputFault; 307 308 report<PowerSupplyInputFault>( 309 metadata::RAW_STATUS(nv.get().c_str()), 310 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 311 312 faultFound = true; 313 } 314 } 315 } 316 317 void PowerSupply::checkPGOrUnitOffFault(const uint16_t statusWord) 318 { 319 using namespace phosphor::pmbus; 320 321 if (powerOnFault < FAULT_COUNT) 322 { 323 // Check PG# and UNIT_IS_OFF 324 if ((statusWord & status_word::POWER_GOOD_NEGATED) || 325 (statusWord & status_word::UNIT_IS_OFF)) 326 { 327 log<level::INFO>("PGOOD or UNIT_IS_OFF bit bad", 328 entry("STATUS_WORD=0x%04X", statusWord)); 329 powerOnFault++; 330 } 331 else 332 { 333 if (powerOnFault > 0) 334 { 335 log<level::INFO>("PGOOD and UNIT_IS_OFF bits good"); 336 powerOnFault = 0; 337 } 338 } 339 340 if (!faultFound && (powerOnFault >= FAULT_COUNT)) 341 { 342 faultFound = true; 343 344 util::NamesValues nv; 345 nv.add("STATUS_WORD", statusWord); 346 captureCmd(nv, STATUS_INPUT, Type::Debug); 347 auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0); 348 captureCmd(nv, status0Vout, Type::Debug); 349 captureCmd(nv, STATUS_IOUT, Type::Debug); 350 captureCmd(nv, STATUS_MFR, Type::Debug); 351 352 using metadata = 353 org::open_power::Witherspoon::Fault::PowerSupplyShouldBeOn; 354 355 // A power supply is OFF (or pgood low) but should be on. 356 report<PowerSupplyShouldBeOn>( 357 metadata::RAW_STATUS(nv.get().c_str()), 358 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 359 } 360 } 361 } 362 363 void PowerSupply::checkCurrentOutOverCurrentFault(const uint16_t statusWord) 364 { 365 using namespace phosphor::pmbus; 366 367 if (outputOCFault < FAULT_COUNT) 368 { 369 // Check for an output overcurrent fault. 370 if ((statusWord & status_word::IOUT_OC_FAULT)) 371 { 372 outputOCFault++; 373 } 374 else 375 { 376 if (outputOCFault > 0) 377 { 378 outputOCFault = 0; 379 } 380 } 381 382 if (!faultFound && (outputOCFault >= FAULT_COUNT)) 383 { 384 util::NamesValues nv; 385 nv.add("STATUS_WORD", statusWord); 386 captureCmd(nv, STATUS_INPUT, Type::Debug); 387 auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0); 388 captureCmd(nv, status0Vout, Type::Debug); 389 captureCmd(nv, STATUS_IOUT, Type::Debug); 390 captureCmd(nv, STATUS_MFR, Type::Debug); 391 392 using metadata = org::open_power::Witherspoon::Fault:: 393 PowerSupplyOutputOvercurrent; 394 395 report<PowerSupplyOutputOvercurrent>( 396 metadata::RAW_STATUS(nv.get().c_str()), 397 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 398 399 faultFound = true; 400 } 401 } 402 } 403 404 void PowerSupply::checkOutputOvervoltageFault(const uint16_t statusWord) 405 { 406 using namespace phosphor::pmbus; 407 408 if (outputOVFault < FAULT_COUNT) 409 { 410 // Check for an output overvoltage fault. 411 if (statusWord & status_word::VOUT_OV_FAULT) 412 { 413 outputOVFault++; 414 } 415 else 416 { 417 if (outputOVFault > 0) 418 { 419 outputOVFault = 0; 420 } 421 } 422 423 if (!faultFound && (outputOVFault >= FAULT_COUNT)) 424 { 425 util::NamesValues nv; 426 nv.add("STATUS_WORD", statusWord); 427 captureCmd(nv, STATUS_INPUT, Type::Debug); 428 auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0); 429 captureCmd(nv, status0Vout, Type::Debug); 430 captureCmd(nv, STATUS_IOUT, Type::Debug); 431 captureCmd(nv, STATUS_MFR, Type::Debug); 432 433 using metadata = org::open_power::Witherspoon::Fault:: 434 PowerSupplyOutputOvervoltage; 435 436 report<PowerSupplyOutputOvervoltage>( 437 metadata::RAW_STATUS(nv.get().c_str()), 438 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 439 440 faultFound = true; 441 } 442 } 443 } 444 445 void PowerSupply::checkFanFault(const uint16_t statusWord) 446 { 447 using namespace phosphor::pmbus; 448 449 if (fanFault < FAULT_COUNT) 450 { 451 // Check for a fan fault or warning condition 452 if (statusWord & status_word::FAN_FAULT) 453 { 454 fanFault++; 455 } 456 else 457 { 458 if (fanFault > 0) 459 { 460 fanFault = 0; 461 } 462 } 463 464 if (!faultFound && (fanFault >= FAULT_COUNT)) 465 { 466 util::NamesValues nv; 467 nv.add("STATUS_WORD", statusWord); 468 captureCmd(nv, STATUS_MFR, Type::Debug); 469 captureCmd(nv, STATUS_TEMPERATURE, Type::Debug); 470 captureCmd(nv, STATUS_FANS_1_2, Type::Debug); 471 472 using metadata = 473 org::open_power::Witherspoon::Fault::PowerSupplyFanFault; 474 475 report<PowerSupplyFanFault>( 476 metadata::RAW_STATUS(nv.get().c_str()), 477 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 478 479 faultFound = true; 480 } 481 } 482 } 483 484 void PowerSupply::checkTemperatureFault(const uint16_t statusWord) 485 { 486 using namespace phosphor::pmbus; 487 488 // Due to how the PMBus core device driver sends a clear faults command 489 // the bit in STATUS_WORD will likely be cleared when we attempt to examine 490 // it for a Thermal Fault or Warning. So, check the STATUS_WORD and the 491 // STATUS_TEMPERATURE bits. If either indicates a fault, proceed with 492 // logging the over-temperature condition. 493 std::uint8_t statusTemperature = 0; 494 statusTemperature = pmbusIntf.read(STATUS_TEMPERATURE, Type::Debug); 495 if (temperatureFault < FAULT_COUNT) 496 { 497 if ((statusWord & status_word::TEMPERATURE_FAULT_WARN) || 498 (statusTemperature & status_temperature::OT_FAULT)) 499 { 500 temperatureFault++; 501 } 502 else 503 { 504 if (temperatureFault > 0) 505 { 506 temperatureFault = 0; 507 } 508 } 509 510 if (!faultFound && (temperatureFault >= FAULT_COUNT)) 511 { 512 // The power supply has had an over-temperature condition. 513 // This may not result in a shutdown if experienced for a short 514 // duration. 515 // This should not occur under normal conditions. 516 // The power supply may be faulty, or the paired supply may be 517 // putting out less current. 518 // Capture command responses with potentially relevant information, 519 // and call out the power supply reporting the condition. 520 util::NamesValues nv; 521 nv.add("STATUS_WORD", statusWord); 522 captureCmd(nv, STATUS_MFR, Type::Debug); 523 captureCmd(nv, STATUS_IOUT, Type::Debug); 524 nv.add("STATUS_TEMPERATURE", statusTemperature); 525 captureCmd(nv, STATUS_FANS_1_2, Type::Debug); 526 527 using metadata = org::open_power::Witherspoon::Fault:: 528 PowerSupplyTemperatureFault; 529 530 report<PowerSupplyTemperatureFault>( 531 metadata::RAW_STATUS(nv.get().c_str()), 532 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 533 534 faultFound = true; 535 } 536 } 537 } 538 539 void PowerSupply::clearFaults() 540 { 541 readFail = 0; 542 readFailLogged = false; 543 inputFault = 0; 544 powerOnFault = 0; 545 outputOCFault = 0; 546 outputOVFault = 0; 547 fanFault = 0; 548 temperatureFault = 0; 549 faultFound = false; 550 551 return; 552 } 553 554 void PowerSupply::resolveError(const std::string& callout, 555 const std::string& message) 556 { 557 using EndpointList = std::vector<std::string>; 558 559 try 560 { 561 auto path = callout + "/fault"; 562 // Get the service name from the mapper for the fault callout 563 auto service = util::getService(path, ASSOCIATION_IFACE, bus); 564 565 // Use getProperty utility function to get log entries (endpoints) 566 EndpointList logEntries; 567 util::getProperty(ASSOCIATION_IFACE, ENDPOINTS_PROP, path, service, bus, 568 logEntries); 569 570 // It is possible that all such entries for this callout have since 571 // been deleted. 572 if (logEntries.empty()) 573 { 574 return; 575 } 576 577 auto logEntryService = 578 util::getService(logEntries[0], LOGGING_IFACE, bus); 579 if (logEntryService.empty()) 580 { 581 return; 582 } 583 584 // go through each log entry that matches this callout path 585 std::string logMessage; 586 for (const auto& logEntry : logEntries) 587 { 588 // Check to see if this logEntry has a message that matches. 589 util::getProperty(LOGGING_IFACE, MESSAGE_PROP, logEntry, 590 logEntryService, bus, logMessage); 591 592 if (message == logMessage) 593 { 594 // Log entry matches call out and message, set Resolved to true 595 bool resolved = true; 596 util::setProperty(LOGGING_IFACE, RESOLVED_PROP, logEntry, 597 logEntryService, bus, resolved); 598 } 599 } 600 } 601 catch (std::exception& e) 602 { 603 log<level::INFO>("Failed to resolve error", 604 entry("CALLOUT=%s", callout.c_str()), 605 entry("ERROR=%s", message.c_str())); 606 } 607 } 608 609 void PowerSupply::updateInventory() 610 { 611 using namespace phosphor::pmbus; 612 using namespace sdbusplus::message; 613 614 // Build the object map and send it to the inventory 615 using Properties = std::map<std::string, variant<std::string>>; 616 using Interfaces = std::map<std::string, Properties>; 617 using Object = std::map<object_path, Interfaces>; 618 Properties assetProps; 619 Interfaces interfaces; 620 Object object; 621 622 // If any of these accesses fail, the fields will just be 623 // blank in the inventory. Leave logging ReadFailure errors 624 // to analyze() as it runs continuously and will most 625 // likely hit and threshold them first anyway. The 626 // readString() function will do the tracing of the failing 627 // path so this code doesn't need to. 628 for (const auto& fru : fruJson.at("fruConfigs")) 629 { 630 if (fru.at("interface") == ASSET_IFACE) 631 { 632 try 633 { 634 assetProps.emplace( 635 fru.at("propertyName"), 636 present ? pmbusIntf.readString(fru.at("fileName"), 637 inventoryPMBusAccessType) 638 : ""); 639 } 640 catch (ReadFailure& e) 641 { 642 } 643 } 644 } 645 646 interfaces.emplace(ASSET_IFACE, std::move(assetProps)); 647 648 // For Notify(), just send the relative path of the inventory 649 // object so remove the INVENTORY_OBJ_PATH prefix 650 auto path = inventoryPath.substr(strlen(INVENTORY_OBJ_PATH)); 651 652 object.emplace(path, std::move(interfaces)); 653 654 try 655 { 656 auto service = 657 util::getService(INVENTORY_OBJ_PATH, INVENTORY_MGR_IFACE, bus); 658 659 if (service.empty()) 660 { 661 log<level::ERR>("Unable to get inventory manager service"); 662 return; 663 } 664 665 auto method = bus.new_method_call(service.c_str(), INVENTORY_OBJ_PATH, 666 INVENTORY_MGR_IFACE, "Notify"); 667 668 method.append(std::move(object)); 669 670 auto reply = bus.call(method); 671 } 672 catch (std::exception& e) 673 { 674 log<level::ERR>(e.what(), entry("PATH=%s", inventoryPath.c_str())); 675 } 676 } 677 678 void PowerSupply::syncHistory() 679 { 680 using namespace phosphor::gpio; 681 682 if (syncGPIODevPath.empty()) 683 { 684 // Sync not implemented 685 return; 686 } 687 688 GPIO gpio{syncGPIODevPath, static_cast<gpioNum_t>(syncGPIONumber), 689 Direction::output}; 690 691 try 692 { 693 gpio.set(Value::low); 694 695 std::this_thread::sleep_for(std::chrono::milliseconds{5}); 696 697 gpio.set(Value::high); 698 699 recordManager->clear(); 700 } 701 catch (std::exception& e) 702 { 703 // Do nothing. There would already be a journal entry. 704 } 705 } 706 707 void PowerSupply::enableHistory(const std::string& objectPath, 708 size_t numRecords, 709 const std::string& syncGPIOPath, 710 size_t syncGPIONum) 711 { 712 historyObjectPath = objectPath; 713 syncGPIODevPath = syncGPIOPath; 714 syncGPIONumber = syncGPIONum; 715 716 recordManager = std::make_unique<history::RecordManager>(numRecords); 717 718 auto avgPath = historyObjectPath + '/' + history::Average::name; 719 auto maxPath = historyObjectPath + '/' + history::Maximum::name; 720 721 average = std::make_unique<history::Average>(bus, avgPath); 722 723 maximum = std::make_unique<history::Maximum>(bus, maxPath); 724 } 725 726 void PowerSupply::updateHistory() 727 { 728 if (!recordManager) 729 { 730 // Not enabled 731 return; 732 } 733 734 // Read just the most recent average/max record 735 auto data = 736 pmbusIntf.readBinary(INPUT_HISTORY, pmbus::Type::HwmonDeviceDebug, 737 history::RecordManager::RAW_RECORD_SIZE); 738 739 // Update D-Bus only if something changed (a new record ID, or cleared out) 740 auto changed = recordManager->add(data); 741 if (changed) 742 { 743 average->values(std::move(recordManager->getAverageRecords())); 744 maximum->values(std::move(recordManager->getMaximumRecords())); 745 } 746 } 747 748 } // namespace psu 749 } // namespace power 750 } // namespace phosphor 751