1 /** 2 * Copyright © 2017 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "config.h" 17 18 #include "power_supply.hpp" 19 20 #include "elog-errors.hpp" 21 #include "gpio.hpp" 22 #include "names_values.hpp" 23 #include "pmbus.hpp" 24 #include "types.hpp" 25 #include "utility.hpp" 26 27 #include <org/open_power/Witherspoon/Fault/error.hpp> 28 #include <phosphor-logging/log.hpp> 29 #include <xyz/openbmc_project/Common/Device/error.hpp> 30 31 #include <functional> 32 33 namespace phosphor 34 { 35 namespace power 36 { 37 namespace psu 38 { 39 40 using namespace phosphor::logging; 41 using namespace sdbusplus::org::open_power::Witherspoon::Fault::Error; 42 using namespace sdbusplus::xyz::openbmc_project::Common::Device::Error; 43 44 PowerSupply::PowerSupply(const std::string& name, size_t inst, 45 const std::string& objpath, const std::string& invpath, 46 sdbusplus::bus_t& bus, const sdeventplus::Event& e, 47 std::chrono::seconds& t, std::chrono::seconds& p) : 48 Device(name, inst), monitorPath(objpath), pmbusIntf(objpath), 49 inventoryPath(INVENTORY_OBJ_PATH + invpath), bus(bus), presentInterval(p), 50 presentTimer(e, std::bind([this]() { 51 // The hwmon path may have changed. 52 pmbusIntf.findHwmonDir(); 53 this->present = true; 54 55 // Sync the INPUT_HISTORY data for all PSs 56 syncHistory(); 57 58 // Update the inventory for the new device 59 updateInventory(); 60 })), 61 powerOnInterval(t), 62 powerOnTimer(e, std::bind([this]() { this->powerOn = true; })) 63 { 64 getAccessType(); 65 66 using namespace sdbusplus::bus; 67 using namespace phosphor::pmbus; 68 std::uint16_t statusWord = 0; 69 try 70 { 71 // Read the 2 byte STATUS_WORD value to check for faults. 72 statusWord = pmbusIntf.read(STATUS_WORD, Type::Debug); 73 if (!((statusWord & status_word::INPUT_FAULT_WARN) || 74 (statusWord & status_word::VIN_UV_FAULT))) 75 { 76 resolveError(inventoryPath, 77 std::string(PowerSupplyInputFault::errName)); 78 } 79 } 80 catch (const ReadFailure& e) 81 { 82 log<level::INFO>("Unable to read the 2 byte STATUS_WORD value to check " 83 "for power-supply input faults."); 84 } 85 presentMatch = std::make_unique<match_t>( 86 bus, match::rules::propertiesChanged(inventoryPath, INVENTORY_IFACE), 87 [this](auto& msg) { this->inventoryChanged(msg); }); 88 // Get initial presence state. 89 updatePresence(); 90 91 // Write the SN, PN, etc to the inventory 92 updateInventory(); 93 94 // Subscribe to power state changes 95 powerOnMatch = std::make_unique<match_t>( 96 bus, match::rules::propertiesChanged(POWER_OBJ_PATH, POWER_IFACE), 97 [this](auto& msg) { this->powerStateChanged(msg); }); 98 // Get initial power state. 99 updatePowerState(); 100 } 101 102 void PowerSupply::getAccessType() 103 { 104 using namespace phosphor::power::util; 105 fruJson = loadJSONFromFile(PSU_JSON_PATH); 106 if (fruJson == nullptr) 107 { 108 log<level::ERR>("InternalFailure when parsing the JSON file"); 109 return; 110 } 111 inventoryPMBusAccessType = getPMBusAccessType(fruJson); 112 } 113 114 void PowerSupply::captureCmd(util::NamesValues& nv, const std::string& cmd, 115 phosphor::pmbus::Type type) 116 { 117 if (pmbusIntf.exists(cmd, type)) 118 { 119 try 120 { 121 auto val = pmbusIntf.read(cmd, type); 122 nv.add(cmd, val); 123 } 124 catch (const std::exception& e) 125 { 126 log<level::INFO>("Unable to capture metadata", 127 entry("CMD=%s", cmd.c_str())); 128 } 129 } 130 } 131 132 void PowerSupply::analyze() 133 { 134 using namespace phosphor::pmbus; 135 136 try 137 { 138 if (present) 139 { 140 std::uint16_t statusWord = 0; 141 142 // Read the 2 byte STATUS_WORD value to check for faults. 143 statusWord = pmbusIntf.read(STATUS_WORD, Type::Debug); 144 readFail = 0; 145 146 checkInputFault(statusWord); 147 148 if (powerOn && (inputFault == 0) && !faultFound) 149 { 150 checkFanFault(statusWord); 151 checkTemperatureFault(statusWord); 152 checkOutputOvervoltageFault(statusWord); 153 checkCurrentOutOverCurrentFault(statusWord); 154 checkPGOrUnitOffFault(statusWord); 155 } 156 157 updateHistory(); 158 } 159 } 160 catch (const ReadFailure& e) 161 { 162 if (readFail < FAULT_COUNT) 163 { 164 readFail++; 165 } 166 167 if (!readFailLogged && readFail >= FAULT_COUNT) 168 { 169 commit<ReadFailure>(); 170 readFailLogged = true; 171 } 172 } 173 174 return; 175 } 176 177 void PowerSupply::inventoryChanged(sdbusplus::message_t& msg) 178 { 179 std::string msgSensor; 180 std::map<std::string, std::variant<uint32_t, bool>> msgData; 181 msg.read(msgSensor, msgData); 182 183 // Check if it was the Present property that changed. 184 auto valPropMap = msgData.find(PRESENT_PROP); 185 if (valPropMap != msgData.end()) 186 { 187 if (std::get<bool>(valPropMap->second)) 188 { 189 clearFaults(); 190 presentTimer.restartOnce(presentInterval); 191 } 192 else 193 { 194 present = false; 195 presentTimer.setEnabled(false); 196 197 // Clear out the now outdated inventory properties 198 updateInventory(); 199 } 200 } 201 202 return; 203 } 204 205 void PowerSupply::updatePresence() 206 { 207 // Use getProperty utility function to get presence status. 208 std::string service = "xyz.openbmc_project.Inventory.Manager"; 209 util::getProperty(INVENTORY_IFACE, PRESENT_PROP, inventoryPath, service, 210 bus, this->present); 211 } 212 213 void PowerSupply::powerStateChanged(sdbusplus::message_t& msg) 214 { 215 int32_t state = 0; 216 std::string msgSensor; 217 std::map<std::string, std::variant<int32_t>> msgData; 218 msg.read(msgSensor, msgData); 219 220 // Check if it was the Present property that changed. 221 auto valPropMap = msgData.find("state"); 222 if (valPropMap != msgData.end()) 223 { 224 state = std::get<int32_t>(valPropMap->second); 225 226 // Power is on when state=1. Set the fault logged variables to false 227 // and start the power on timer when the state changes to 1. 228 if (state) 229 { 230 clearFaults(); 231 powerOnTimer.restartOnce(powerOnInterval); 232 } 233 else 234 { 235 powerOnTimer.setEnabled(false); 236 powerOn = false; 237 } 238 } 239 } 240 241 void PowerSupply::updatePowerState() 242 { 243 powerOn = util::isPoweredOn(bus); 244 } 245 246 void PowerSupply::checkInputFault(const uint16_t statusWord) 247 { 248 using namespace phosphor::pmbus; 249 250 if ((inputFault < FAULT_COUNT) && 251 ((statusWord & status_word::INPUT_FAULT_WARN) || 252 (statusWord & status_word::VIN_UV_FAULT))) 253 { 254 if (inputFault == 0) 255 { 256 log<level::INFO>("INPUT or VIN_UV fault", 257 entry("STATUS_WORD=0x%04X", statusWord)); 258 } 259 260 inputFault++; 261 } 262 else 263 { 264 if ((inputFault > 0) && !(statusWord & status_word::INPUT_FAULT_WARN) && 265 !(statusWord & status_word::VIN_UV_FAULT)) 266 { 267 inputFault = 0; 268 faultFound = false; 269 // When an input fault occurs, the power supply cannot be on. 270 // However, the check for the case where the power supply should be 271 // on will stop when there is a fault found. 272 // Clear the powerOnFault when the inputFault is cleared to reset 273 // the powerOnFault de-glitching. 274 powerOnFault = 0; 275 276 log<level::INFO>("INPUT_FAULT_WARN cleared", 277 entry("POWERSUPPLY=%s", inventoryPath.c_str())); 278 279 resolveError(inventoryPath, 280 std::string(PowerSupplyInputFault::errName)); 281 282 if (powerOn) 283 { 284 // The power supply will not be immediately powered on after 285 // the input power is restored. 286 powerOn = false; 287 // Start up the timer that will set the state to indicate we 288 // are ready for the powered on fault checks. 289 powerOnTimer.restartOnce(powerOnInterval); 290 } 291 } 292 } 293 294 if (!faultFound && (inputFault >= FAULT_COUNT)) 295 { 296 // If the power is on, report the fault in an error log entry. 297 if (powerOn) 298 { 299 util::NamesValues nv; 300 nv.add("STATUS_WORD", statusWord); 301 captureCmd(nv, STATUS_INPUT, Type::Debug); 302 303 using metadata = 304 org::open_power::Witherspoon::Fault::PowerSupplyInputFault; 305 306 report<PowerSupplyInputFault>( 307 metadata::RAW_STATUS(nv.get().c_str()), 308 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 309 310 faultFound = true; 311 } 312 } 313 } 314 315 void PowerSupply::checkPGOrUnitOffFault(const uint16_t statusWord) 316 { 317 using namespace phosphor::pmbus; 318 319 if (powerOnFault < FAULT_COUNT) 320 { 321 // Check PG# and UNIT_IS_OFF 322 if ((statusWord & status_word::POWER_GOOD_NEGATED) || 323 (statusWord & status_word::UNIT_IS_OFF)) 324 { 325 log<level::INFO>("PGOOD or UNIT_IS_OFF bit bad", 326 entry("STATUS_WORD=0x%04X", statusWord)); 327 powerOnFault++; 328 } 329 else 330 { 331 if (powerOnFault > 0) 332 { 333 log<level::INFO>("PGOOD and UNIT_IS_OFF bits good"); 334 powerOnFault = 0; 335 } 336 } 337 338 if (!faultFound && (powerOnFault >= FAULT_COUNT)) 339 { 340 faultFound = true; 341 342 util::NamesValues nv; 343 nv.add("STATUS_WORD", statusWord); 344 captureCmd(nv, STATUS_INPUT, Type::Debug); 345 auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0); 346 captureCmd(nv, status0Vout, Type::Debug); 347 captureCmd(nv, STATUS_IOUT, Type::Debug); 348 captureCmd(nv, STATUS_MFR, Type::Debug); 349 350 using metadata = 351 org::open_power::Witherspoon::Fault::PowerSupplyShouldBeOn; 352 353 // A power supply is OFF (or pgood low) but should be on. 354 report<PowerSupplyShouldBeOn>( 355 metadata::RAW_STATUS(nv.get().c_str()), 356 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 357 } 358 } 359 } 360 361 void PowerSupply::checkCurrentOutOverCurrentFault(const uint16_t statusWord) 362 { 363 using namespace phosphor::pmbus; 364 365 if (outputOCFault < FAULT_COUNT) 366 { 367 // Check for an output overcurrent fault. 368 if ((statusWord & status_word::IOUT_OC_FAULT)) 369 { 370 outputOCFault++; 371 } 372 else 373 { 374 if (outputOCFault > 0) 375 { 376 outputOCFault = 0; 377 } 378 } 379 380 if (!faultFound && (outputOCFault >= FAULT_COUNT)) 381 { 382 util::NamesValues nv; 383 nv.add("STATUS_WORD", statusWord); 384 captureCmd(nv, STATUS_INPUT, Type::Debug); 385 auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0); 386 captureCmd(nv, status0Vout, Type::Debug); 387 captureCmd(nv, STATUS_IOUT, Type::Debug); 388 captureCmd(nv, STATUS_MFR, Type::Debug); 389 390 using metadata = org::open_power::Witherspoon::Fault:: 391 PowerSupplyOutputOvercurrent; 392 393 report<PowerSupplyOutputOvercurrent>( 394 metadata::RAW_STATUS(nv.get().c_str()), 395 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 396 397 faultFound = true; 398 } 399 } 400 } 401 402 void PowerSupply::checkOutputOvervoltageFault(const uint16_t statusWord) 403 { 404 using namespace phosphor::pmbus; 405 406 if (outputOVFault < FAULT_COUNT) 407 { 408 // Check for an output overvoltage fault. 409 if (statusWord & status_word::VOUT_OV_FAULT) 410 { 411 outputOVFault++; 412 } 413 else 414 { 415 if (outputOVFault > 0) 416 { 417 outputOVFault = 0; 418 } 419 } 420 421 if (!faultFound && (outputOVFault >= FAULT_COUNT)) 422 { 423 util::NamesValues nv; 424 nv.add("STATUS_WORD", statusWord); 425 captureCmd(nv, STATUS_INPUT, Type::Debug); 426 auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0); 427 captureCmd(nv, status0Vout, Type::Debug); 428 captureCmd(nv, STATUS_IOUT, Type::Debug); 429 captureCmd(nv, STATUS_MFR, Type::Debug); 430 431 using metadata = org::open_power::Witherspoon::Fault:: 432 PowerSupplyOutputOvervoltage; 433 434 report<PowerSupplyOutputOvervoltage>( 435 metadata::RAW_STATUS(nv.get().c_str()), 436 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 437 438 faultFound = true; 439 } 440 } 441 } 442 443 void PowerSupply::checkFanFault(const uint16_t statusWord) 444 { 445 using namespace phosphor::pmbus; 446 447 if (fanFault < FAULT_COUNT) 448 { 449 // Check for a fan fault or warning condition 450 if (statusWord & status_word::FAN_FAULT) 451 { 452 fanFault++; 453 } 454 else 455 { 456 if (fanFault > 0) 457 { 458 fanFault = 0; 459 } 460 } 461 462 if (!faultFound && (fanFault >= FAULT_COUNT)) 463 { 464 util::NamesValues nv; 465 nv.add("STATUS_WORD", statusWord); 466 captureCmd(nv, STATUS_MFR, Type::Debug); 467 captureCmd(nv, STATUS_TEMPERATURE, Type::Debug); 468 captureCmd(nv, STATUS_FANS_1_2, Type::Debug); 469 470 using metadata = 471 org::open_power::Witherspoon::Fault::PowerSupplyFanFault; 472 473 report<PowerSupplyFanFault>( 474 metadata::RAW_STATUS(nv.get().c_str()), 475 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 476 477 faultFound = true; 478 } 479 } 480 } 481 482 void PowerSupply::checkTemperatureFault(const uint16_t statusWord) 483 { 484 using namespace phosphor::pmbus; 485 486 // Due to how the PMBus core device driver sends a clear faults command 487 // the bit in STATUS_WORD will likely be cleared when we attempt to examine 488 // it for a Thermal Fault or Warning. So, check the STATUS_WORD and the 489 // STATUS_TEMPERATURE bits. If either indicates a fault, proceed with 490 // logging the over-temperature condition. 491 std::uint8_t statusTemperature = 0; 492 statusTemperature = pmbusIntf.read(STATUS_TEMPERATURE, Type::Debug); 493 if (temperatureFault < FAULT_COUNT) 494 { 495 if ((statusWord & status_word::TEMPERATURE_FAULT_WARN) || 496 (statusTemperature & status_temperature::OT_FAULT)) 497 { 498 temperatureFault++; 499 } 500 else 501 { 502 if (temperatureFault > 0) 503 { 504 temperatureFault = 0; 505 } 506 } 507 508 if (!faultFound && (temperatureFault >= FAULT_COUNT)) 509 { 510 // The power supply has had an over-temperature condition. 511 // This may not result in a shutdown if experienced for a short 512 // duration. 513 // This should not occur under normal conditions. 514 // The power supply may be faulty, or the paired supply may be 515 // putting out less current. 516 // Capture command responses with potentially relevant information, 517 // and call out the power supply reporting the condition. 518 util::NamesValues nv; 519 nv.add("STATUS_WORD", statusWord); 520 captureCmd(nv, STATUS_MFR, Type::Debug); 521 captureCmd(nv, STATUS_IOUT, Type::Debug); 522 nv.add("STATUS_TEMPERATURE", statusTemperature); 523 captureCmd(nv, STATUS_FANS_1_2, Type::Debug); 524 525 using metadata = org::open_power::Witherspoon::Fault:: 526 PowerSupplyTemperatureFault; 527 528 report<PowerSupplyTemperatureFault>( 529 metadata::RAW_STATUS(nv.get().c_str()), 530 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 531 532 faultFound = true; 533 } 534 } 535 } 536 537 void PowerSupply::clearFaults() 538 { 539 readFail = 0; 540 readFailLogged = false; 541 inputFault = 0; 542 powerOnFault = 0; 543 outputOCFault = 0; 544 outputOVFault = 0; 545 fanFault = 0; 546 temperatureFault = 0; 547 faultFound = false; 548 549 return; 550 } 551 552 void PowerSupply::resolveError(const std::string& callout, 553 const std::string& message) 554 { 555 using EndpointList = std::vector<std::string>; 556 557 try 558 { 559 auto path = callout + "/fault"; 560 // Get the service name from the mapper for the fault callout 561 auto service = util::getService(path, ASSOCIATION_IFACE, bus); 562 563 // Use getProperty utility function to get log entries (endpoints) 564 EndpointList logEntries; 565 util::getProperty(ASSOCIATION_IFACE, ENDPOINTS_PROP, path, service, bus, 566 logEntries); 567 568 // It is possible that all such entries for this callout have since 569 // been deleted. 570 if (logEntries.empty()) 571 { 572 return; 573 } 574 575 auto logEntryService = 576 util::getService(logEntries[0], LOGGING_IFACE, bus); 577 if (logEntryService.empty()) 578 { 579 return; 580 } 581 582 // go through each log entry that matches this callout path 583 std::string logMessage; 584 for (const auto& logEntry : logEntries) 585 { 586 // Check to see if this logEntry has a message that matches. 587 util::getProperty(LOGGING_IFACE, MESSAGE_PROP, logEntry, 588 logEntryService, bus, logMessage); 589 590 if (message == logMessage) 591 { 592 // Log entry matches call out and message, set Resolved to true 593 bool resolved = true; 594 util::setProperty(LOGGING_IFACE, RESOLVED_PROP, logEntry, 595 logEntryService, bus, resolved); 596 } 597 } 598 } 599 catch (const std::exception& e) 600 { 601 log<level::INFO>("Failed to resolve error", 602 entry("CALLOUT=%s", callout.c_str()), 603 entry("ERROR=%s", message.c_str())); 604 } 605 } 606 607 void PowerSupply::updateInventory() 608 { 609 using namespace phosphor::pmbus; 610 using namespace sdbusplus::message; 611 612 // Build the object map and send it to the inventory 613 using Properties = std::map<std::string, std::variant<std::string, bool>>; 614 using Interfaces = std::map<std::string, Properties>; 615 using Object = std::map<object_path, Interfaces>; 616 Properties assetProps; 617 Properties operProps; 618 Interfaces interfaces; 619 Object object; 620 621 // If any of these accesses fail, the fields will just be 622 // blank in the inventory. Leave logging ReadFailure errors 623 // to analyze() as it runs continuously and will most 624 // likely hit and threshold them first anyway. The 625 // readString() function will do the tracing of the failing 626 // path so this code doesn't need to. 627 for (const auto& fru : fruJson.at("fruConfigs")) 628 { 629 if (fru.at("interface") == ASSET_IFACE) 630 { 631 try 632 { 633 assetProps.emplace( 634 fru.at("propertyName"), 635 present ? pmbusIntf.readString(fru.at("fileName"), 636 inventoryPMBusAccessType) 637 : ""); 638 } 639 catch (const ReadFailure& e) 640 {} 641 } 642 } 643 644 operProps.emplace(FUNCTIONAL_PROP, present); 645 interfaces.emplace(ASSET_IFACE, std::move(assetProps)); 646 interfaces.emplace(OPERATIONAL_STATE_IFACE, std::move(operProps)); 647 648 // For Notify(), just send the relative path of the inventory 649 // object so remove the INVENTORY_OBJ_PATH prefix 650 auto path = inventoryPath.substr(strlen(INVENTORY_OBJ_PATH)); 651 652 object.emplace(path, std::move(interfaces)); 653 654 try 655 { 656 auto service = 657 util::getService(INVENTORY_OBJ_PATH, INVENTORY_MGR_IFACE, bus); 658 659 if (service.empty()) 660 { 661 log<level::ERR>("Unable to get inventory manager service"); 662 return; 663 } 664 665 auto method = bus.new_method_call(service.c_str(), INVENTORY_OBJ_PATH, 666 INVENTORY_MGR_IFACE, "Notify"); 667 668 method.append(std::move(object)); 669 670 auto reply = bus.call(method); 671 } 672 catch (const std::exception& e) 673 { 674 log<level::ERR>(e.what(), entry("PATH=%s", inventoryPath.c_str())); 675 } 676 } 677 678 void PowerSupply::syncHistory() 679 { 680 using namespace phosphor::gpio; 681 682 if (syncGPIODevPath.empty()) 683 { 684 // Sync not implemented 685 return; 686 } 687 688 GPIO gpio{syncGPIODevPath, static_cast<gpioNum_t>(syncGPIONumber), 689 Direction::output}; 690 691 try 692 { 693 gpio.set(Value::low); 694 695 std::this_thread::sleep_for(std::chrono::milliseconds{5}); 696 697 gpio.set(Value::high); 698 699 recordManager->clear(); 700 } 701 catch (const std::exception& e) 702 { 703 // Do nothing. There would already be a journal entry. 704 } 705 } 706 707 void PowerSupply::enableHistory( 708 const std::string& objectPath, size_t numRecords, 709 const std::string& syncGPIOPath, size_t syncGPIONum) 710 { 711 historyObjectPath = objectPath; 712 syncGPIODevPath = syncGPIOPath; 713 syncGPIONumber = syncGPIONum; 714 715 recordManager = std::make_unique<history::RecordManager>(numRecords); 716 717 auto avgPath = historyObjectPath + '/' + history::Average::name; 718 auto maxPath = historyObjectPath + '/' + history::Maximum::name; 719 720 average = std::make_unique<history::Average>(bus, avgPath); 721 722 maximum = std::make_unique<history::Maximum>(bus, maxPath); 723 } 724 725 void PowerSupply::updateHistory() 726 { 727 if (!recordManager) 728 { 729 // Not enabled 730 return; 731 } 732 733 // Read just the most recent average/max record 734 auto data = 735 pmbusIntf.readBinary(INPUT_HISTORY, pmbus::Type::HwmonDeviceDebug, 736 history::RecordManager::RAW_RECORD_SIZE); 737 738 // Update D-Bus only if something changed (a new record ID, or cleared out) 739 auto changed = recordManager->add(data); 740 if (changed) 741 { 742 average->values(recordManager->getAverageRecords()); 743 maximum->values(recordManager->getMaximumRecords()); 744 } 745 } 746 747 } // namespace psu 748 } // namespace power 749 } // namespace phosphor 750