1 /** 2 * Copyright © 2017 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "config.h" 17 18 #include "power_supply.hpp" 19 20 #include "elog-errors.hpp" 21 #include "gpio.hpp" 22 #include "names_values.hpp" 23 #include "pmbus.hpp" 24 #include "types.hpp" 25 #include "utility.hpp" 26 27 #include <org/open_power/Witherspoon/Fault/error.hpp> 28 #include <phosphor-logging/log.hpp> 29 #include <xyz/openbmc_project/Common/Device/error.hpp> 30 31 #include <functional> 32 33 namespace phosphor 34 { 35 namespace power 36 { 37 namespace psu 38 { 39 40 using namespace phosphor::logging; 41 using namespace sdbusplus::org::open_power::Witherspoon::Fault::Error; 42 using namespace sdbusplus::xyz::openbmc_project::Common::Device::Error; 43 44 #ifdef __clang__ 45 #pragma clang diagnostic push 46 #pragma clang diagnostic ignored "-Wpessimizing-move" 47 #endif 48 PowerSupply::PowerSupply(const std::string& name, size_t inst, 49 const std::string& objpath, const std::string& invpath, 50 sdbusplus::bus_t& bus, const sdeventplus::Event& e, 51 std::chrono::seconds& t, std::chrono::seconds& p) : 52 Device(name, inst), monitorPath(objpath), pmbusIntf(objpath), 53 inventoryPath(INVENTORY_OBJ_PATH + invpath), bus(bus), presentInterval(p), 54 presentTimer(e, std::bind([this]() { 55 // The hwmon path may have changed. 56 pmbusIntf.findHwmonDir(); 57 this->present = true; 58 59 // Sync the INPUT_HISTORY data for all PSs 60 syncHistory(); 61 62 // Update the inventory for the new device 63 updateInventory(); 64 })), 65 powerOnInterval(t), 66 powerOnTimer(e, std::bind([this]() { this->powerOn = true; })) 67 { 68 getAccessType(); 69 70 using namespace sdbusplus::bus; 71 using namespace phosphor::pmbus; 72 std::uint16_t statusWord = 0; 73 try 74 { 75 // Read the 2 byte STATUS_WORD value to check for faults. 76 statusWord = pmbusIntf.read(STATUS_WORD, Type::Debug); 77 if (!((statusWord & status_word::INPUT_FAULT_WARN) || 78 (statusWord & status_word::VIN_UV_FAULT))) 79 { 80 resolveError(inventoryPath, 81 std::string(PowerSupplyInputFault::errName)); 82 } 83 } 84 catch (const ReadFailure& e) 85 { 86 log<level::INFO>("Unable to read the 2 byte STATUS_WORD value to check " 87 "for power-supply input faults."); 88 } 89 presentMatch = std::make_unique<match_t>( 90 bus, match::rules::propertiesChanged(inventoryPath, INVENTORY_IFACE), 91 [this](auto& msg) { this->inventoryChanged(msg); }); 92 // Get initial presence state. 93 updatePresence(); 94 95 // Write the SN, PN, etc to the inventory 96 updateInventory(); 97 98 // Subscribe to power state changes 99 powerOnMatch = std::make_unique<match_t>( 100 bus, match::rules::propertiesChanged(POWER_OBJ_PATH, POWER_IFACE), 101 [this](auto& msg) { this->powerStateChanged(msg); }); 102 // Get initial power state. 103 updatePowerState(); 104 } 105 #ifdef __clang__ 106 #pragma clang diagnostic pop 107 #endif 108 109 void PowerSupply::getAccessType() 110 { 111 using namespace phosphor::power::util; 112 fruJson = loadJSONFromFile(PSU_JSON_PATH); 113 if (fruJson == nullptr) 114 { 115 log<level::ERR>("InternalFailure when parsing the JSON file"); 116 return; 117 } 118 inventoryPMBusAccessType = getPMBusAccessType(fruJson); 119 } 120 121 void PowerSupply::captureCmd(util::NamesValues& nv, const std::string& cmd, 122 phosphor::pmbus::Type type) 123 { 124 if (pmbusIntf.exists(cmd, type)) 125 { 126 try 127 { 128 auto val = pmbusIntf.read(cmd, type); 129 nv.add(cmd, val); 130 } 131 catch (const std::exception& e) 132 { 133 log<level::INFO>("Unable to capture metadata", 134 entry("CMD=%s", cmd.c_str())); 135 } 136 } 137 } 138 139 void PowerSupply::analyze() 140 { 141 using namespace phosphor::pmbus; 142 143 try 144 { 145 if (present) 146 { 147 std::uint16_t statusWord = 0; 148 149 // Read the 2 byte STATUS_WORD value to check for faults. 150 statusWord = pmbusIntf.read(STATUS_WORD, Type::Debug); 151 readFail = 0; 152 153 checkInputFault(statusWord); 154 155 if (powerOn && (inputFault == 0) && !faultFound) 156 { 157 checkFanFault(statusWord); 158 checkTemperatureFault(statusWord); 159 checkOutputOvervoltageFault(statusWord); 160 checkCurrentOutOverCurrentFault(statusWord); 161 checkPGOrUnitOffFault(statusWord); 162 } 163 164 updateHistory(); 165 } 166 } 167 catch (const ReadFailure& e) 168 { 169 if (readFail < FAULT_COUNT) 170 { 171 readFail++; 172 } 173 174 if (!readFailLogged && readFail >= FAULT_COUNT) 175 { 176 commit<ReadFailure>(); 177 readFailLogged = true; 178 } 179 } 180 181 return; 182 } 183 184 void PowerSupply::inventoryChanged(sdbusplus::message_t& msg) 185 { 186 std::string msgSensor; 187 std::map<std::string, std::variant<uint32_t, bool>> msgData; 188 msg.read(msgSensor, msgData); 189 190 // Check if it was the Present property that changed. 191 auto valPropMap = msgData.find(PRESENT_PROP); 192 if (valPropMap != msgData.end()) 193 { 194 if (std::get<bool>(valPropMap->second)) 195 { 196 clearFaults(); 197 presentTimer.restartOnce(presentInterval); 198 } 199 else 200 { 201 present = false; 202 presentTimer.setEnabled(false); 203 204 // Clear out the now outdated inventory properties 205 updateInventory(); 206 } 207 } 208 209 return; 210 } 211 212 void PowerSupply::updatePresence() 213 { 214 // Use getProperty utility function to get presence status. 215 std::string service = "xyz.openbmc_project.Inventory.Manager"; 216 util::getProperty(INVENTORY_IFACE, PRESENT_PROP, inventoryPath, service, 217 bus, this->present); 218 } 219 220 void PowerSupply::powerStateChanged(sdbusplus::message_t& msg) 221 { 222 int32_t state = 0; 223 std::string msgSensor; 224 std::map<std::string, std::variant<int32_t>> msgData; 225 msg.read(msgSensor, msgData); 226 227 // Check if it was the Present property that changed. 228 auto valPropMap = msgData.find("state"); 229 if (valPropMap != msgData.end()) 230 { 231 state = std::get<int32_t>(valPropMap->second); 232 233 // Power is on when state=1. Set the fault logged variables to false 234 // and start the power on timer when the state changes to 1. 235 if (state) 236 { 237 clearFaults(); 238 powerOnTimer.restartOnce(powerOnInterval); 239 } 240 else 241 { 242 powerOnTimer.setEnabled(false); 243 powerOn = false; 244 } 245 } 246 } 247 248 void PowerSupply::updatePowerState() 249 { 250 powerOn = util::isPoweredOn(bus); 251 } 252 253 void PowerSupply::checkInputFault(const uint16_t statusWord) 254 { 255 using namespace phosphor::pmbus; 256 257 if ((inputFault < FAULT_COUNT) && 258 ((statusWord & status_word::INPUT_FAULT_WARN) || 259 (statusWord & status_word::VIN_UV_FAULT))) 260 { 261 if (inputFault == 0) 262 { 263 log<level::INFO>("INPUT or VIN_UV fault", 264 entry("STATUS_WORD=0x%04X", statusWord)); 265 } 266 267 inputFault++; 268 } 269 else 270 { 271 if ((inputFault > 0) && !(statusWord & status_word::INPUT_FAULT_WARN) && 272 !(statusWord & status_word::VIN_UV_FAULT)) 273 { 274 inputFault = 0; 275 faultFound = false; 276 // When an input fault occurs, the power supply cannot be on. 277 // However, the check for the case where the power supply should be 278 // on will stop when there is a fault found. 279 // Clear the powerOnFault when the inputFault is cleared to reset 280 // the powerOnFault de-glitching. 281 powerOnFault = 0; 282 283 log<level::INFO>("INPUT_FAULT_WARN cleared", 284 entry("POWERSUPPLY=%s", inventoryPath.c_str())); 285 286 resolveError(inventoryPath, 287 std::string(PowerSupplyInputFault::errName)); 288 289 if (powerOn) 290 { 291 // The power supply will not be immediately powered on after 292 // the input power is restored. 293 powerOn = false; 294 // Start up the timer that will set the state to indicate we 295 // are ready for the powered on fault checks. 296 powerOnTimer.restartOnce(powerOnInterval); 297 } 298 } 299 } 300 301 if (!faultFound && (inputFault >= FAULT_COUNT)) 302 { 303 // If the power is on, report the fault in an error log entry. 304 if (powerOn) 305 { 306 util::NamesValues nv; 307 nv.add("STATUS_WORD", statusWord); 308 captureCmd(nv, STATUS_INPUT, Type::Debug); 309 310 using metadata = 311 org::open_power::Witherspoon::Fault::PowerSupplyInputFault; 312 313 report<PowerSupplyInputFault>( 314 metadata::RAW_STATUS(nv.get().c_str()), 315 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 316 317 faultFound = true; 318 } 319 } 320 } 321 322 void PowerSupply::checkPGOrUnitOffFault(const uint16_t statusWord) 323 { 324 using namespace phosphor::pmbus; 325 326 if (powerOnFault < FAULT_COUNT) 327 { 328 // Check PG# and UNIT_IS_OFF 329 if ((statusWord & status_word::POWER_GOOD_NEGATED) || 330 (statusWord & status_word::UNIT_IS_OFF)) 331 { 332 log<level::INFO>("PGOOD or UNIT_IS_OFF bit bad", 333 entry("STATUS_WORD=0x%04X", statusWord)); 334 powerOnFault++; 335 } 336 else 337 { 338 if (powerOnFault > 0) 339 { 340 log<level::INFO>("PGOOD and UNIT_IS_OFF bits good"); 341 powerOnFault = 0; 342 } 343 } 344 345 if (!faultFound && (powerOnFault >= FAULT_COUNT)) 346 { 347 faultFound = true; 348 349 util::NamesValues nv; 350 nv.add("STATUS_WORD", statusWord); 351 captureCmd(nv, STATUS_INPUT, Type::Debug); 352 auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0); 353 captureCmd(nv, status0Vout, Type::Debug); 354 captureCmd(nv, STATUS_IOUT, Type::Debug); 355 captureCmd(nv, STATUS_MFR, Type::Debug); 356 357 using metadata = 358 org::open_power::Witherspoon::Fault::PowerSupplyShouldBeOn; 359 360 // A power supply is OFF (or pgood low) but should be on. 361 report<PowerSupplyShouldBeOn>( 362 metadata::RAW_STATUS(nv.get().c_str()), 363 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 364 } 365 } 366 } 367 368 void PowerSupply::checkCurrentOutOverCurrentFault(const uint16_t statusWord) 369 { 370 using namespace phosphor::pmbus; 371 372 if (outputOCFault < FAULT_COUNT) 373 { 374 // Check for an output overcurrent fault. 375 if ((statusWord & status_word::IOUT_OC_FAULT)) 376 { 377 outputOCFault++; 378 } 379 else 380 { 381 if (outputOCFault > 0) 382 { 383 outputOCFault = 0; 384 } 385 } 386 387 if (!faultFound && (outputOCFault >= FAULT_COUNT)) 388 { 389 util::NamesValues nv; 390 nv.add("STATUS_WORD", statusWord); 391 captureCmd(nv, STATUS_INPUT, Type::Debug); 392 auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0); 393 captureCmd(nv, status0Vout, Type::Debug); 394 captureCmd(nv, STATUS_IOUT, Type::Debug); 395 captureCmd(nv, STATUS_MFR, Type::Debug); 396 397 using metadata = org::open_power::Witherspoon::Fault:: 398 PowerSupplyOutputOvercurrent; 399 400 report<PowerSupplyOutputOvercurrent>( 401 metadata::RAW_STATUS(nv.get().c_str()), 402 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 403 404 faultFound = true; 405 } 406 } 407 } 408 409 void PowerSupply::checkOutputOvervoltageFault(const uint16_t statusWord) 410 { 411 using namespace phosphor::pmbus; 412 413 if (outputOVFault < FAULT_COUNT) 414 { 415 // Check for an output overvoltage fault. 416 if (statusWord & status_word::VOUT_OV_FAULT) 417 { 418 outputOVFault++; 419 } 420 else 421 { 422 if (outputOVFault > 0) 423 { 424 outputOVFault = 0; 425 } 426 } 427 428 if (!faultFound && (outputOVFault >= FAULT_COUNT)) 429 { 430 util::NamesValues nv; 431 nv.add("STATUS_WORD", statusWord); 432 captureCmd(nv, STATUS_INPUT, Type::Debug); 433 auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0); 434 captureCmd(nv, status0Vout, Type::Debug); 435 captureCmd(nv, STATUS_IOUT, Type::Debug); 436 captureCmd(nv, STATUS_MFR, Type::Debug); 437 438 using metadata = org::open_power::Witherspoon::Fault:: 439 PowerSupplyOutputOvervoltage; 440 441 report<PowerSupplyOutputOvervoltage>( 442 metadata::RAW_STATUS(nv.get().c_str()), 443 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 444 445 faultFound = true; 446 } 447 } 448 } 449 450 void PowerSupply::checkFanFault(const uint16_t statusWord) 451 { 452 using namespace phosphor::pmbus; 453 454 if (fanFault < FAULT_COUNT) 455 { 456 // Check for a fan fault or warning condition 457 if (statusWord & status_word::FAN_FAULT) 458 { 459 fanFault++; 460 } 461 else 462 { 463 if (fanFault > 0) 464 { 465 fanFault = 0; 466 } 467 } 468 469 if (!faultFound && (fanFault >= FAULT_COUNT)) 470 { 471 util::NamesValues nv; 472 nv.add("STATUS_WORD", statusWord); 473 captureCmd(nv, STATUS_MFR, Type::Debug); 474 captureCmd(nv, STATUS_TEMPERATURE, Type::Debug); 475 captureCmd(nv, STATUS_FANS_1_2, Type::Debug); 476 477 using metadata = 478 org::open_power::Witherspoon::Fault::PowerSupplyFanFault; 479 480 report<PowerSupplyFanFault>( 481 metadata::RAW_STATUS(nv.get().c_str()), 482 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 483 484 faultFound = true; 485 } 486 } 487 } 488 489 void PowerSupply::checkTemperatureFault(const uint16_t statusWord) 490 { 491 using namespace phosphor::pmbus; 492 493 // Due to how the PMBus core device driver sends a clear faults command 494 // the bit in STATUS_WORD will likely be cleared when we attempt to examine 495 // it for a Thermal Fault or Warning. So, check the STATUS_WORD and the 496 // STATUS_TEMPERATURE bits. If either indicates a fault, proceed with 497 // logging the over-temperature condition. 498 std::uint8_t statusTemperature = 0; 499 statusTemperature = pmbusIntf.read(STATUS_TEMPERATURE, Type::Debug); 500 if (temperatureFault < FAULT_COUNT) 501 { 502 if ((statusWord & status_word::TEMPERATURE_FAULT_WARN) || 503 (statusTemperature & status_temperature::OT_FAULT)) 504 { 505 temperatureFault++; 506 } 507 else 508 { 509 if (temperatureFault > 0) 510 { 511 temperatureFault = 0; 512 } 513 } 514 515 if (!faultFound && (temperatureFault >= FAULT_COUNT)) 516 { 517 // The power supply has had an over-temperature condition. 518 // This may not result in a shutdown if experienced for a short 519 // duration. 520 // This should not occur under normal conditions. 521 // The power supply may be faulty, or the paired supply may be 522 // putting out less current. 523 // Capture command responses with potentially relevant information, 524 // and call out the power supply reporting the condition. 525 util::NamesValues nv; 526 nv.add("STATUS_WORD", statusWord); 527 captureCmd(nv, STATUS_MFR, Type::Debug); 528 captureCmd(nv, STATUS_IOUT, Type::Debug); 529 nv.add("STATUS_TEMPERATURE", statusTemperature); 530 captureCmd(nv, STATUS_FANS_1_2, Type::Debug); 531 532 using metadata = org::open_power::Witherspoon::Fault:: 533 PowerSupplyTemperatureFault; 534 535 report<PowerSupplyTemperatureFault>( 536 metadata::RAW_STATUS(nv.get().c_str()), 537 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 538 539 faultFound = true; 540 } 541 } 542 } 543 544 void PowerSupply::clearFaults() 545 { 546 readFail = 0; 547 readFailLogged = false; 548 inputFault = 0; 549 powerOnFault = 0; 550 outputOCFault = 0; 551 outputOVFault = 0; 552 fanFault = 0; 553 temperatureFault = 0; 554 faultFound = false; 555 556 return; 557 } 558 559 void PowerSupply::resolveError(const std::string& callout, 560 const std::string& message) 561 { 562 using EndpointList = std::vector<std::string>; 563 564 try 565 { 566 auto path = callout + "/fault"; 567 // Get the service name from the mapper for the fault callout 568 auto service = util::getService(path, ASSOCIATION_IFACE, bus); 569 570 // Use getProperty utility function to get log entries (endpoints) 571 EndpointList logEntries; 572 util::getProperty(ASSOCIATION_IFACE, ENDPOINTS_PROP, path, service, bus, 573 logEntries); 574 575 // It is possible that all such entries for this callout have since 576 // been deleted. 577 if (logEntries.empty()) 578 { 579 return; 580 } 581 582 auto logEntryService = 583 util::getService(logEntries[0], LOGGING_IFACE, bus); 584 if (logEntryService.empty()) 585 { 586 return; 587 } 588 589 // go through each log entry that matches this callout path 590 std::string logMessage; 591 for (const auto& logEntry : logEntries) 592 { 593 // Check to see if this logEntry has a message that matches. 594 util::getProperty(LOGGING_IFACE, MESSAGE_PROP, logEntry, 595 logEntryService, bus, logMessage); 596 597 if (message == logMessage) 598 { 599 // Log entry matches call out and message, set Resolved to true 600 bool resolved = true; 601 util::setProperty(LOGGING_IFACE, RESOLVED_PROP, logEntry, 602 logEntryService, bus, resolved); 603 } 604 } 605 } 606 catch (const std::exception& e) 607 { 608 log<level::INFO>("Failed to resolve error", 609 entry("CALLOUT=%s", callout.c_str()), 610 entry("ERROR=%s", message.c_str())); 611 } 612 } 613 614 void PowerSupply::updateInventory() 615 { 616 using namespace phosphor::pmbus; 617 using namespace sdbusplus::message; 618 619 // Build the object map and send it to the inventory 620 using Properties = std::map<std::string, std::variant<std::string, bool>>; 621 using Interfaces = std::map<std::string, Properties>; 622 using Object = std::map<object_path, Interfaces>; 623 Properties assetProps; 624 Properties operProps; 625 Interfaces interfaces; 626 Object object; 627 628 // If any of these accesses fail, the fields will just be 629 // blank in the inventory. Leave logging ReadFailure errors 630 // to analyze() as it runs continuously and will most 631 // likely hit and threshold them first anyway. The 632 // readString() function will do the tracing of the failing 633 // path so this code doesn't need to. 634 for (const auto& fru : fruJson.at("fruConfigs")) 635 { 636 if (fru.at("interface") == ASSET_IFACE) 637 { 638 try 639 { 640 assetProps.emplace( 641 fru.at("propertyName"), 642 present ? pmbusIntf.readString(fru.at("fileName"), 643 inventoryPMBusAccessType) 644 : ""); 645 } 646 catch (const ReadFailure& e) 647 {} 648 } 649 } 650 651 operProps.emplace(FUNCTIONAL_PROP, present); 652 interfaces.emplace(ASSET_IFACE, std::move(assetProps)); 653 interfaces.emplace(OPERATIONAL_STATE_IFACE, std::move(operProps)); 654 655 // For Notify(), just send the relative path of the inventory 656 // object so remove the INVENTORY_OBJ_PATH prefix 657 auto path = inventoryPath.substr(strlen(INVENTORY_OBJ_PATH)); 658 659 object.emplace(path, std::move(interfaces)); 660 661 try 662 { 663 auto service = 664 util::getService(INVENTORY_OBJ_PATH, INVENTORY_MGR_IFACE, bus); 665 666 if (service.empty()) 667 { 668 log<level::ERR>("Unable to get inventory manager service"); 669 return; 670 } 671 672 auto method = bus.new_method_call(service.c_str(), INVENTORY_OBJ_PATH, 673 INVENTORY_MGR_IFACE, "Notify"); 674 675 method.append(std::move(object)); 676 677 auto reply = bus.call(method); 678 } 679 catch (const std::exception& e) 680 { 681 log<level::ERR>(e.what(), entry("PATH=%s", inventoryPath.c_str())); 682 } 683 } 684 685 void PowerSupply::syncHistory() 686 { 687 using namespace phosphor::gpio; 688 689 if (syncGPIODevPath.empty()) 690 { 691 // Sync not implemented 692 return; 693 } 694 695 GPIO gpio{syncGPIODevPath, static_cast<gpioNum_t>(syncGPIONumber), 696 Direction::output}; 697 698 try 699 { 700 gpio.set(Value::low); 701 702 std::this_thread::sleep_for(std::chrono::milliseconds{5}); 703 704 gpio.set(Value::high); 705 706 recordManager->clear(); 707 } 708 catch (const std::exception& e) 709 { 710 // Do nothing. There would already be a journal entry. 711 } 712 } 713 714 void PowerSupply::enableHistory( 715 const std::string& objectPath, size_t numRecords, 716 const std::string& syncGPIOPath, size_t syncGPIONum) 717 { 718 historyObjectPath = objectPath; 719 syncGPIODevPath = syncGPIOPath; 720 syncGPIONumber = syncGPIONum; 721 722 recordManager = std::make_unique<history::RecordManager>(numRecords); 723 724 auto avgPath = historyObjectPath + '/' + history::Average::name; 725 auto maxPath = historyObjectPath + '/' + history::Maximum::name; 726 727 average = std::make_unique<history::Average>(bus, avgPath); 728 729 maximum = std::make_unique<history::Maximum>(bus, maxPath); 730 } 731 732 void PowerSupply::updateHistory() 733 { 734 if (!recordManager) 735 { 736 // Not enabled 737 return; 738 } 739 740 // Read just the most recent average/max record 741 auto data = 742 pmbusIntf.readBinary(INPUT_HISTORY, pmbus::Type::HwmonDeviceDebug, 743 history::RecordManager::RAW_RECORD_SIZE); 744 745 // Update D-Bus only if something changed (a new record ID, or cleared out) 746 auto changed = recordManager->add(data); 747 if (changed) 748 { 749 average->values(recordManager->getAverageRecords()); 750 maximum->values(recordManager->getMaximumRecords()); 751 } 752 } 753 754 } // namespace psu 755 } // namespace power 756 } // namespace phosphor 757