1 /** 2 * Copyright © 2017 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "config.h" 17 18 #include "power_supply.hpp" 19 20 #include "elog-errors.hpp" 21 #include "gpio.hpp" 22 #include "names_values.hpp" 23 #include "pmbus.hpp" 24 #include "types.hpp" 25 #include "utility.hpp" 26 27 #include <org/open_power/Witherspoon/Fault/error.hpp> 28 #include <phosphor-logging/log.hpp> 29 #include <xyz/openbmc_project/Common/Device/error.hpp> 30 31 #include <functional> 32 33 namespace phosphor 34 { 35 namespace power 36 { 37 namespace psu 38 { 39 40 using namespace phosphor::logging; 41 using namespace sdbusplus::org::open_power::Witherspoon::Fault::Error; 42 using namespace sdbusplus::xyz::openbmc_project::Common::Device::Error; 43 44 PowerSupply::PowerSupply(const std::string& name, size_t inst, 45 const std::string& objpath, const std::string& invpath, 46 sdbusplus::bus::bus& bus, const sdeventplus::Event& e, 47 std::chrono::seconds& t, std::chrono::seconds& p) : 48 Device(name, inst), 49 monitorPath(objpath), pmbusIntf(objpath), 50 inventoryPath(INVENTORY_OBJ_PATH + invpath), bus(bus), presentInterval(p), 51 presentTimer(e, std::bind([this]() { 52 // The hwmon path may have changed. 53 pmbusIntf.findHwmonDir(); 54 this->present = true; 55 56 // Sync the INPUT_HISTORY data for all PSs 57 syncHistory(); 58 59 // Update the inventory for the new device 60 updateInventory(); 61 })), 62 powerOnInterval(t), 63 powerOnTimer(e, std::bind([this]() { this->powerOn = true; })) 64 { 65 getAccessType(); 66 67 using namespace sdbusplus::bus; 68 using namespace phosphor::pmbus; 69 std::uint16_t statusWord = 0; 70 try 71 { 72 // Read the 2 byte STATUS_WORD value to check for faults. 73 statusWord = pmbusIntf.read(STATUS_WORD, Type::Debug); 74 if (!((statusWord & status_word::INPUT_FAULT_WARN) || 75 (statusWord & status_word::VIN_UV_FAULT))) 76 { 77 resolveError(inventoryPath, 78 std::string(PowerSupplyInputFault::errName)); 79 } 80 } 81 catch (ReadFailure& e) 82 { 83 log<level::INFO>("Unable to read the 2 byte STATUS_WORD value to check " 84 "for power-supply input faults."); 85 } 86 presentMatch = std::make_unique<match_t>( 87 bus, match::rules::propertiesChanged(inventoryPath, INVENTORY_IFACE), 88 [this](auto& msg) { this->inventoryChanged(msg); }); 89 // Get initial presence state. 90 updatePresence(); 91 92 // Write the SN, PN, etc to the inventory 93 updateInventory(); 94 95 // Subscribe to power state changes 96 powerOnMatch = std::make_unique<match_t>( 97 bus, match::rules::propertiesChanged(POWER_OBJ_PATH, POWER_IFACE), 98 [this](auto& msg) { this->powerStateChanged(msg); }); 99 // Get initial power state. 100 updatePowerState(); 101 } 102 103 void PowerSupply::getAccessType() 104 { 105 using namespace phosphor::power::util; 106 fruJson = loadJSONFromFile(PSU_JSON_PATH); 107 if (fruJson == nullptr) 108 { 109 log<level::ERR>("InternalFailure when parsing the JSON file"); 110 return; 111 } 112 inventoryPMBusAccessType = getPMBusAccessType(fruJson); 113 } 114 115 void PowerSupply::captureCmd(util::NamesValues& nv, const std::string& cmd, 116 phosphor::pmbus::Type type) 117 { 118 if (pmbusIntf.exists(cmd, type)) 119 { 120 try 121 { 122 auto val = pmbusIntf.read(cmd, type); 123 nv.add(cmd, val); 124 } 125 catch (std::exception& e) 126 { 127 log<level::INFO>("Unable to capture metadata", 128 entry("CMD=%s", cmd.c_str())); 129 } 130 } 131 } 132 133 void PowerSupply::analyze() 134 { 135 using namespace phosphor::pmbus; 136 137 try 138 { 139 if (present) 140 { 141 std::uint16_t statusWord = 0; 142 143 // Read the 2 byte STATUS_WORD value to check for faults. 144 statusWord = pmbusIntf.read(STATUS_WORD, Type::Debug); 145 readFail = 0; 146 147 checkInputFault(statusWord); 148 149 if (powerOn && (inputFault == 0) && !faultFound) 150 { 151 checkFanFault(statusWord); 152 checkTemperatureFault(statusWord); 153 checkOutputOvervoltageFault(statusWord); 154 checkCurrentOutOverCurrentFault(statusWord); 155 checkPGOrUnitOffFault(statusWord); 156 } 157 158 updateHistory(); 159 } 160 } 161 catch (ReadFailure& e) 162 { 163 if (readFail < FAULT_COUNT) 164 { 165 readFail++; 166 } 167 168 if (!readFailLogged && readFail >= FAULT_COUNT) 169 { 170 commit<ReadFailure>(); 171 readFailLogged = true; 172 } 173 } 174 175 return; 176 } 177 178 void PowerSupply::inventoryChanged(sdbusplus::message::message& msg) 179 { 180 std::string msgSensor; 181 std::map<std::string, std::variant<uint32_t, bool>> msgData; 182 msg.read(msgSensor, msgData); 183 184 // Check if it was the Present property that changed. 185 auto valPropMap = msgData.find(PRESENT_PROP); 186 if (valPropMap != msgData.end()) 187 { 188 if (std::get<bool>(valPropMap->second)) 189 { 190 clearFaults(); 191 presentTimer.restartOnce(presentInterval); 192 } 193 else 194 { 195 present = false; 196 presentTimer.setEnabled(false); 197 198 // Clear out the now outdated inventory properties 199 updateInventory(); 200 } 201 } 202 203 return; 204 } 205 206 void PowerSupply::updatePresence() 207 { 208 // Use getProperty utility function to get presence status. 209 std::string service = "xyz.openbmc_project.Inventory.Manager"; 210 util::getProperty(INVENTORY_IFACE, PRESENT_PROP, inventoryPath, service, 211 bus, this->present); 212 } 213 214 void PowerSupply::powerStateChanged(sdbusplus::message::message& msg) 215 { 216 int32_t state = 0; 217 std::string msgSensor; 218 std::map<std::string, std::variant<int32_t>> msgData; 219 msg.read(msgSensor, msgData); 220 221 // Check if it was the Present property that changed. 222 auto valPropMap = msgData.find("state"); 223 if (valPropMap != msgData.end()) 224 { 225 state = std::get<int32_t>(valPropMap->second); 226 227 // Power is on when state=1. Set the fault logged variables to false 228 // and start the power on timer when the state changes to 1. 229 if (state) 230 { 231 clearFaults(); 232 powerOnTimer.restartOnce(powerOnInterval); 233 } 234 else 235 { 236 powerOnTimer.setEnabled(false); 237 powerOn = false; 238 } 239 } 240 } 241 242 void PowerSupply::updatePowerState() 243 { 244 powerOn = util::isPoweredOn(bus); 245 } 246 247 void PowerSupply::checkInputFault(const uint16_t statusWord) 248 { 249 using namespace phosphor::pmbus; 250 251 if ((inputFault < FAULT_COUNT) && 252 ((statusWord & status_word::INPUT_FAULT_WARN) || 253 (statusWord & status_word::VIN_UV_FAULT))) 254 { 255 if (inputFault == 0) 256 { 257 log<level::INFO>("INPUT or VIN_UV fault", 258 entry("STATUS_WORD=0x%04X", statusWord)); 259 } 260 261 inputFault++; 262 } 263 else 264 { 265 if ((inputFault > 0) && !(statusWord & status_word::INPUT_FAULT_WARN) && 266 !(statusWord & status_word::VIN_UV_FAULT)) 267 { 268 inputFault = 0; 269 faultFound = false; 270 // When an input fault occurs, the power supply cannot be on. 271 // However, the check for the case where the power supply should be 272 // on will stop when there is a fault found. 273 // Clear the powerOnFault when the inputFault is cleared to reset 274 // the powerOnFault de-glitching. 275 powerOnFault = 0; 276 277 log<level::INFO>("INPUT_FAULT_WARN cleared", 278 entry("POWERSUPPLY=%s", inventoryPath.c_str())); 279 280 resolveError(inventoryPath, 281 std::string(PowerSupplyInputFault::errName)); 282 283 if (powerOn) 284 { 285 // The power supply will not be immediately powered on after 286 // the input power is restored. 287 powerOn = false; 288 // Start up the timer that will set the state to indicate we 289 // are ready for the powered on fault checks. 290 powerOnTimer.restartOnce(powerOnInterval); 291 } 292 } 293 } 294 295 if (!faultFound && (inputFault >= FAULT_COUNT)) 296 { 297 // If the power is on, report the fault in an error log entry. 298 if (powerOn) 299 { 300 util::NamesValues nv; 301 nv.add("STATUS_WORD", statusWord); 302 captureCmd(nv, STATUS_INPUT, Type::Debug); 303 304 using metadata = 305 org::open_power::Witherspoon::Fault::PowerSupplyInputFault; 306 307 report<PowerSupplyInputFault>( 308 metadata::RAW_STATUS(nv.get().c_str()), 309 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 310 311 faultFound = true; 312 } 313 } 314 } 315 316 void PowerSupply::checkPGOrUnitOffFault(const uint16_t statusWord) 317 { 318 using namespace phosphor::pmbus; 319 320 if (powerOnFault < FAULT_COUNT) 321 { 322 // Check PG# and UNIT_IS_OFF 323 if ((statusWord & status_word::POWER_GOOD_NEGATED) || 324 (statusWord & status_word::UNIT_IS_OFF)) 325 { 326 log<level::INFO>("PGOOD or UNIT_IS_OFF bit bad", 327 entry("STATUS_WORD=0x%04X", statusWord)); 328 powerOnFault++; 329 } 330 else 331 { 332 if (powerOnFault > 0) 333 { 334 log<level::INFO>("PGOOD and UNIT_IS_OFF bits good"); 335 powerOnFault = 0; 336 } 337 } 338 339 if (!faultFound && (powerOnFault >= FAULT_COUNT)) 340 { 341 faultFound = true; 342 343 util::NamesValues nv; 344 nv.add("STATUS_WORD", statusWord); 345 captureCmd(nv, STATUS_INPUT, Type::Debug); 346 auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0); 347 captureCmd(nv, status0Vout, Type::Debug); 348 captureCmd(nv, STATUS_IOUT, Type::Debug); 349 captureCmd(nv, STATUS_MFR, Type::Debug); 350 351 using metadata = 352 org::open_power::Witherspoon::Fault::PowerSupplyShouldBeOn; 353 354 // A power supply is OFF (or pgood low) but should be on. 355 report<PowerSupplyShouldBeOn>( 356 metadata::RAW_STATUS(nv.get().c_str()), 357 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 358 } 359 } 360 } 361 362 void PowerSupply::checkCurrentOutOverCurrentFault(const uint16_t statusWord) 363 { 364 using namespace phosphor::pmbus; 365 366 if (outputOCFault < FAULT_COUNT) 367 { 368 // Check for an output overcurrent fault. 369 if ((statusWord & status_word::IOUT_OC_FAULT)) 370 { 371 outputOCFault++; 372 } 373 else 374 { 375 if (outputOCFault > 0) 376 { 377 outputOCFault = 0; 378 } 379 } 380 381 if (!faultFound && (outputOCFault >= FAULT_COUNT)) 382 { 383 util::NamesValues nv; 384 nv.add("STATUS_WORD", statusWord); 385 captureCmd(nv, STATUS_INPUT, Type::Debug); 386 auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0); 387 captureCmd(nv, status0Vout, Type::Debug); 388 captureCmd(nv, STATUS_IOUT, Type::Debug); 389 captureCmd(nv, STATUS_MFR, Type::Debug); 390 391 using metadata = org::open_power::Witherspoon::Fault:: 392 PowerSupplyOutputOvercurrent; 393 394 report<PowerSupplyOutputOvercurrent>( 395 metadata::RAW_STATUS(nv.get().c_str()), 396 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 397 398 faultFound = true; 399 } 400 } 401 } 402 403 void PowerSupply::checkOutputOvervoltageFault(const uint16_t statusWord) 404 { 405 using namespace phosphor::pmbus; 406 407 if (outputOVFault < FAULT_COUNT) 408 { 409 // Check for an output overvoltage fault. 410 if (statusWord & status_word::VOUT_OV_FAULT) 411 { 412 outputOVFault++; 413 } 414 else 415 { 416 if (outputOVFault > 0) 417 { 418 outputOVFault = 0; 419 } 420 } 421 422 if (!faultFound && (outputOVFault >= FAULT_COUNT)) 423 { 424 util::NamesValues nv; 425 nv.add("STATUS_WORD", statusWord); 426 captureCmd(nv, STATUS_INPUT, Type::Debug); 427 auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0); 428 captureCmd(nv, status0Vout, Type::Debug); 429 captureCmd(nv, STATUS_IOUT, Type::Debug); 430 captureCmd(nv, STATUS_MFR, Type::Debug); 431 432 using metadata = org::open_power::Witherspoon::Fault:: 433 PowerSupplyOutputOvervoltage; 434 435 report<PowerSupplyOutputOvervoltage>( 436 metadata::RAW_STATUS(nv.get().c_str()), 437 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 438 439 faultFound = true; 440 } 441 } 442 } 443 444 void PowerSupply::checkFanFault(const uint16_t statusWord) 445 { 446 using namespace phosphor::pmbus; 447 448 if (fanFault < FAULT_COUNT) 449 { 450 // Check for a fan fault or warning condition 451 if (statusWord & status_word::FAN_FAULT) 452 { 453 fanFault++; 454 } 455 else 456 { 457 if (fanFault > 0) 458 { 459 fanFault = 0; 460 } 461 } 462 463 if (!faultFound && (fanFault >= FAULT_COUNT)) 464 { 465 util::NamesValues nv; 466 nv.add("STATUS_WORD", statusWord); 467 captureCmd(nv, STATUS_MFR, Type::Debug); 468 captureCmd(nv, STATUS_TEMPERATURE, Type::Debug); 469 captureCmd(nv, STATUS_FANS_1_2, Type::Debug); 470 471 using metadata = 472 org::open_power::Witherspoon::Fault::PowerSupplyFanFault; 473 474 report<PowerSupplyFanFault>( 475 metadata::RAW_STATUS(nv.get().c_str()), 476 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 477 478 faultFound = true; 479 } 480 } 481 } 482 483 void PowerSupply::checkTemperatureFault(const uint16_t statusWord) 484 { 485 using namespace phosphor::pmbus; 486 487 // Due to how the PMBus core device driver sends a clear faults command 488 // the bit in STATUS_WORD will likely be cleared when we attempt to examine 489 // it for a Thermal Fault or Warning. So, check the STATUS_WORD and the 490 // STATUS_TEMPERATURE bits. If either indicates a fault, proceed with 491 // logging the over-temperature condition. 492 std::uint8_t statusTemperature = 0; 493 statusTemperature = pmbusIntf.read(STATUS_TEMPERATURE, Type::Debug); 494 if (temperatureFault < FAULT_COUNT) 495 { 496 if ((statusWord & status_word::TEMPERATURE_FAULT_WARN) || 497 (statusTemperature & status_temperature::OT_FAULT)) 498 { 499 temperatureFault++; 500 } 501 else 502 { 503 if (temperatureFault > 0) 504 { 505 temperatureFault = 0; 506 } 507 } 508 509 if (!faultFound && (temperatureFault >= FAULT_COUNT)) 510 { 511 // The power supply has had an over-temperature condition. 512 // This may not result in a shutdown if experienced for a short 513 // duration. 514 // This should not occur under normal conditions. 515 // The power supply may be faulty, or the paired supply may be 516 // putting out less current. 517 // Capture command responses with potentially relevant information, 518 // and call out the power supply reporting the condition. 519 util::NamesValues nv; 520 nv.add("STATUS_WORD", statusWord); 521 captureCmd(nv, STATUS_MFR, Type::Debug); 522 captureCmd(nv, STATUS_IOUT, Type::Debug); 523 nv.add("STATUS_TEMPERATURE", statusTemperature); 524 captureCmd(nv, STATUS_FANS_1_2, Type::Debug); 525 526 using metadata = org::open_power::Witherspoon::Fault:: 527 PowerSupplyTemperatureFault; 528 529 report<PowerSupplyTemperatureFault>( 530 metadata::RAW_STATUS(nv.get().c_str()), 531 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str())); 532 533 faultFound = true; 534 } 535 } 536 } 537 538 void PowerSupply::clearFaults() 539 { 540 readFail = 0; 541 readFailLogged = false; 542 inputFault = 0; 543 powerOnFault = 0; 544 outputOCFault = 0; 545 outputOVFault = 0; 546 fanFault = 0; 547 temperatureFault = 0; 548 faultFound = false; 549 550 return; 551 } 552 553 void PowerSupply::resolveError(const std::string& callout, 554 const std::string& message) 555 { 556 using EndpointList = std::vector<std::string>; 557 558 try 559 { 560 auto path = callout + "/fault"; 561 // Get the service name from the mapper for the fault callout 562 auto service = util::getService(path, ASSOCIATION_IFACE, bus); 563 564 // Use getProperty utility function to get log entries (endpoints) 565 EndpointList logEntries; 566 util::getProperty(ASSOCIATION_IFACE, ENDPOINTS_PROP, path, service, bus, 567 logEntries); 568 569 // It is possible that all such entries for this callout have since 570 // been deleted. 571 if (logEntries.empty()) 572 { 573 return; 574 } 575 576 auto logEntryService = 577 util::getService(logEntries[0], LOGGING_IFACE, bus); 578 if (logEntryService.empty()) 579 { 580 return; 581 } 582 583 // go through each log entry that matches this callout path 584 std::string logMessage; 585 for (const auto& logEntry : logEntries) 586 { 587 // Check to see if this logEntry has a message that matches. 588 util::getProperty(LOGGING_IFACE, MESSAGE_PROP, logEntry, 589 logEntryService, bus, logMessage); 590 591 if (message == logMessage) 592 { 593 // Log entry matches call out and message, set Resolved to true 594 bool resolved = true; 595 util::setProperty(LOGGING_IFACE, RESOLVED_PROP, logEntry, 596 logEntryService, bus, resolved); 597 } 598 } 599 } 600 catch (std::exception& e) 601 { 602 log<level::INFO>("Failed to resolve error", 603 entry("CALLOUT=%s", callout.c_str()), 604 entry("ERROR=%s", message.c_str())); 605 } 606 } 607 608 void PowerSupply::updateInventory() 609 { 610 using namespace phosphor::pmbus; 611 using namespace sdbusplus::message; 612 613 // Build the object map and send it to the inventory 614 using Properties = std::map<std::string, variant<std::string>>; 615 using Interfaces = std::map<std::string, Properties>; 616 using Object = std::map<object_path, Interfaces>; 617 Properties assetProps; 618 Interfaces interfaces; 619 Object object; 620 621 // If any of these accesses fail, the fields will just be 622 // blank in the inventory. Leave logging ReadFailure errors 623 // to analyze() as it runs continuously and will most 624 // likely hit and threshold them first anyway. The 625 // readString() function will do the tracing of the failing 626 // path so this code doesn't need to. 627 for (const auto& fru : fruJson.at("fruConfigs")) 628 { 629 if (fru.at("interface") == ASSET_IFACE) 630 { 631 try 632 { 633 assetProps.emplace( 634 fru.at("propertyName"), 635 present ? pmbusIntf.readString(fru.at("fileName"), 636 inventoryPMBusAccessType) 637 : ""); 638 } 639 catch (ReadFailure& e) 640 { 641 } 642 } 643 } 644 645 interfaces.emplace(ASSET_IFACE, std::move(assetProps)); 646 647 // For Notify(), just send the relative path of the inventory 648 // object so remove the INVENTORY_OBJ_PATH prefix 649 auto path = inventoryPath.substr(strlen(INVENTORY_OBJ_PATH)); 650 651 object.emplace(path, std::move(interfaces)); 652 653 try 654 { 655 auto service = 656 util::getService(INVENTORY_OBJ_PATH, INVENTORY_MGR_IFACE, bus); 657 658 if (service.empty()) 659 { 660 log<level::ERR>("Unable to get inventory manager service"); 661 return; 662 } 663 664 auto method = bus.new_method_call(service.c_str(), INVENTORY_OBJ_PATH, 665 INVENTORY_MGR_IFACE, "Notify"); 666 667 method.append(std::move(object)); 668 669 auto reply = bus.call(method); 670 } 671 catch (std::exception& e) 672 { 673 log<level::ERR>(e.what(), entry("PATH=%s", inventoryPath.c_str())); 674 } 675 } 676 677 void PowerSupply::syncHistory() 678 { 679 using namespace phosphor::gpio; 680 681 if (syncGPIODevPath.empty()) 682 { 683 // Sync not implemented 684 return; 685 } 686 687 GPIO gpio{syncGPIODevPath, static_cast<gpioNum_t>(syncGPIONumber), 688 Direction::output}; 689 690 try 691 { 692 gpio.set(Value::low); 693 694 std::this_thread::sleep_for(std::chrono::milliseconds{5}); 695 696 gpio.set(Value::high); 697 698 recordManager->clear(); 699 } 700 catch (std::exception& e) 701 { 702 // Do nothing. There would already be a journal entry. 703 } 704 } 705 706 void PowerSupply::enableHistory(const std::string& objectPath, 707 size_t numRecords, 708 const std::string& syncGPIOPath, 709 size_t syncGPIONum) 710 { 711 historyObjectPath = objectPath; 712 syncGPIODevPath = syncGPIOPath; 713 syncGPIONumber = syncGPIONum; 714 715 recordManager = std::make_unique<history::RecordManager>(numRecords); 716 717 auto avgPath = historyObjectPath + '/' + history::Average::name; 718 auto maxPath = historyObjectPath + '/' + history::Maximum::name; 719 720 average = std::make_unique<history::Average>(bus, avgPath); 721 722 maximum = std::make_unique<history::Maximum>(bus, maxPath); 723 } 724 725 void PowerSupply::updateHistory() 726 { 727 if (!recordManager) 728 { 729 // Not enabled 730 return; 731 } 732 733 // Read just the most recent average/max record 734 auto data = 735 pmbusIntf.readBinary(INPUT_HISTORY, pmbus::Type::HwmonDeviceDebug, 736 history::RecordManager::RAW_RECORD_SIZE); 737 738 // Update D-Bus only if something changed (a new record ID, or cleared out) 739 auto changed = recordManager->add(data); 740 if (changed) 741 { 742 average->values(std::move(recordManager->getAverageRecords())); 743 maximum->values(std::move(recordManager->getMaximumRecords())); 744 } 745 } 746 747 } // namespace psu 748 } // namespace power 749 } // namespace phosphor 750