#include "config.h" #include "occ_manager.hpp" #include "i2c_occ.hpp" #include "occ_dbus.hpp" #include "utils.hpp" #include <phosphor-logging/elog-errors.hpp> #include <phosphor-logging/log.hpp> #include <xyz/openbmc_project/Common/error.hpp> #include <chrono> #include <cmath> #include <filesystem> #include <regex> namespace open_power { namespace occ { constexpr uint32_t fruTypeNotAvailable = 0xFF; constexpr auto fruTypeSuffix = "fru_type"; constexpr auto faultSuffix = "fault"; constexpr auto inputSuffix = "input"; using namespace phosphor::logging; template <typename T> T readFile(const std::string& path) { std::ifstream ifs; ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit | std::ifstream::eofbit); T data; try { ifs.open(path); ifs >> data; ifs.close(); } catch (const std::exception& e) { auto err = errno; throw std::system_error(err, std::generic_category()); } return data; } void Manager::findAndCreateObjects() { #ifndef POWER10 for (auto id = 0; id < MAX_CPUS; ++id) { // Create one occ per cpu auto occ = std::string(OCC_NAME) + std::to_string(id); createObjects(occ); } #else // Create the OCCs based on on the /dev/occX devices auto occs = findOCCsInDev(); if (occs.empty() || (prevOCCSearch.size() != occs.size())) { // Something changed or no OCCs yet, try again in 10s. // Note on the first pass prevOCCSearch will be empty, // so there will be at least one delay to give things // a chance to settle. prevOCCSearch = occs; using namespace std::literals::chrono_literals; discoverTimer->restartOnce(10s); } else { discoverTimer.reset(); // createObjects requires OCC0 first. std::sort(occs.begin(), occs.end()); for (auto id : occs) { createObjects(std::string(OCC_NAME) + std::to_string(id)); } } #endif } std::vector<int> Manager::findOCCsInDev() { std::vector<int> occs; std::regex expr{R"(occ(\d+)$)"}; for (auto& file : fs::directory_iterator("/dev")) { std::smatch match; std::string path{file.path().string()}; if (std::regex_search(path, match, expr)) { auto num = std::stoi(match[1].str()); // /dev numbering starts at 1, ours starts at 0. occs.push_back(num - 1); } } return occs; } int Manager::cpuCreated(sdbusplus::message::message& msg) { namespace fs = std::filesystem; sdbusplus::message::object_path o; msg.read(o); fs::path cpuPath(std::string(std::move(o))); auto name = cpuPath.filename().string(); auto index = name.find(CPU_NAME); name.replace(index, std::strlen(CPU_NAME), OCC_NAME); createObjects(name); return 0; } void Manager::createObjects(const std::string& occ) { auto path = fs::path(OCC_CONTROL_ROOT) / occ; passThroughObjects.emplace_back( std::make_unique<PassThrough>(path.c_str())); statusObjects.emplace_back(std::make_unique<Status>( event, path.c_str(), *this, std::bind(std::mem_fn(&Manager::statusCallBack), this, std::placeholders::_1) #ifdef PLDM , std::bind(std::mem_fn(&pldm::Interface::resetOCC), pldmHandle.get(), std::placeholders::_1) #endif )); // Create the power cap monitor object for master occ (0) if (!pcap) { pcap = std::make_unique<open_power::occ::powercap::PowerCap>( *statusObjects.front()); } #ifdef POWER10 // Create the power mode monitor object for master occ (0) if (!pmode) { pmode = std::make_unique<open_power::occ::powermode::PowerMode>( *statusObjects.front()); } #endif } void Manager::statusCallBack(bool status) { using InternalFailure = sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure; // At this time, it won't happen but keeping it // here just in case something changes in the future if ((activeCount == 0) && (!status)) { log<level::ERR>("Invalid update on OCCActive"); elog<InternalFailure>(); } activeCount += status ? 1 : -1; // Only start presence detection if all the OCCs are bound if (activeCount == statusObjects.size()) { for (auto& obj : statusObjects) { obj->addPresenceWatchMaster(); } } if ((!_pollTimer->isEnabled()) && (activeCount > 0)) { log<level::INFO>( fmt::format( "Manager::statusCallBack(): {} OCCs will be polled every {} seconds", activeCount, pollInterval) .c_str()); // Send poll and start OCC poll timer pollerTimerExpired(); } else if ((_pollTimer->isEnabled()) && (activeCount == 0)) { // Stop OCC poll timer log<level::INFO>( "Manager::statusCallBack(): OCCs are not running, stopping poll timer"); _pollTimer->setEnabled(false); #ifdef READ_OCC_SENSORS for (auto& obj : statusObjects) { setSensorValueToNaN(obj->getOccInstanceID()); } #endif } } #ifdef I2C_OCC void Manager::initStatusObjects() { // Make sure we have a valid path string static_assert(sizeof(DEV_PATH) != 0); auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH); auto occMasterName = deviceNames.front(); for (auto& name : deviceNames) { i2c_occ::i2cToDbus(name); name = std::string(OCC_NAME) + '_' + name; auto path = fs::path(OCC_CONTROL_ROOT) / name; statusObjects.emplace_back( std::make_unique<Status>(event, path.c_str(), *this)); } // The first device is master occ pcap = std::make_unique<open_power::occ::powercap::PowerCap>( *statusObjects.front(), occMasterName); #ifdef POWER10 pmode = std::make_unique<open_power::occ::powermode::PowerMode>( *statusObjects.front()); #endif } #endif #ifdef PLDM bool Manager::updateOCCActive(instanceID instance, bool status) { return (statusObjects[instance])->occActive(status); } #endif void Manager::pollerTimerExpired() { if (activeCount == 0) { // No OCCs running, so poll timer will not be restarted log<level::INFO>( "Manager::pollerTimerExpire(): No OCCs running, poll timer not restarted"); } if (!_pollTimer) { log<level::ERR>( "Manager::pollerTimerExpired() ERROR: Timer not defined"); return; } for (auto& obj : statusObjects) { // Read sysfs to force kernel to poll OCC obj->readOccState(); #ifdef READ_OCC_SENSORS // Read occ sensor values auto id = obj->getOccInstanceID(); if (!obj->occActive()) { // Occ not activated setSensorValueToNaN(id); continue; } getSensorValues(id, obj->isMasterOcc()); #endif } // Restart OCC poll timer _pollTimer->restartOnce(std::chrono::seconds(pollInterval)); } #ifdef READ_OCC_SENSORS void Manager::readTempSensors(const fs::path& path, uint32_t id) { std::regex expr{"temp\\d+_label$"}; // Example: temp5_label for (auto& file : fs::directory_iterator(path)) { if (!std::regex_search(file.path().string(), expr)) { continue; } uint32_t labelValue{0}; try { labelValue = readFile<uint32_t>(file.path()); } catch (const std::system_error& e) { log<level::DEBUG>( fmt::format("readTempSensors: Failed reading {}, errno = {}", file.path().string(), e.code().value()) .c_str()); continue; } const std::string& tempLabel = "label"; const std::string filePathString = file.path().string().substr( 0, file.path().string().length() - tempLabel.length()); uint32_t fruTypeValue{0}; try { fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix); } catch (const std::system_error& e) { log<level::DEBUG>( fmt::format("readTempSensors: Failed reading {}, errno = {}", filePathString + fruTypeSuffix, e.code().value()) .c_str()); continue; } std::string sensorPath = OCC_SENSORS_ROOT + std::string("/temperature/"); if (fruTypeValue == VRMVdd) { sensorPath.append("vrm_vdd" + std::to_string(id) + "_temp"); } else { uint16_t type = (labelValue & 0xFF000000) >> 24; uint16_t instanceID = labelValue & 0x0000FFFF; if (type == OCC_DIMM_TEMP_SENSOR_TYPE) { if (fruTypeValue == fruTypeNotAvailable) { // Not all DIMM related temps are available to read // (no _input file in this case) continue; } auto iter = dimmTempSensorName.find(fruTypeValue); if (iter == dimmTempSensorName.end()) { log<level::ERR>( fmt::format( "readTempSensors: Fru type error! fruTypeValue = {}) ", fruTypeValue) .c_str()); continue; } sensorPath.append("dimm" + std::to_string(instanceID) + iter->second); } else if (type == OCC_CPU_TEMP_SENSOR_TYPE) { if (fruTypeValue != processorCore) { // TODO: support IO ring temp continue; } // The OCC reports small core temps, of which there are // two per big core. All current P10 systems are in big // core mode, so use a big core name. uint16_t coreNum = instanceID / 2; uint16_t tempNum = instanceID % 2; sensorPath.append("proc" + std::to_string(id) + "_core" + std::to_string(coreNum) + "_" + std::to_string(tempNum) + "_temp"); } else { continue; } } uint32_t faultValue{0}; try { faultValue = readFile<uint32_t>(filePathString + faultSuffix); } catch (const std::system_error& e) { log<level::DEBUG>( fmt::format("readTempSensors: Failed reading {}, errno = {}", filePathString + faultSuffix, e.code().value()) .c_str()); continue; } // At this point, the sensor will be created for sure. if (existingSensors.find(sensorPath) == existingSensors.end()) { open_power::occ::dbus::OccDBusSensors::getOccDBus() .setChassisAssociation(sensorPath); } if (faultValue != 0) { open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue( sensorPath, std::numeric_limits<double>::quiet_NaN()); open_power::occ::dbus::OccDBusSensors::getOccDBus() .setOperationalStatus(sensorPath, false); continue; } double tempValue{0}; try { tempValue = readFile<double>(filePathString + inputSuffix); } catch (const std::system_error& e) { log<level::DEBUG>( fmt::format("readTempSensors: Failed reading {}, errno = {}", filePathString + inputSuffix, e.code().value()) .c_str()); continue; } open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue( sensorPath, tempValue * std::pow(10, -3)); open_power::occ::dbus::OccDBusSensors::getOccDBus() .setOperationalStatus(sensorPath, true); existingSensors[sensorPath] = id; } return; } std::optional<std::string> Manager::getPowerLabelFunctionID(const std::string& value) { // If the value is "system", then the FunctionID is "system". if (value == "system") { return value; } // If the value is not "system", then the label value have 3 numbers, of // which we only care about the middle one: // <sensor id>_<function id>_<apss channel> // eg: The value is "0_10_5" , then the FunctionID is "10". if (value.find("_") == std::string::npos) { return std::nullopt; } auto powerLabelValue = value.substr((value.find("_") + 1)); if (powerLabelValue.find("_") == std::string::npos) { return std::nullopt; } return powerLabelValue.substr(0, powerLabelValue.find("_")); } void Manager::readPowerSensors(const fs::path& path, uint32_t id) { std::regex expr{"power\\d+_label$"}; // Example: power5_label for (auto& file : fs::directory_iterator(path)) { if (!std::regex_search(file.path().string(), expr)) { continue; } std::string labelValue; try { labelValue = readFile<std::string>(file.path()); } catch (const std::system_error& e) { log<level::DEBUG>( fmt::format("readPowerSensors: Failed reading {}, errno = {}", file.path().string(), e.code().value()) .c_str()); continue; } auto functionID = getPowerLabelFunctionID(labelValue); if (functionID == std::nullopt) { continue; } const std::string& tempLabel = "label"; const std::string filePathString = file.path().string().substr( 0, file.path().string().length() - tempLabel.length()); std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/"); auto iter = powerSensorName.find(*functionID); if (iter == powerSensorName.end()) { continue; } sensorPath.append(iter->second); double tempValue{0}; try { tempValue = readFile<double>(filePathString + inputSuffix); } catch (const std::system_error& e) { log<level::DEBUG>( fmt::format("readTempSensors: Failed reading {}, errno = {}", filePathString + inputSuffix, e.code().value()) .c_str()); continue; } open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue( sensorPath, tempValue * std::pow(10, -3) * std::pow(10, -3)); open_power::occ::dbus::OccDBusSensors::getOccDBus() .setOperationalStatus(sensorPath, true); if (existingSensors.find(sensorPath) == existingSensors.end()) { open_power::occ::dbus::OccDBusSensors::getOccDBus() .setChassisAssociation(sensorPath); } existingSensors[sensorPath] = id; } return; } void Manager::setSensorValueToNaN(uint32_t id) { for (const auto& [sensorPath, occId] : existingSensors) { if (occId == id) { open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue( sensorPath, std::numeric_limits<double>::quiet_NaN()); } } return; } void Manager::getSensorValues(uint32_t id, bool masterOcc) { const auto occ = std::string("occ-hwmon.") + std::to_string(id + 1); fs::path fileName{OCC_HWMON_PATH + occ + "/hwmon/"}; // Need to get the hwmonXX directory name, there better only be 1 dir assert(std::distance(fs::directory_iterator(fileName), fs::directory_iterator{}) == 1); // Now set our path to this full path, including this hwmonXX directory fileName = fs::path(*fs::directory_iterator(fileName)); // Read temperature sensors readTempSensors(fileName, id); if (masterOcc) { // Read power sensors readPowerSensors(fileName, id); } return; } #endif } // namespace occ } // namespace open_power