/*
 * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
 * AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */

// NOTE(review): in this copy of the file the header names of the bare
// `#include` directives and several template argument lists (shared_ptr,
// make_shared, vector, static_cast) are absent. They are left exactly as
// found; restore them from the original source before building.

#include "NvidiaGpuDevice.hpp"

#include "NvidiaDeviceDiscovery.hpp"
#include "NvidiaGpuSensor.hpp"
#include "Thresholds.hpp"
#include "Utils.hpp"

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include <cstddef>

/**
 * @brief Construct a GPU device and immediately create its sensors.
 *
 * Copies/binds the supplied handles into members, derives the poll interval
 * from configs.pollRate, constructs waitTimer on @p io with a zero duration,
 * stores the name after passing it through escapeName(), and finally calls
 * makeSensors().
 *
 * @param configs       Sensor configuration; pollRate is read here.
 * @param name          Device name (escaped before being stored).
 * @param path          Path stored and handed to every sensor created.
 * @param conn          Connection handle handed to every sensor created.
 * @param eid           Endpoint id used for all requests of this device.
 * @param io            I/O context that drives the polling timer.
 * @param mctpRequester Requester used for thresholds and by the sensors.
 * @param objectServer  Object server handed to every sensor created.
 */
GpuDevice::GpuDevice(const SensorConfigs& configs, const std::string& name,
                     const std::string& path, const std::shared_ptr& conn,
                     uint8_t eid, boost::asio::io_context& io,
                     mctp::MctpRequester& mctpRequester,
                     sdbusplus::asio::object_server& objectServer) :
    eid(eid), sensorPollMs(std::chrono::milliseconds{configs.pollRate}),
    waitTimer(io, std::chrono::steady_clock::duration(0)),
    mctpRequester(mctpRequester), conn(conn), objectServer(objectServer),
    configs(configs), name(escapeName(name)), path(path)
{
    makeSensors();
}

/**
 * @brief Create the temperature and power sensors, request the TLimit
 *        thresholds, and start the polling loop.
 *
 * The TLimit sensor is not created here: it is created by
 * processTLimitThresholds(), which is passed to readThermalParameters() as
 * the completion callback.
 */
void GpuDevice::makeSensors()
{
    tempSensor = std::make_shared(
        conn, mctpRequester, name + "_TEMP_0", path, eid, gpuTempSensorId,
        objectServer, std::vector{});

    // NOTE(review): the callback is bound to a raw `this`. If
    // readThermalParameters() can invoke it after this object is destroyed,
    // that is a use-after-free -- confirm the lifetime guarantee.
    readThermalParameters(
        eid,
        std::vector{gpuTLimitWarnringThresholdId, gpuTLimitCriticalThresholdId,
                    gpuTLimitHardshutDownThresholdId},
        mctpRequester,
        std::bind_front(&GpuDevice::processTLimitThresholds, this));

    powerSensor = std::make_shared(
        conn, mctpRequester, name + "_Power_0", path, eid, gpuPowerSensorId,
        objectServer, std::vector{});

    lg2::info("Added GPU {NAME} Sensors with chassis path: {PATH}.", "NAME",
              name, "PATH", path);

    read();
}

/**
 * @brief Completion callback for the TLimit threshold request; creates the
 *        TLimit sensor.
 *
 * When @p rc is zero and at least three values were returned, the values are
 * used, by position, as the WARNING, CRITICAL and HARDSHUTDOWN low
 * thresholds. In every other case the sensor is still created, but with an
 * empty threshold list.
 *
 * @param rc         Completion code of the request; zero means success.
 * @param thresholds Returned values, expected in the order warning,
 *                   critical, hard-shutdown.
 */
void GpuDevice::processTLimitThresholds(uint8_t rc,
                                        const std::vector& thresholds)
{
    // Indices 0..2 are read below, so a shorter reply must not be indexed.
    constexpr std::size_t expectedThresholdCount = 3;

    std::vector tLimitThresholds{};
    if (rc == 0)
    {
        if (thresholds.size() >= expectedThresholdCount)
        {
            tLimitThresholds = {
                thresholds::Threshold{thresholds::Level::WARNING,
                                      thresholds::Direction::LOW,
                                      static_cast(thresholds[0])},
                thresholds::Threshold{thresholds::Level::CRITICAL,
                                      thresholds::Direction::LOW,
                                      static_cast(thresholds[1])},
                thresholds::Threshold{thresholds::Level::HARDSHUTDOWN,
                                      thresholds::Direction::LOW,
                                      static_cast(thresholds[2])}};
        }
        else
        {
            lg2::error(
                "GPU {NAME} returned {COUNT} TLimit thresholds, expected {EXPECTED}; creating sensor without thresholds.",
                "NAME", name, "COUNT", thresholds.size(), "EXPECTED",
                expectedThresholdCount);
        }
    }

    tLimitSensor = std::make_shared(
        conn, mctpRequester, name + "_TEMP_1", path, eid, gpuTLimitSensorId,
        objectServer, std::move(tLimitThresholds));
}

/**
 * @brief Update every available sensor once and schedule the next poll.
 *
 * tLimitSensor is updated only if it has already been created. The timer is
 * re-armed for sensorPollMs and, on expiry, calls read() again.
 */
void GpuDevice::read()
{
    tempSensor->update();
    if (tLimitSensor)
    {
        tLimitSensor->update();
    }
    powerSensor->update();

    waitTimer.expires_after(std::chrono::milliseconds(sensorPollMs));
    waitTimer.async_wait([this](const boost::system::error_code& ec) {
        // Any error ends the polling loop; `this` is not touched on this
        // path.
        if (ec)
        {
            return;
        }
        read();
    });
}