/*
 * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
 * AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */

// NOTE(review): the reviewed copy of this file had lost everything written
// inside angle brackets (the <...> include targets and every template
// argument list). They are re-derived below from how each name is used in
// this file; confirm them against the repository before merging.

#include "NvidiaDeviceDiscovery.hpp"

#include "NvidiaGpuDevice.hpp"
#include "NvidiaSmaDevice.hpp"
#include "Utils.hpp"

#include <MctpRequester.hpp>
#include <NvidiaGpuMctpVdm.hpp>
#include <OcpMctpVdm.hpp>
#include <boost/asio/io_context.hpp>
#include <boost/container/flat_map.hpp>
#include <boost/system/error_code.hpp>
#include <phosphor-logging/lg2.hpp>
#include <sdbusplus/asio/connection.hpp>
#include <sdbusplus/asio/object_server.hpp>
#include <sdbusplus/message.hpp>
#include <sdbusplus/message/native_types.hpp>

#include <algorithm>
#include <array>
#include <cstdint>
#include <memory>
#include <span>
#include <stdexcept>
#include <string>
#include <utility>
#include <variant>
#include <vector>

// Poll rate applied when a configuration carries no "PollRate" property
// (milliseconds, going by the name).
static constexpr auto sensorPollRateMs = 1000;

namespace
{

/**
 * @brief Erase every entry of @p devices whose device reports @p removedPath
 *        from getPath().
 *
 * Shared by the GPU and SMA maps in interfaceRemoved(), which previously
 * carried two copies of this loop.
 */
template <typename DeviceMap>
void eraseDevicesAtPath(DeviceMap& devices,
                        const sdbusplus::message::object_path& removedPath)
{
    for (auto it = devices.begin(); it != devices.end();)
    {
        if (it->second->getPath() == removedPath)
        {
            // erase() hands back the next valid iterator.
            it = devices.erase(it);
        }
        else
        {
            ++it;
        }
    }
}

} // namespace

/**
 * @brief Handle the reply to a Query Device Identification request.
 *
 * Bails out (with an error log) when the MCTP exchange failed, when decoding
 * failed, or when the completion code is not SUCCESS. Otherwise a GpuDevice
 * or SmaDevice is created for the endpoint and stored in the matching map
 * under a name built from configs.name and the reported instance id. An
 * existing entry with the same name is replaced.
 *
 * @param sendRecvMsgResult Result of the MCTP send/receive; non-zero is
 *                          treated as failure.
 * @param queryDeviceIdentificationResponse Raw response bytes to decode.
 */
void processQueryDeviceIdResponse(
    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
        gpuDevices,
    boost::container::flat_map<std::string, std::shared_ptr<SmaDevice>>&
        smaDevices,
    const std::shared_ptr<sdbusplus::asio::connection>& conn,
    mctp::MctpRequester& mctpRequester, const SensorConfigs& configs,
    const std::string& path, uint8_t eid, int sendRecvMsgResult,
    std::span<const uint8_t> queryDeviceIdentificationResponse)
{
    if (sendRecvMsgResult != 0)
    {
        lg2::error(
            "Error processing MCTP endpoint with eid {EID} : sending message over MCTP failed, rc={RC}",
            "EID", eid, "RC", sendRecvMsgResult);
        return;
    }

    ocp::accelerator_management::CompletionCode cc{};
    uint16_t reasonCode = 0;
    uint8_t responseDeviceType = 0;
    uint8_t responseInstanceId = 0;

    auto rc = gpu::decodeQueryDeviceIdentificationResponse(
        queryDeviceIdentificationResponse, cc, reasonCode, responseDeviceType,
        responseInstanceId);

    if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
    {
        lg2::error(
            "Error processing MCTP endpoint with eid {EID} : decode failed, rc={RC}, cc={CC}, reasonCode={RESC}",
            "EID", eid, "RC", rc, "CC", cc, "RESC", reasonCode);
        return;
    }

    switch (static_cast<gpu::DeviceIdentification>(responseDeviceType))
    {
        case gpu::DeviceIdentification::DEVICE_GPU:
        {
            lg2::info(
                "Found the GPU with EID {EID}, DeviceType {DEVTYPE}, InstanceId {IID}.",
                "EID", eid, "DEVTYPE", responseDeviceType, "IID",
                responseInstanceId);

            auto gpuName =
                configs.name + '_' + std::to_string(responseInstanceId);

            gpuDevices[gpuName] = std::make_shared<GpuDevice>(
                configs, gpuName, path, conn, eid, io, mctpRequester,
                objectServer);
            break;
        }

        case gpu::DeviceIdentification::DEVICE_SMA:
        {
            lg2::info(
                "Found the SMA Device with EID {EID}, DeviceType {DEVTYPE}, InstanceId {IID}.",
                "EID", eid, "DEVTYPE", responseDeviceType, "IID",
                responseInstanceId);

            auto smaName =
                configs.name + "_SMA_" + std::to_string(responseInstanceId);

            smaDevices[smaName] = std::make_shared<SmaDevice>(
                configs, smaName, path, conn, eid, io, mctpRequester,
                objectServer);
            break;
        }

        default:
        {
            // No device object exists for other device types; record the
            // skip instead of dropping the endpoint without a trace.
            lg2::info(
                "Skipping MCTP endpoint with eid {EID} : unsupported DeviceType {DEVTYPE}, InstanceId {IID}.",
                "EID", eid, "DEVTYPE", responseDeviceType, "IID",
                responseInstanceId);
            break;
        }
    }
}

/**
 * @brief Send a Query Device Identification request to endpoint @p eid.
 *
 * The request and response buffers are heap-allocated and owned by the
 * completion handler so they stay alive until the handler has run. The
 * handler forwards the result to processQueryDeviceIdResponse().
 */
void queryDeviceIdentification(
    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
        gpuDevices,
    boost::container::flat_map<std::string, std::shared_ptr<SmaDevice>>&
        smaDevices,
    const std::shared_ptr<sdbusplus::asio::connection>& conn,
    mctp::MctpRequester& mctpRequester, const SensorConfigs& configs,
    const std::string& path, uint8_t eid)
{
    // NOTE(review): buffer element type and sizes were not visible in the
    // reviewed copy; sized here from the gpu:: request/response message
    // structs - confirm.
    auto queryDeviceIdentificationRequest = std::make_shared<
        std::array<uint8_t, sizeof(gpu::QueryDeviceIdentificationRequest)>>();

    auto queryDeviceIdentificationResponse = std::make_shared<
        std::array<uint8_t, sizeof(gpu::QueryDeviceIdentificationResponse)>>();

    auto rc = gpu::encodeQueryDeviceIdentificationRequest(
        0, *queryDeviceIdentificationRequest);
    if (rc != 0)
    {
        lg2::error(
            "Error processing MCTP endpoint with eid {EID} : encode failed, rc={RC}",
            "EID", eid, "RC", rc);
        return;
    }

    mctpRequester.sendRecvMsg(
        eid, *queryDeviceIdentificationRequest,
        *queryDeviceIdentificationResponse,
        [&io, &objectServer, &gpuDevices, &smaDevices, conn, &mctpRequester,
         configs, path, eid, queryDeviceIdentificationRequest,
         queryDeviceIdentificationResponse](int sendRecvMsgResult) {
            processQueryDeviceIdResponse(
                io, objectServer, gpuDevices, smaDevices, conn, mctpRequester,
                configs, path, eid, sendRecvMsgResult,
                *queryDeviceIdentificationResponse);
        });
}

/**
 * @brief Inspect the properties of one MCTP endpoint and, if it lists the
 *        OCP accelerator-management message type, query its identity.
 *
 * @param ec       Result of the property fetch; any error aborts processing.
 * @param endpoint Property map of the endpoint; must contain "EID" and
 *                 "SupportedMessageTypes" with the expected variant types,
 *                 otherwise an error is logged and nothing else happens.
 */
void processEndpoint(
    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
        gpuDevices,
    boost::container::flat_map<std::string, std::shared_ptr<SmaDevice>>&
        smaDevices,
    const std::shared_ptr<sdbusplus::asio::connection>& conn,
    mctp::MctpRequester& mctpRequester, const SensorConfigs& configs,
    const std::string& path, const boost::system::error_code& ec,
    const SensorBaseConfigMap& endpoint)
{
    if (ec)
    {
        lg2::error("Error processing MCTP endpoint: Error:{ERROR}", "ERROR",
                   ec.message());
        return;
    }

    const auto eidIt = endpoint.find("EID");
    if (eidIt == endpoint.end())
    {
        lg2::error(
            "Error processing MCTP endpoint: Property EID not found in the configuration.");
        return;
    }

    const auto* eidPtr = std::get_if<uint8_t>(&eidIt->second);
    if (eidPtr == nullptr)
    {
        lg2::error(
            "Error processing MCTP endpoint: Property EID does not have valid type.");
        return;
    }
    const uint8_t eid = *eidPtr;

    const auto typesIt = endpoint.find("SupportedMessageTypes");
    if (typesIt == endpoint.end())
    {
        lg2::error(
            "Error processing MCTP endpoint with eid {EID} : Property SupportedMessageTypes not found in the configuration.",
            "EID", eid);
        return;
    }

    const auto* mctpTypes =
        std::get_if<std::vector<uint8_t>>(&typesIt->second);
    if (mctpTypes == nullptr)
    {
        lg2::error(
            "Error processing MCTP endpoint with eid {EID} : Property SupportedMessageTypes does not have valid type.",
            "EID", eid);
        return;
    }

    // Search the property in place; no copy of the vector is needed.
    if (std::find(mctpTypes->begin(), mctpTypes->end(),
                  ocp::accelerator_management::messageType) !=
        mctpTypes->end())
    {
        lg2::info("Found OCP MCTP VDM Endpoint with ID {EID}", "EID", eid);
        queryDeviceIdentification(io, objectServer, gpuDevices, smaDevices,
                                  conn, mctpRequester, configs, path, eid);
    }
}

/**
 * @brief Walk a GetSubTree reply and fetch all properties of every object
 *        that implements xyz.openbmc_project.MCTP.Endpoint.
 *
 * Each property reply is handed to processEndpoint().
 *
 * @param ec  Result of the GetSubTree call; any error aborts processing.
 * @param ret Subtree reply (object path -> service -> interfaces).
 */
void queryEndpoints(
    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
        gpuDevices,
    boost::container::flat_map<std::string, std::shared_ptr<SmaDevice>>&
        smaDevices,
    const std::shared_ptr<sdbusplus::asio::connection>& conn,
    mctp::MctpRequester& mctpRequester, const SensorConfigs& configs,
    const std::string& path, const boost::system::error_code& ec,
    const GetSubTreeType& ret)
{
    if (ec)
    {
        lg2::error("Error processing MCTP endpoints: {ERROR}", "ERROR",
                   ec.message());
        return;
    }

    if (ret.empty())
    {
        return;
    }

    for (const auto& [objPath, services] : ret)
    {
        for (const auto& [service, ifaces] : services)
        {
            for (const auto& iface : ifaces)
            {
                if (iface != "xyz.openbmc_project.MCTP.Endpoint")
                {
                    continue;
                }

                conn->async_method_call(
                    [&io, &objectServer, &gpuDevices, &smaDevices, conn,
                     &mctpRequester, configs,
                     path](const boost::system::error_code& ec,
                           const SensorBaseConfigMap& endpoint) {
                        processEndpoint(io, objectServer, gpuDevices,
                                        smaDevices, conn, mctpRequester,
                                        configs, path, ec, endpoint);
                    },
                    service, objPath, "org.freedesktop.DBus.Properties",
                    "GetAll", iface);
            }
        }
    }
}

/**
 * @brief Ask the object mapper for every MCTP endpoint below
 *        /au/com/codeconstruct/ and pass the reply to queryEndpoints().
 */
void discoverDevices(
    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
        gpuDevices,
    boost::container::flat_map<std::string, std::shared_ptr<SmaDevice>>&
        smaDevices,
    const std::shared_ptr<sdbusplus::asio::connection>& conn,
    mctp::MctpRequester& mctpRequester, const SensorConfigs& configs,
    const std::string& path)
{
    std::string searchPath{"/au/com/codeconstruct/"};
    std::vector<std::string> ifaceList{{"xyz.openbmc_project.MCTP.Endpoint"}};

    conn->async_method_call(
        [&io, &objectServer, &gpuDevices, &smaDevices, conn, &mctpRequester,
         configs, path](const boost::system::error_code& ec,
                        const GetSubTreeType& ret) {
            queryEndpoints(io, objectServer, gpuDevices, smaDevices, conn,
                           mctpRequester, configs, path, ec, ret);
        },
        "xyz.openbmc_project.ObjectMapper",
        "/xyz/openbmc_project/object_mapper",
        "xyz.openbmc_project.ObjectMapper", "GetSubTree", searchPath, 0,
        ifaceList);
}

/**
 * @brief Start device discovery for every configuration object in @p resp
 *        that carries this daemon's configuration interface.
 *
 * "Name" is read from the configuration; "PollRate" is optional and falls
 * back to sensorPollRateMs.
 */
void processSensorConfigs(
    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
        gpuDevices,
    boost::container::flat_map<std::string, std::shared_ptr<SmaDevice>>&
        smaDevices,
    const std::shared_ptr<sdbusplus::asio::connection>& dbusConnection,
    mctp::MctpRequester& mctpRequester, const ManagedObjectType& resp)
{
    // Loop-invariant: build the interface name once, not per interface.
    const auto configInterface = configInterfaceName(deviceType);

    for (const auto& [path, interfaces] : resp)
    {
        for (const auto& [intf, cfg] : interfaces)
        {
            if (intf != configInterface)
            {
                continue;
            }

            SensorConfigs configs;

            configs.name = loadVariant<std::string>(cfg, "Name");

            try
            {
                configs.pollRate = loadVariant<uint64_t>(cfg, "PollRate");
            }
            catch (const std::invalid_argument&)
            {
                // PollRate is an optional config
                configs.pollRate = sensorPollRateMs;
            }

            discoverDevices(io, objectServer, gpuDevices, smaDevices,
                            dbusConnection, mctpRequester, configs, path);

            lg2::info(
                "Detected configuration {NAME} of type {TYPE} at path: {PATH}.",
                "NAME", configs.name, "TYPE", deviceType, "PATH", path);
        }
    }
}

/**
 * @brief Fetch all managed objects from entity manager and run
 *        processSensorConfigs() on the reply.
 *
 * Logs an error and does nothing when @p dbusConnection is null or when the
 * GetManagedObjects call fails.
 */
void createSensors(
    boost::asio::io_context& io, sdbusplus::asio::object_server& objectServer,
    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
        gpuDevices,
    boost::container::flat_map<std::string, std::shared_ptr<SmaDevice>>&
        smaDevices,
    const std::shared_ptr<sdbusplus::asio::connection>& dbusConnection,
    mctp::MctpRequester& mctpRequester)
{
    if (!dbusConnection)
    {
        lg2::error("Connection not created");
        return;
    }

    dbusConnection->async_method_call(
        [&gpuDevices, &smaDevices, &mctpRequester, dbusConnection, &io,
         &objectServer](boost::system::error_code ec,
                        const ManagedObjectType& resp) {
            if (ec)
            {
                lg2::error("Error contacting entity manager");
                return;
            }

            processSensorConfigs(io, objectServer, gpuDevices, smaDevices,
                                 dbusConnection, mctpRequester, resp);
        },
        entityManagerName, "/xyz/openbmc_project/inventory",
        "org.freedesktop.DBus.ObjectManager", "GetManagedObjects");
}

/**
 * @brief InterfacesRemoved handler: drop devices whose configuration object
 *        lost this daemon's configuration interface.
 *
 * @param message Signal carrying the removed object path and the list of
 *                removed interface names.
 */
void interfaceRemoved(
    sdbusplus::message_t& message,
    boost::container::flat_map<std::string, std::shared_ptr<GpuDevice>>&
        gpuDevices,
    boost::container::flat_map<std::string, std::shared_ptr<SmaDevice>>&
        smaDevices)
{
    if (message.is_method_error())
    {
        lg2::error("interfacesRemoved callback method error");
        return;
    }

    sdbusplus::message::object_path removedPath;
    std::vector<std::string> interfaces;

    message.read(removedPath, interfaces);

    // If the xyz.openbmc_project.Configuration.X interface was removed
    // for one or more sensors, delete those sensor objects.
    //
    // Whether that interface is in the removed list does not depend on the
    // device being inspected, so it is decided once up front rather than
    // once per device in each map.
    const auto configInterface = configInterfaceName(deviceType);
    if (std::find(interfaces.begin(), interfaces.end(), configInterface) ==
        interfaces.end())
    {
        return;
    }

    eraseDevicesAtPath(gpuDevices, removedPath);
    eraseDevicesAtPath(smaDevices, removedPath);
}