/* * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & * AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ #include "NvidiaGpuMctpVdm.hpp" #include "OcpMctpVdm.hpp" #include #include #include #include #include namespace gpu { // These functions encode/decode data communicated over the network // The use of reinterpret_cast enables direct memory access to raw byte buffers // without doing unnecessary data copying // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast) int packHeader(const ocp::accelerator_management::BindingPciVidInfo& hdr, ocp::accelerator_management::BindingPciVid& msg) { return ocp::accelerator_management::packHeader(nvidiaPciVendorId, hdr, msg); } int encodeQueryDeviceIdentificationRequest(uint8_t instanceId, const std::span buf) { if (buf.size() < sizeof(QueryDeviceIdentificationRequest)) { return EINVAL; } auto* msg = reinterpret_cast(buf.data()); ocp::accelerator_management::BindingPciVidInfo header{}; header.ocp_accelerator_management_msg_type = static_cast(ocp::accelerator_management::MessageType::REQUEST); header.instance_id = instanceId & ocp::accelerator_management::instanceIdBitMask; header.msg_type = static_cast(MessageType::DEVICE_CAPABILITY_DISCOVERY); auto rc = packHeader(header, msg->hdr.msgHdr.hdr); if (rc != 0) { return rc; } msg->hdr.command = static_cast( DeviceCapabilityDiscoveryCommands::QUERY_DEVICE_IDENTIFICATION); msg->hdr.data_size = 0; return 0; } int decodeQueryDeviceIdentificationResponse( const std::span buf, ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, uint8_t& deviceIdentification, uint8_t& deviceInstance) { auto rc = ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode); if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS) { return rc; } if (buf.size() < sizeof(QueryDeviceIdentificationResponse)) { return EINVAL; } const auto* response = reinterpret_cast(buf.data()); deviceIdentification = response->device_identification; deviceInstance = response->instance_id; return 0; } int encodeGetTemperatureReadingRequest(uint8_t instanceId, uint8_t sensorId, std::span buf) { if (buf.size() < sizeof(GetTemperatureReadingRequest)) { return EINVAL; } auto* msg = reinterpret_cast(buf.data()); ocp::accelerator_management::BindingPciVidInfo header{}; header.ocp_accelerator_management_msg_type = static_cast(ocp::accelerator_management::MessageType::REQUEST); header.instance_id = instanceId & ocp::accelerator_management::instanceIdBitMask; header.msg_type = static_cast(MessageType::PLATFORM_ENVIRONMENTAL); auto rc = packHeader(header, msg->hdr.msgHdr.hdr); if (rc != 0) { return rc; } msg->hdr.command = static_cast( PlatformEnvironmentalCommands::GET_TEMPERATURE_READING); msg->hdr.data_size = sizeof(sensorId); msg->sensor_id = sensorId; return 0; } int decodeGetTemperatureReadingResponse( const std::span buf, ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, double& temperatureReading) { auto rc = ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode); if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS) { return rc; } if (buf.size() < sizeof(GetTemperatureReadingResponse)) { return EINVAL; } const auto* response = reinterpret_cast(buf.data()); uint16_t dataSize = le16toh(response->hdr.data_size); if (dataSize != sizeof(int32_t)) { return EINVAL; } int32_t reading = le32toh(response->reading); temperatureReading = reading / static_cast(1 << 8); return 0; } int encodeReadThermalParametersRequest(uint8_t instanceId, uint8_t sensorId, std::span buf) { if (buf.size() < sizeof(ReadThermalParametersRequest)) { return EINVAL; } auto* msg = reinterpret_cast(buf.data()); ocp::accelerator_management::BindingPciVidInfo header{}; header.ocp_accelerator_management_msg_type = static_cast(ocp::accelerator_management::MessageType::REQUEST); header.instance_id = instanceId & ocp::accelerator_management::instanceIdBitMask; header.msg_type = static_cast(MessageType::PLATFORM_ENVIRONMENTAL); auto rc = packHeader(header, msg->hdr.msgHdr.hdr); if (rc != 0) { return rc; } msg->hdr.command = static_cast( PlatformEnvironmentalCommands::READ_THERMAL_PARAMETERS); msg->hdr.data_size = sizeof(sensorId); msg->sensor_id = sensorId; return 0; } int decodeReadThermalParametersResponse( std::span buf, ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, int32_t& threshold) { auto rc = ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode); if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS) { return rc; } if (buf.size() < sizeof(ReadThermalParametersResponse)) { return EINVAL; } const auto* response = reinterpret_cast(buf.data()); uint16_t dataSize = le16toh(response->hdr.data_size); if (dataSize != sizeof(int32_t)) { return EINVAL; } threshold = le32toh(response->threshold); return 0; } int encodeGetCurrentPowerDrawRequest(uint8_t instanceId, uint8_t sensorId, uint8_t averagingInterval, std::span buf) { if (buf.size() < sizeof(GetCurrentPowerDrawRequest)) { return EINVAL; } auto* msg = reinterpret_cast(buf.data()); ocp::accelerator_management::BindingPciVidInfo header{}; header.ocp_accelerator_management_msg_type = static_cast(ocp::accelerator_management::MessageType::REQUEST); header.instance_id = instanceId & ocp::accelerator_management::instanceIdBitMask; header.msg_type = static_cast(MessageType::PLATFORM_ENVIRONMENTAL); auto rc = packHeader(header, msg->hdr.msgHdr.hdr); if (rc != 0) { return rc; } msg->hdr.command = static_cast( PlatformEnvironmentalCommands::GET_CURRENT_POWER_DRAW); msg->hdr.data_size = sizeof(sensorId) + sizeof(averagingInterval); msg->sensorId = sensorId; msg->averagingInterval = averagingInterval; return 0; } int decodeGetCurrentPowerDrawResponse( std::span buf, ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, uint32_t& power) { auto rc = ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode); if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS) { return rc; } if (buf.size() < sizeof(GetCurrentPowerDrawResponse)) { return EINVAL; } const auto* response = reinterpret_cast(buf.data()); const uint16_t dataSize = le16toh(response->hdr.data_size); if (dataSize != sizeof(uint32_t)) { return EINVAL; } power = le32toh(response->power); return 0; } int encodeGetCurrentEnergyCounterRequest(uint8_t instanceId, uint8_t sensorId, std::span buf) { if (buf.size() < sizeof(GetTemperatureReadingRequest)) { return EINVAL; } auto* msg = reinterpret_cast(buf.data()); ocp::accelerator_management::BindingPciVidInfo header{}; header.ocp_accelerator_management_msg_type = static_cast(ocp::accelerator_management::MessageType::REQUEST); header.instance_id = instanceId & ocp::accelerator_management::instanceIdBitMask; header.msg_type = static_cast(MessageType::PLATFORM_ENVIRONMENTAL); auto rc = packHeader(header, msg->hdr.msgHdr.hdr); if (rc != 0) { return rc; } msg->hdr.command = static_cast( PlatformEnvironmentalCommands::GET_CURRENT_ENERGY_COUNTER); msg->hdr.data_size = sizeof(sensorId); msg->sensor_id = sensorId; return 0; } int decodeGetCurrentEnergyCounterResponse( std::span buf, ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, uint64_t& energy) { auto rc = ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode); if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS) { return rc; } if (buf.size() < sizeof(GetCurrentPowerDrawResponse)) { return EINVAL; } const auto* response = reinterpret_cast(buf.data()); const uint16_t dataSize = le16toh(response->hdr.data_size); if (dataSize != sizeof(uint64_t)) { return EINVAL; } energy = le32toh(response->energy); return 0; } int encodeGetVoltageRequest(uint8_t instanceId, uint8_t sensorId, std::span buf) { if (buf.size() < sizeof(GetVoltageRequest)) { return EINVAL; } auto* msg = reinterpret_cast(buf.data()); ocp::accelerator_management::BindingPciVidInfo header{}; header.ocp_accelerator_management_msg_type = static_cast(ocp::accelerator_management::MessageType::REQUEST); header.instance_id = instanceId & ocp::accelerator_management::instanceIdBitMask; header.msg_type = static_cast(MessageType::PLATFORM_ENVIRONMENTAL); auto rc = packHeader(header, msg->hdr.msgHdr.hdr); if (rc != 0) { return rc; } msg->hdr.command = static_cast(PlatformEnvironmentalCommands::GET_VOLTAGE); msg->hdr.data_size = sizeof(sensorId); msg->sensor_id = sensorId; return 0; } int decodeGetVoltageResponse(std::span buf, ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, uint32_t& voltage) { auto rc = ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode); if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS) { return rc; } if (buf.size() < sizeof(GetVoltageResponse)) { return EINVAL; } const auto* response = reinterpret_cast(buf.data()); const uint16_t dataSize = le16toh(response->hdr.data_size); if (dataSize != sizeof(uint32_t)) { return EINVAL; } voltage = le32toh(response->voltage); return 0; } int encodeGetInventoryInformationRequest(uint8_t instanceId, uint8_t propertyId, std::span buf) { if (buf.size() < sizeof(GetInventoryInformationRequest)) { return EINVAL; } auto* msg = reinterpret_cast(buf.data()); ocp::accelerator_management::BindingPciVidInfo header{}; header.ocp_accelerator_management_msg_type = static_cast(ocp::accelerator_management::MessageType::REQUEST); header.instance_id = instanceId & ocp::accelerator_management::instanceIdBitMask; header.msg_type = static_cast(MessageType::PLATFORM_ENVIRONMENTAL); auto rc = packHeader(header, msg->hdr.msgHdr.hdr); if (rc != 0) { return rc; } msg->hdr.command = static_cast( PlatformEnvironmentalCommands::GET_INVENTORY_INFORMATION); msg->hdr.data_size = sizeof(propertyId); msg->property_id = propertyId; return 0; } int decodeGetInventoryInformationResponse( std::span buf, ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, InventoryPropertyId propertyId, InventoryValue& value) { auto rc = ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode); if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS) { return rc; } // Expect at least one byte of inventory response data after common response if (buf.size() < (sizeof(ocp::accelerator_management::CommonResponse) + 1)) { return EINVAL; } const auto* response = reinterpret_cast(buf.data()); uint16_t dataSize = le16toh(response->hdr.data_size); if (dataSize == 0 || dataSize > maxInventoryDataSize) { return EINVAL; } const uint8_t* dataPtr = response->data.data(); switch (propertyId) { case InventoryPropertyId::BOARD_PART_NUMBER: case InventoryPropertyId::SERIAL_NUMBER: case InventoryPropertyId::MARKETING_NAME: case InventoryPropertyId::DEVICE_PART_NUMBER: value = std::string(reinterpret_cast(dataPtr), dataSize); break; case InventoryPropertyId::DEVICE_GUID: value = std::vector(dataPtr, dataPtr + dataSize); break; default: return EINVAL; } return 0; } // NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast) } // namespace gpu