1*560e6af7SHarshit Aghera /*
2*560e6af7SHarshit Aghera * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
3*560e6af7SHarshit Aghera * AFFILIATES. All rights reserved.
4*560e6af7SHarshit Aghera * SPDX-License-Identifier: Apache-2.0
5*560e6af7SHarshit Aghera */
6*560e6af7SHarshit Aghera
7*560e6af7SHarshit Aghera #include "NvidiaGpuMctpVdm.hpp"
8*560e6af7SHarshit Aghera
9*560e6af7SHarshit Aghera #include "OcpMctpVdm.hpp"
10*560e6af7SHarshit Aghera
11*560e6af7SHarshit Aghera #include <endian.h>
12*560e6af7SHarshit Aghera
13*560e6af7SHarshit Aghera #include <cerrno>
14*560e6af7SHarshit Aghera #include <cstdint>
15*560e6af7SHarshit Aghera #include <cstring>
16*560e6af7SHarshit Aghera #include <span>
17*560e6af7SHarshit Aghera
18*560e6af7SHarshit Aghera namespace gpu
19*560e6af7SHarshit Aghera {
// These functions encode/decode data communicated over the network.
// The use of reinterpret_cast enables direct access to the raw byte buffers
// without unnecessary data copying.
23*560e6af7SHarshit Aghera // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast)
packHeader(const ocp::accelerator_management::BindingPciVidInfo & hdr,ocp::accelerator_management::BindingPciVid & msg)24*560e6af7SHarshit Aghera int packHeader(const ocp::accelerator_management::BindingPciVidInfo& hdr,
25*560e6af7SHarshit Aghera ocp::accelerator_management::BindingPciVid& msg)
26*560e6af7SHarshit Aghera {
27*560e6af7SHarshit Aghera return ocp::accelerator_management::packHeader(nvidiaPciVendorId, hdr, msg);
28*560e6af7SHarshit Aghera }
29*560e6af7SHarshit Aghera
encodeQueryDeviceIdentificationRequest(uint8_t instanceId,const std::span<uint8_t> buf)30*560e6af7SHarshit Aghera int encodeQueryDeviceIdentificationRequest(uint8_t instanceId,
31*560e6af7SHarshit Aghera const std::span<uint8_t> buf)
32*560e6af7SHarshit Aghera {
33*560e6af7SHarshit Aghera if (buf.size() < sizeof(QueryDeviceIdentificationRequest))
34*560e6af7SHarshit Aghera {
35*560e6af7SHarshit Aghera return EINVAL;
36*560e6af7SHarshit Aghera }
37*560e6af7SHarshit Aghera
38*560e6af7SHarshit Aghera auto* msg = reinterpret_cast<QueryDeviceIdentificationRequest*>(buf.data());
39*560e6af7SHarshit Aghera
40*560e6af7SHarshit Aghera ocp::accelerator_management::BindingPciVidInfo header{};
41*560e6af7SHarshit Aghera
42*560e6af7SHarshit Aghera header.ocp_accelerator_management_msg_type =
43*560e6af7SHarshit Aghera static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
44*560e6af7SHarshit Aghera header.instance_id = instanceId &
45*560e6af7SHarshit Aghera ocp::accelerator_management::instanceIdBitMask;
46*560e6af7SHarshit Aghera header.msg_type =
47*560e6af7SHarshit Aghera static_cast<uint8_t>(MessageType::DEVICE_CAPABILITY_DISCOVERY);
48*560e6af7SHarshit Aghera
49*560e6af7SHarshit Aghera auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
50*560e6af7SHarshit Aghera
51*560e6af7SHarshit Aghera if (rc != 0)
52*560e6af7SHarshit Aghera {
53*560e6af7SHarshit Aghera return rc;
54*560e6af7SHarshit Aghera }
55*560e6af7SHarshit Aghera
56*560e6af7SHarshit Aghera msg->hdr.command = static_cast<uint8_t>(
57*560e6af7SHarshit Aghera DeviceCapabilityDiscoveryCommands::QUERY_DEVICE_IDENTIFICATION);
58*560e6af7SHarshit Aghera msg->hdr.data_size = 0;
59*560e6af7SHarshit Aghera
60*560e6af7SHarshit Aghera return 0;
61*560e6af7SHarshit Aghera }
62*560e6af7SHarshit Aghera
decodeQueryDeviceIdentificationResponse(const std::span<const uint8_t> buf,ocp::accelerator_management::CompletionCode & cc,uint16_t & reasonCode,uint8_t & deviceIdentification,uint8_t & deviceInstance)63*560e6af7SHarshit Aghera int decodeQueryDeviceIdentificationResponse(
64*560e6af7SHarshit Aghera const std::span<const uint8_t> buf,
65*560e6af7SHarshit Aghera ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
66*560e6af7SHarshit Aghera uint8_t& deviceIdentification, uint8_t& deviceInstance)
67*560e6af7SHarshit Aghera {
68*560e6af7SHarshit Aghera auto rc =
69*560e6af7SHarshit Aghera ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
70*560e6af7SHarshit Aghera
71*560e6af7SHarshit Aghera if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
72*560e6af7SHarshit Aghera {
73*560e6af7SHarshit Aghera return rc;
74*560e6af7SHarshit Aghera }
75*560e6af7SHarshit Aghera
76*560e6af7SHarshit Aghera if (buf.size() < sizeof(QueryDeviceIdentificationResponse))
77*560e6af7SHarshit Aghera {
78*560e6af7SHarshit Aghera return EINVAL;
79*560e6af7SHarshit Aghera }
80*560e6af7SHarshit Aghera
81*560e6af7SHarshit Aghera const auto* response =
82*560e6af7SHarshit Aghera reinterpret_cast<const QueryDeviceIdentificationResponse*>(buf.data());
83*560e6af7SHarshit Aghera
84*560e6af7SHarshit Aghera deviceIdentification = response->device_identification;
85*560e6af7SHarshit Aghera deviceInstance = response->instance_id;
86*560e6af7SHarshit Aghera
87*560e6af7SHarshit Aghera return 0;
88*560e6af7SHarshit Aghera }
89*560e6af7SHarshit Aghera
encodeGetTemperatureReadingRequest(uint8_t instanceId,uint8_t sensorId,std::span<uint8_t> buf)90*560e6af7SHarshit Aghera int encodeGetTemperatureReadingRequest(uint8_t instanceId, uint8_t sensorId,
91*560e6af7SHarshit Aghera std::span<uint8_t> buf)
92*560e6af7SHarshit Aghera {
93*560e6af7SHarshit Aghera if (buf.size() < sizeof(GetTemperatureReadingRequest))
94*560e6af7SHarshit Aghera {
95*560e6af7SHarshit Aghera return EINVAL;
96*560e6af7SHarshit Aghera }
97*560e6af7SHarshit Aghera
98*560e6af7SHarshit Aghera auto* msg = reinterpret_cast<GetTemperatureReadingRequest*>(buf.data());
99*560e6af7SHarshit Aghera
100*560e6af7SHarshit Aghera ocp::accelerator_management::BindingPciVidInfo header{};
101*560e6af7SHarshit Aghera header.ocp_accelerator_management_msg_type =
102*560e6af7SHarshit Aghera static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
103*560e6af7SHarshit Aghera header.instance_id = instanceId &
104*560e6af7SHarshit Aghera ocp::accelerator_management::instanceIdBitMask;
105*560e6af7SHarshit Aghera header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
106*560e6af7SHarshit Aghera
107*560e6af7SHarshit Aghera auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
108*560e6af7SHarshit Aghera
109*560e6af7SHarshit Aghera if (rc != 0)
110*560e6af7SHarshit Aghera {
111*560e6af7SHarshit Aghera return rc;
112*560e6af7SHarshit Aghera }
113*560e6af7SHarshit Aghera
114*560e6af7SHarshit Aghera msg->hdr.command = static_cast<uint8_t>(
115*560e6af7SHarshit Aghera PlatformEnvironmentalCommands::GET_TEMPERATURE_READING);
116*560e6af7SHarshit Aghera msg->hdr.data_size = sizeof(sensorId);
117*560e6af7SHarshit Aghera msg->sensor_id = sensorId;
118*560e6af7SHarshit Aghera
119*560e6af7SHarshit Aghera return 0;
120*560e6af7SHarshit Aghera }
121*560e6af7SHarshit Aghera
decodeGetTemperatureReadingResponse(const std::span<const uint8_t> buf,ocp::accelerator_management::CompletionCode & cc,uint16_t & reasonCode,double & temperatureReading)122*560e6af7SHarshit Aghera int decodeGetTemperatureReadingResponse(
123*560e6af7SHarshit Aghera const std::span<const uint8_t> buf,
124*560e6af7SHarshit Aghera ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
125*560e6af7SHarshit Aghera double& temperatureReading)
126*560e6af7SHarshit Aghera {
127*560e6af7SHarshit Aghera auto rc =
128*560e6af7SHarshit Aghera ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
129*560e6af7SHarshit Aghera
130*560e6af7SHarshit Aghera if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
131*560e6af7SHarshit Aghera {
132*560e6af7SHarshit Aghera return rc;
133*560e6af7SHarshit Aghera }
134*560e6af7SHarshit Aghera
135*560e6af7SHarshit Aghera if (buf.size() < sizeof(GetTemperatureReadingResponse))
136*560e6af7SHarshit Aghera {
137*560e6af7SHarshit Aghera return EINVAL;
138*560e6af7SHarshit Aghera }
139*560e6af7SHarshit Aghera
140*560e6af7SHarshit Aghera const auto* response =
141*560e6af7SHarshit Aghera reinterpret_cast<const GetTemperatureReadingResponse*>(buf.data());
142*560e6af7SHarshit Aghera
143*560e6af7SHarshit Aghera uint16_t dataSize = le16toh(response->hdr.data_size);
144*560e6af7SHarshit Aghera
145*560e6af7SHarshit Aghera if (dataSize != sizeof(int32_t))
146*560e6af7SHarshit Aghera {
147*560e6af7SHarshit Aghera return EINVAL;
148*560e6af7SHarshit Aghera }
149*560e6af7SHarshit Aghera
150*560e6af7SHarshit Aghera int32_t reading = le32toh(response->reading);
151*560e6af7SHarshit Aghera temperatureReading = reading / static_cast<double>(1 << 8);
152*560e6af7SHarshit Aghera
153*560e6af7SHarshit Aghera return 0;
154*560e6af7SHarshit Aghera }
155*560e6af7SHarshit Aghera // NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast)
156*560e6af7SHarshit Aghera } // namespace gpu
157