xref: /openbmc/dbus-sensors/src/nvidia-gpu/NvidiaGpuMctpVdm.hpp (revision 68a8e2dd92a09d7f1735cce0cbd4aa722816e8ae)
1 /*
2  * SPDX-FileCopyrightText: Copyright OpenBMC Authors
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
8 #include <OcpMctpVdm.hpp>
9 
10 #include <array>
11 #include <cstddef>
12 #include <cstdint>
13 #include <span>
14 #include <string>
15 #include <variant>
16 #include <vector>
17 
18 namespace gpu
19 {
20 
/**
 * @brief Value of one inventory property: either text or a raw byte sequence.
 *
 * Used as the output type of decodeGetInventoryInformationResponse().
 */
using InventoryValue = std::variant<std::string, std::vector<uint8_t>>;

/**
 * @brief Number of bytes in the data array of GetInventoryInformationResponse.
 *
 * Declared `inline` so that every translation unit including this header
 * refers to one single object instead of a private internal-linkage copy.
 */
inline constexpr size_t maxInventoryDataSize = 256;

/** @brief NVIDIA PCI vendor id (0x10de). */
inline constexpr uint16_t nvidiaPciVendorId = 0x10de;
25 
/**
 * @brief Message type codes used by this header (one byte each).
 *
 * Value 1 is not defined here.
 */
enum class MessageType : uint8_t
{
    DEVICE_CAPABILITY_DISCOVERY = 0x00,
    PCIE_LINK = 0x02,
    PLATFORM_ENVIRONMENTAL = 0x03,
};
32 
/**
 * @brief Command codes of the DEVICE_CAPABILITY_DISCOVERY message type.
 *
 * Only the Query Device Identification command (0x09) is declared here.
 */
enum class DeviceCapabilityDiscoveryCommands : uint8_t
{
    QUERY_DEVICE_IDENTIFICATION = 0x09
};
37 
/**
 * @brief Command codes of the PLATFORM_ENVIRONMENTAL message type.
 *
 * The list is sparse: codes 0x01, 0x05, 0x07-0x0B, 0x0D and 0x0E have no
 * enumerator in this header.
 */
enum class PlatformEnvironmentalCommands : uint8_t
{
    // Temperature
    GET_TEMPERATURE_READING = 0x00,
    READ_THERMAL_PARAMETERS = 0x02,

    // Power and energy
    GET_CURRENT_POWER_DRAW = 0x03,
    GET_MAX_OBSERVED_POWER = 0x04,
    GET_CURRENT_ENERGY_COUNTER = 0x06,

    // Inventory
    GET_INVENTORY_INFORMATION = 0x0C,

    // Voltage
    GET_VOLTAGE = 0x0F
};
48 
/**
 * @brief Command codes of the PCIE_LINK message type.
 */
enum class PcieLinkCommands : uint8_t
{
    ListPCIePorts = 0x07,               // see ListPCIePortsResponse
    QueryScalarGroupTelemetryV2 = 0x24  // see QueryScalarGroupTelemetryV2Request
};
54 
/**
 * @brief Device identification codes (one byte each).
 *
 * NOTE(review): presumably the values carried in
 * QueryDeviceIdentificationResponse::device_identification - confirm against
 * the decoder. Values 1, 3 and 4 are not defined here.
 */
enum class DeviceIdentification : uint8_t
{
    DEVICE_GPU = 0x00,
    DEVICE_PCIE = 0x02,
    DEVICE_SMA = 0x05,
};
61 
/**
 * @brief Identifiers of the inventory properties, numbered contiguously from
 *        0 (BOARD_PART_NUMBER) to 36 (NVLINK_PEER_TYPE).
 *
 * Passed to decodeGetInventoryInformationResponse() as propertyId.
 */
enum class InventoryPropertyId : uint8_t
{
    // Part numbers, names and versions
    BOARD_PART_NUMBER = 0x00,
    SERIAL_NUMBER = 0x01,
    MARKETING_NAME = 0x02,
    DEVICE_PART_NUMBER = 0x03,
    FRU_PART_NUMBER = 0x04,
    MEMORY_VENDOR = 0x05,
    MEMORY_PART_NUMBER = 0x06,
    MAX_MEMORY_CAPACITY = 0x07,
    BUILD_DATE = 0x08,
    FIRMWARE_VERSION = 0x09,
    DEVICE_GUID = 0x0A,
    INFOROM_VERSION = 0x0B,

    // Product dimensions
    PRODUCT_LENGTH = 0x0C,
    PRODUCT_WIDTH = 0x0D,
    PRODUCT_HEIGHT = 0x0E,

    // Power limits
    RATED_DEVICE_POWER_LIMIT = 0x0F,
    MIN_DEVICE_POWER_LIMIT = 0x10,
    MAX_DEVICE_POWER_LIMIT = 0x11,
    MAX_MODULE_POWER_LIMIT = 0x12,
    MIN_MODULE_POWER_LIMIT = 0x13,
    RATED_MODULE_POWER_LIMIT = 0x14,

    // Clocks and EDPp scaling
    DEFAULT_BOOST_CLOCKS = 0x15,
    DEFAULT_BASE_CLOCKS = 0x16,
    DEFAULT_EDPP_SCALING = 0x17,
    MIN_EDPP_SCALING = 0x18,
    MAX_EDPP_SCALING = 0x19,
    MIN_GRAPHICS_CLOCK = 0x1A,
    MAX_GRAPHICS_CLOCK = 0x1B,
    MIN_MEMORY_CLOCK = 0x1C,
    MAX_MEMORY_CLOCK = 0x1D,

    // Identifiers and indices
    INFINIBAND_GUID = 0x1E,
    RACK_GUID = 0x1F,
    RACK_SLOT_NUMBER = 0x20,
    COMPUTE_SLOT_INDEX = 0x21,
    NODE_INDEX = 0x22,
    GPU_NODE_ID = 0x23,
    NVLINK_PEER_TYPE = 0x24,
};
102 
/**
 * @brief Kind of PCIe port; taken by
 *        encodeQueryScalarGroupTelemetryV2Request() as portType.
 */
enum class PciePortType : uint8_t
{
    UPSTREAM = 0x00,
    DOWNSTREAM = 0x01
};
108 
109 struct QueryDeviceIdentificationRequest
110 {
111     ocp::accelerator_management::CommonRequest hdr;
112 } __attribute__((packed));
113 
114 struct QueryDeviceIdentificationResponse
115 {
116     ocp::accelerator_management::CommonResponse hdr;
117     uint8_t device_identification;
118     uint8_t instance_id;
119 } __attribute__((packed));
120 
121 struct GetNumericSensorReadingRequest
122 {
123     ocp::accelerator_management::CommonRequest hdr;
124     uint8_t sensor_id;
125 } __attribute__((packed));
126 
127 using GetTemperatureReadingRequest = GetNumericSensorReadingRequest;
128 
129 using ReadThermalParametersRequest = GetNumericSensorReadingRequest;
130 
131 struct GetPowerDrawRequest
132 {
133     ocp::accelerator_management::CommonRequest hdr;
134     uint8_t sensorId;
135     uint8_t averagingInterval;
136 } __attribute__((packed));
137 
138 using GetCurrentEnergyCounterRequest = GetNumericSensorReadingRequest;
139 
140 using GetVoltageRequest = GetNumericSensorReadingRequest;
141 
142 struct QueryScalarGroupTelemetryV2Request
143 {
144     ocp::accelerator_management::CommonRequest hdr;
145     uint8_t upstreamPortNumber;
146     uint8_t portNumber;
147     uint8_t groupId;
148 } __attribute__((packed));
149 
150 struct GetTemperatureReadingResponse
151 {
152     ocp::accelerator_management::CommonResponse hdr;
153     int32_t reading;
154 } __attribute__((packed));
155 
156 struct ReadThermalParametersResponse
157 {
158     ocp::accelerator_management::CommonResponse hdr;
159     int32_t threshold;
160 } __attribute__((packed));
161 
162 struct GetPowerDrawResponse
163 {
164     ocp::accelerator_management::CommonResponse hdr;
165     uint32_t power;
166 } __attribute__((packed));
167 
168 struct GetCurrentEnergyCounterResponse
169 {
170     ocp::accelerator_management::CommonResponse hdr;
171     uint64_t energy;
172 } __attribute__((packed));
173 
174 struct GetVoltageResponse
175 {
176     ocp::accelerator_management::CommonResponse hdr;
177     uint32_t voltage;
178 } __attribute__((packed));
179 
180 struct ListPCIePortsResponse
181 {
182     ocp::accelerator_management::CommonResponse hdr;
183     uint16_t numUpstreamPorts;
184 } __attribute__((packed));
185 
/**
 * @brief Two-byte downstream port record: an "is internal" byte followed by
 *        a count byte.
 *
 * NOTE(review): presumably one record per upstream port in the List PCIe
 * Ports response - confirm against the decoder. Declared packed.
 */
struct __attribute__((packed)) ListPCIePortsDownstreamPortsData
{
    uint8_t isInternal;
    uint8_t count;
};
191 
192 struct GetInventoryInformationRequest
193 {
194     ocp::accelerator_management::CommonRequest hdr;
195     uint8_t property_id;
196 } __attribute__((packed));
197 
198 struct GetInventoryInformationResponse
199 {
200     ocp::accelerator_management::CommonResponse hdr;
201     std::array<uint8_t, maxInventoryDataSize> data;
202 } __attribute__((packed));
203 
204 int packHeader(const ocp::accelerator_management::BindingPciVidInfo& hdr,
205                ocp::accelerator_management::BindingPciVid& msg);
206 
207 int encodeQueryDeviceIdentificationRequest(uint8_t instanceId,
208                                            std::span<uint8_t> buf);
209 
210 int decodeQueryDeviceIdentificationResponse(
211     std::span<const uint8_t> buf,
212     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
213     uint8_t& deviceIdentification, uint8_t& deviceInstance);
214 
215 int encodeGetTemperatureReadingRequest(uint8_t instanceId, uint8_t sensorId,
216                                        std::span<uint8_t> buf);
217 
218 int decodeGetTemperatureReadingResponse(
219     std::span<const uint8_t> buf,
220     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
221     double& temperatureReading);
222 
223 int encodeReadThermalParametersRequest(uint8_t instanceId, uint8_t sensorId,
224                                        std::span<uint8_t> buf);
225 
226 int decodeReadThermalParametersResponse(
227     std::span<const uint8_t> buf,
228     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
229     int32_t& threshold);
230 
231 int encodeGetPowerDrawRequest(
232     PlatformEnvironmentalCommands commandCode, uint8_t instanceId,
233     uint8_t sensorId, uint8_t averagingInterval, std::span<uint8_t> buf);
234 
235 int decodeGetPowerDrawResponse(std::span<const uint8_t> buf,
236                                ocp::accelerator_management::CompletionCode& cc,
237                                uint16_t& reasonCode, uint32_t& power);
238 
239 int encodeGetCurrentEnergyCounterRequest(uint8_t instanceId, uint8_t sensorId,
240                                          std::span<uint8_t> buf);
241 
242 int decodeGetCurrentEnergyCounterResponse(
243     std::span<const uint8_t> buf,
244     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
245     uint64_t& energy);
246 
247 int encodeGetVoltageRequest(uint8_t instanceId, uint8_t sensorId,
248                             std::span<uint8_t> buf);
249 
250 int decodeGetVoltageResponse(std::span<const uint8_t> buf,
251                              ocp::accelerator_management::CompletionCode& cc,
252                              uint16_t& reasonCode, uint32_t& voltage);
253 
254 int encodeGetInventoryInformationRequest(uint8_t instanceId, uint8_t propertyId,
255                                          std::span<uint8_t> buf);
256 
257 int decodeGetInventoryInformationResponse(
258     std::span<const uint8_t> buf,
259     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
260     InventoryPropertyId propertyId, InventoryValue& value);
261 
262 int encodeQueryScalarGroupTelemetryV2Request(
263     uint8_t instanceId, PciePortType portType, uint8_t upstreamPortNumber,
264     uint8_t portNumber, uint8_t groupId, std::span<uint8_t> buf);
265 
266 int decodeQueryScalarGroupTelemetryV2Response(
267     std::span<const uint8_t> buf,
268     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
269     size_t& numTelemetryValues, std::vector<uint32_t>& telemetryValues);
270 
271 int encodeListPciePortsRequest(uint8_t instanceId, std::span<uint8_t> buf);
272 
273 int decodeListPciePortsResponse(
274     std::span<const uint8_t> buf,
275     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
276     uint16_t& numUpstreamPorts, std::vector<uint8_t>& numDownstreamPorts);
277 
278 } // namespace gpu
279