xref: /openbmc/dbus-sensors/src/nvidia-gpu/NvidiaGpuMctpVdm.hpp (revision b341fa2b68ff4d10c3eca0f58a16448b475d1cff)
1 /*
2  * SPDX-FileCopyrightText: Copyright OpenBMC Authors
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
8 #include <OcpMctpVdm.hpp>
9 
10 #include <array>
11 #include <cstddef>
12 #include <cstdint>
13 #include <span>
14 #include <string>
15 #include <variant>
16 #include <vector>
17 
18 namespace gpu
19 {
20 
// Decoded inventory property value: holds either a std::string or a
// std::vector<uint8_t>. It is the output type of
// decodeGetInventoryInformationResponse.
21 using InventoryValue = std::variant<std::string, std::vector<uint8_t>>;
// Number of bytes in GetInventoryInformationResponse::data.
22 constexpr size_t maxInventoryDataSize = 256;
23 
// 16-bit PCI vendor ID constant (0x10de); per its name, NVIDIA's vendor ID.
24 constexpr uint16_t nvidiaPciVendorId = 0x10de;
25 
// One-byte message type codes. Value 1 is not assigned in this enum.
// NOTE(review): presumably each message type selects one of the command
// enums in this header (e.g. PLATFORM_ENVIRONMENTAL ->
// PlatformEnvironmentalCommands) - confirm against the encode implementations.
26 enum class MessageType : uint8_t
27 {
28     DEVICE_CAPABILITY_DISCOVERY = 0,
29     PCIE_LINK = 2,
30     PLATFORM_ENVIRONMENTAL = 3
31 };
32 
// One-byte command codes; only QUERY_DEVICE_IDENTIFICATION (0x09) is defined.
// NOTE(review): by name these belong to
// MessageType::DEVICE_CAPABILITY_DISCOVERY - confirm.
33 enum class DeviceCapabilityDiscoveryCommands : uint8_t
34 {
35     QUERY_DEVICE_IDENTIFICATION = 0x09,
36 };
37 
// One-byte command codes for temperature, thermal, power, energy, inventory,
// driver and voltage queries. The numbering is sparse: 0x01, 0x05,
// 0x07-0x0B and 0x0D are not assigned in this enum.
// NOTE(review): by name these belong to MessageType::PLATFORM_ENVIRONMENTAL -
// confirm.
38 enum class PlatformEnvironmentalCommands : uint8_t
39 {
40     GET_TEMPERATURE_READING = 0x00,
41     READ_THERMAL_PARAMETERS = 0x02,
42     GET_CURRENT_POWER_DRAW = 0x03,
43     GET_MAX_OBSERVED_POWER = 0x04,
44     GET_CURRENT_ENERGY_COUNTER = 0x06,
45     GET_INVENTORY_INFORMATION = 0x0C,
46     GET_DRIVER_INFORMATION = 0x0E,
47     GET_VOLTAGE = 0x0F,
48 };
49 
// One-byte command codes for the PCIe port listing (0x07) and scalar group
// telemetry (0x24) queries.
// NOTE(review): by name these belong to MessageType::PCIE_LINK - confirm.
50 enum class PcieLinkCommands : uint8_t
51 {
52     ListPCIePorts = 0x07,
53     QueryScalarGroupTelemetryV2 = 0x24,
54 };
55 
// One-byte device kind codes. Values 1, 3 and 4 are not assigned here.
// NOTE(review): QueryDeviceIdentificationResponse::device_identification is a
// raw uint8_t that presumably carries one of these values - confirm.
56 enum class DeviceIdentification : uint8_t
57 {
58     DEVICE_GPU = 0,
59     DEVICE_PCIE = 2,
60     DEVICE_SMA = 5
61 };
62 
// One-byte identifiers of inventory properties, numbered contiguously from
// 0 (BOARD_PART_NUMBER) to 36 (NVLINK_PEER_TYPE).
// decodeGetInventoryInformationResponse takes one of these as `propertyId`;
// encodeGetInventoryInformationRequest takes the id as a raw uint8_t.
63 enum class InventoryPropertyId : uint8_t
64 {
65     BOARD_PART_NUMBER = 0,
66     SERIAL_NUMBER = 1,
67     MARKETING_NAME = 2,
68     DEVICE_PART_NUMBER = 3,
69     FRU_PART_NUMBER = 4,
70     MEMORY_VENDOR = 5,
71     MEMORY_PART_NUMBER = 6,
72     MAX_MEMORY_CAPACITY = 7,
73     BUILD_DATE = 8,
74     FIRMWARE_VERSION = 9,
75     DEVICE_GUID = 10,
76     INFOROM_VERSION = 11,
77     PRODUCT_LENGTH = 12,
78     PRODUCT_WIDTH = 13,
79     PRODUCT_HEIGHT = 14,
80     RATED_DEVICE_POWER_LIMIT = 15,
81     MIN_DEVICE_POWER_LIMIT = 16,
82     MAX_DEVICE_POWER_LIMIT = 17,
83     MAX_MODULE_POWER_LIMIT = 18,
84     MIN_MODULE_POWER_LIMIT = 19,
85     RATED_MODULE_POWER_LIMIT = 20,
86     DEFAULT_BOOST_CLOCKS = 21,
87     DEFAULT_BASE_CLOCKS = 22,
88     DEFAULT_EDPP_SCALING = 23,
89     MIN_EDPP_SCALING = 24,
90     MAX_EDPP_SCALING = 25,
91     MIN_GRAPHICS_CLOCK = 26,
92     MAX_GRAPHICS_CLOCK = 27,
93     MIN_MEMORY_CLOCK = 28,
94     MAX_MEMORY_CLOCK = 29,
95     INFINIBAND_GUID = 30,
96     RACK_GUID = 31,
97     RACK_SLOT_NUMBER = 32,
98     COMPUTE_SLOT_INDEX = 33,
99     NODE_INDEX = 34,
100     GPU_NODE_ID = 35,
101     NVLINK_PEER_TYPE = 36
102 };
103 
// One-byte PCIe port direction: UPSTREAM (0) or DOWNSTREAM (1).
// Taken as the `portType` argument of encodeQueryScalarGroupTelemetryV2Request.
104 enum class PciePortType : uint8_t
105 {
106     UPSTREAM = 0,
107     DOWNSTREAM = 1,
108 };
109 
// One-byte driver state: unknown (0), not loaded (1) or loaded (2).
// Used as the type of GetDriverInformationResponse::driverState and as an
// output of decodeGetDriverInformationResponse.
110 enum class DriverState : uint8_t
111 {
112     DRIVER_STATE_UNKNOWN = 0,
113     DRIVER_STATE_NOT_LOADED = 1,
114     DRIVER_STATE_LOADED = 2,
115 };
116 
// Query Device Identification request: the common request header only, with
// no payload fields. Declared packed, so the compiler inserts no padding.
// NOTE(review): presumably mirrors the on-the-wire message layout - confirm.
117 struct QueryDeviceIdentificationRequest
118 {
119     ocp::accelerator_management::CommonRequest hdr;
120 } __attribute__((packed));
121 
// Query Device Identification response: common response header followed by
// two single-byte fields. Declared packed, so there is no padding.
// NOTE(review): device_identification presumably holds a
// DeviceIdentification value - confirm in the decoder.
122 struct QueryDeviceIdentificationResponse
123 {
124     ocp::accelerator_management::CommonResponse hdr;
125     uint8_t device_identification;
126     uint8_t instance_id;
127 } __attribute__((packed));
128 
// Generic single-sensor request: common request header followed by a one-byte
// sensor id. Declared packed, so there is no padding. This layout is reused
// through aliases for the temperature, thermal-parameter, energy-counter and
// voltage requests.
129 struct GetNumericSensorReadingRequest
130 {
131     ocp::accelerator_management::CommonRequest hdr;
132     uint8_t sensor_id;
133 } __attribute__((packed));
134 
// The temperature-reading request uses the generic header + sensor id layout.
135 using GetTemperatureReadingRequest = GetNumericSensorReadingRequest;
136 
// The thermal-parameters request uses the generic header + sensor id layout.
137 using ReadThermalParametersRequest = GetNumericSensorReadingRequest;
138 
// Power draw request: common request header, one-byte sensor id, one-byte
// averaging interval. Declared packed, so there is no padding.
// NOTE(review): the unit/encoding of averagingInterval is not visible in this
// header - confirm against the protocol specification.
139 struct GetPowerDrawRequest
140 {
141     ocp::accelerator_management::CommonRequest hdr;
142     uint8_t sensorId;
143     uint8_t averagingInterval;
144 } __attribute__((packed));
145 
// The energy-counter request uses the generic header + sensor id layout.
146 using GetCurrentEnergyCounterRequest = GetNumericSensorReadingRequest;
147 
// The voltage request uses the generic header + sensor id layout.
148 using GetVoltageRequest = GetNumericSensorReadingRequest;
149 
// Scalar group telemetry (V2) request: common request header followed by
// three single-byte fields. Declared packed, so there is no padding.
// NOTE(review): the encoder also takes a PciePortType that has no dedicated
// field here; how it is encoded is decided in the implementation - confirm.
150 struct QueryScalarGroupTelemetryV2Request
151 {
152     ocp::accelerator_management::CommonRequest hdr;
153     uint8_t upstreamPortNumber;
154     uint8_t portNumber;
155     uint8_t groupId;
156 } __attribute__((packed));
157 
// Temperature reading response: common response header followed by a signed
// 32-bit reading. Declared packed, so there is no padding.
// NOTE(review): decodeGetTemperatureReadingResponse yields a double, so the
// raw integer is presumably scaled/converted by the decoder - confirm the
// unit and byte order there.
158 struct GetTemperatureReadingResponse
159 {
160     ocp::accelerator_management::CommonResponse hdr;
161     int32_t reading;
162 } __attribute__((packed));
163 
// Thermal parameters response: common response header followed by a signed
// 32-bit threshold. Declared packed, so there is no padding.
// NOTE(review): unit and byte order of `threshold` are not visible here.
164 struct ReadThermalParametersResponse
165 {
166     ocp::accelerator_management::CommonResponse hdr;
167     int32_t threshold;
168 } __attribute__((packed));
169 
// Power draw response: common response header followed by an unsigned 32-bit
// power value. Declared packed, so there is no padding.
// NOTE(review): unit and byte order of `power` are not visible here.
170 struct GetPowerDrawResponse
171 {
172     ocp::accelerator_management::CommonResponse hdr;
173     uint32_t power;
174 } __attribute__((packed));
175 
// Energy counter response: common response header followed by an unsigned
// 64-bit energy value. Declared packed, so there is no padding.
// NOTE(review): unit and byte order of `energy` are not visible here.
176 struct GetCurrentEnergyCounterResponse
177 {
178     ocp::accelerator_management::CommonResponse hdr;
179     uint64_t energy;
180 } __attribute__((packed));
181 
// Voltage response: common response header followed by an unsigned 32-bit
// voltage value. Declared packed, so there is no padding.
// NOTE(review): unit and byte order of `voltage` are not visible here.
182 struct GetVoltageResponse
183 {
184     ocp::accelerator_management::CommonResponse hdr;
185     uint32_t voltage;
186 } __attribute__((packed));
187 
// Fixed leading part of the List PCIe Ports response: common response header
// followed by a 16-bit upstream port count. Declared packed, so no padding.
// NOTE(review): presumably followed in the message by per-upstream-port
// ListPCIePortsDownstreamPortsData records - confirm in the decoder.
188 struct ListPCIePortsResponse
189 {
190     ocp::accelerator_management::CommonResponse hdr;
191     uint16_t numUpstreamPorts;
192 } __attribute__((packed));
193 
// Two-byte packed record with an `isInternal` flag byte and a `count` byte.
// It carries no message header of its own.
// NOTE(review): presumably describes the downstream ports of one upstream
// port inside a List PCIe Ports response - confirm in the decoder.
194 struct ListPCIePortsDownstreamPortsData
195 {
196     uint8_t isInternal;
197     uint8_t count;
198 } __attribute__((packed));
199 
// Driver information response: common response header, a one-byte
// DriverState, then a single char. Declared packed, so there is no padding.
// NOTE(review): decodeGetDriverInformationResponse outputs a std::string, so
// `driverVersion` presumably marks the first byte of a variable-length
// version string that extends past this struct - confirm in the decoder.
200 struct GetDriverInformationResponse
201 {
202     ocp::accelerator_management::CommonResponse hdr;
203     DriverState driverState;
204     char driverVersion;
205 } __attribute__((packed));
206 
// Inventory information request: common request header followed by a
// one-byte property id. Declared packed, so there is no padding.
// NOTE(review): property_id presumably carries an InventoryPropertyId value.
207 struct GetInventoryInformationRequest
208 {
209     ocp::accelerator_management::CommonRequest hdr;
210     uint8_t property_id;
211 } __attribute__((packed));
212 
// Inventory information response: common response header followed by a
// fixed array of maxInventoryDataSize (256) bytes. Declared packed.
// NOTE(review): how many of the bytes are valid for a given reply is not
// expressed in this struct - confirm how the decoder determines the length.
213 struct GetInventoryInformationResponse
214 {
215     ocp::accelerator_management::CommonResponse hdr;
216     std::array<uint8_t, maxInventoryDataSize> data;
217 } __attribute__((packed));
218 
// Declaration only; the definition is not in this header.
// hdr: read-only header description (BindingPciVidInfo).
// msg: header structure passed by non-const reference (BindingPciVid).
// NOTE(review): by name, fills `msg` from `hdr`. The meaning of the int
// return value is defined by the implementation - confirm.
219 int packHeader(const ocp::accelerator_management::BindingPciVidInfo& hdr,
220                ocp::accelerator_management::BindingPciVid& msg);
221 
// Declaration only; the definition is not in this header.
// instanceId: one-byte instance id for the request.
// buf: writable byte span; by name, receives the encoded request.
// NOTE(review): the meaning of the int return value and the required size of
// `buf` are defined by the implementation - confirm.
222 int encodeQueryDeviceIdentificationRequest(uint8_t instanceId,
223                                            std::span<uint8_t> buf);
224 
// Declaration only; the definition is not in this header.
// buf: read-only byte span holding the response message.
// cc, reasonCode, deviceIdentification, deviceInstance: non-const references,
// presumably outputs filled in from the decoded message.
// NOTE(review): the meaning of the int return value, and which outputs are
// written on failure, are defined by the implementation - confirm.
225 int decodeQueryDeviceIdentificationResponse(
226     std::span<const uint8_t> buf,
227     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
228     uint8_t& deviceIdentification, uint8_t& deviceInstance);
229 
// Declaration only; the definition is not in this header.
// instanceId: one-byte instance id for the request.
// sensorId: one-byte id of the sensor to query.
// buf: writable byte span; by name, receives the encoded request.
// NOTE(review): the meaning of the int return value is defined by the
// implementation - confirm.
230 int encodeGetTemperatureReadingRequest(uint8_t instanceId, uint8_t sensorId,
231                                        std::span<uint8_t> buf);
232 
// Declaration only; the definition is not in this header.
// buf: read-only byte span holding the response message.
// cc, reasonCode, temperatureReading: non-const references, presumably
// outputs filled in from the decoded message.
// NOTE(review): GetTemperatureReadingResponse stores an int32_t while this
// outputs a double; any scaling and the unit are decided by the
// implementation, as is the meaning of the int return value - confirm.
233 int decodeGetTemperatureReadingResponse(
234     std::span<const uint8_t> buf,
235     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
236     double& temperatureReading);
237 
// Declaration only; the definition is not in this header.
// instanceId: one-byte instance id for the request.
// sensorId: one-byte id of the sensor to query.
// buf: writable byte span; by name, receives the encoded request.
// NOTE(review): the meaning of the int return value is defined by the
// implementation - confirm.
238 int encodeReadThermalParametersRequest(uint8_t instanceId, uint8_t sensorId,
239                                        std::span<uint8_t> buf);
240 
// Declaration only; the definition is not in this header.
// buf: read-only byte span holding the response message.
// cc, reasonCode, threshold: non-const references, presumably outputs filled
// in from the decoded message.
// NOTE(review): the unit of `threshold` and the meaning of the int return
// value are defined by the implementation - confirm.
241 int decodeReadThermalParametersResponse(
242     std::span<const uint8_t> buf,
243     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
244     int32_t& threshold);
245 
// Declaration only; the definition is not in this header.
// commandCode: the PlatformEnvironmentalCommands value to send.
// instanceId: one-byte instance id for the request.
// sensorId, averagingInterval: single-byte request parameters (they match
// the fields of GetPowerDrawRequest).
// buf: writable byte span; by name, receives the encoded request.
// NOTE(review): presumably commandCode is GET_CURRENT_POWER_DRAW or
// GET_MAX_OBSERVED_POWER; whether other values are rejected, and the meaning
// of the int return value, are defined by the implementation - confirm.
246 int encodeGetPowerDrawRequest(
247     PlatformEnvironmentalCommands commandCode, uint8_t instanceId,
248     uint8_t sensorId, uint8_t averagingInterval, std::span<uint8_t> buf);
249 
// Declaration only; the definition is not in this header.
// buf: read-only byte span holding the response message.
// cc, reasonCode, power: non-const references, presumably outputs filled in
// from the decoded message.
// NOTE(review): the unit of `power` and the meaning of the int return value
// are defined by the implementation - confirm.
250 int decodeGetPowerDrawResponse(std::span<const uint8_t> buf,
251                                ocp::accelerator_management::CompletionCode& cc,
252                                uint16_t& reasonCode, uint32_t& power);
253 
// Declaration only; the definition is not in this header.
// instanceId: one-byte instance id for the request.
// sensorId: one-byte id of the sensor to query.
// buf: writable byte span; by name, receives the encoded request.
// NOTE(review): the meaning of the int return value is defined by the
// implementation - confirm.
254 int encodeGetCurrentEnergyCounterRequest(uint8_t instanceId, uint8_t sensorId,
255                                          std::span<uint8_t> buf);
256 
// Declaration only; the definition is not in this header.
// buf: read-only byte span holding the response message.
// cc, reasonCode, energy: non-const references, presumably outputs filled in
// from the decoded message.
// NOTE(review): the unit of `energy` and the meaning of the int return value
// are defined by the implementation - confirm.
257 int decodeGetCurrentEnergyCounterResponse(
258     std::span<const uint8_t> buf,
259     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
260     uint64_t& energy);
261 
// Declaration only; the definition is not in this header.
// instanceId: one-byte instance id for the request.
// sensorId: one-byte id of the sensor to query.
// buf: writable byte span; by name, receives the encoded request.
// NOTE(review): the meaning of the int return value is defined by the
// implementation - confirm.
262 int encodeGetVoltageRequest(uint8_t instanceId, uint8_t sensorId,
263                             std::span<uint8_t> buf);
264 
// Declaration only; the definition is not in this header.
// buf: read-only byte span holding the response message.
// cc, reasonCode, voltage: non-const references, presumably outputs filled in
// from the decoded message.
// NOTE(review): the unit of `voltage` and the meaning of the int return value
// are defined by the implementation - confirm.
265 int decodeGetVoltageResponse(std::span<const uint8_t> buf,
266                              ocp::accelerator_management::CompletionCode& cc,
267                              uint16_t& reasonCode, uint32_t& voltage);
268 
// Declaration only; the definition is not in this header.
// instanceId: one-byte instance id for the request.
// buf: writable byte span; by name, receives the encoded request.
// NOTE(review): the meaning of the int return value is defined by the
// implementation - confirm.
269 int encodeGetDriverInformationRequest(uint8_t instanceId,
270                                       std::span<uint8_t> buf);
271 
// Declaration only; the definition is not in this header.
// buf: read-only byte span holding the response message.
// cc, reasonCode, driverState, driverVersion: non-const references,
// presumably outputs filled in from the decoded message.
// NOTE(review): how the length of the version string is derived from `buf`,
// and the meaning of the int return value, are defined by the implementation
// - confirm.
272 int decodeGetDriverInformationResponse(
273     std::span<const uint8_t> buf,
274     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
275     DriverState& driverState, std::string& driverVersion);
276 
// Declaration only; the definition is not in this header.
// instanceId: one-byte instance id for the request.
// propertyId: one-byte property id, taken as a raw uint8_t rather than as
// InventoryPropertyId.
// buf: writable byte span; by name, receives the encoded request.
// NOTE(review): the meaning of the int return value is defined by the
// implementation - confirm.
277 int encodeGetInventoryInformationRequest(uint8_t instanceId, uint8_t propertyId,
278                                          std::span<uint8_t> buf);
279 
// Declaration only; the definition is not in this header.
// buf: read-only byte span holding the response message.
// cc, reasonCode, value: non-const references, presumably outputs filled in
// from the decoded message.
// propertyId: passed by value, so it is an input.
// NOTE(review): presumably propertyId selects whether `value` holds the
// string or the byte-vector alternative of InventoryValue; that mapping and
// the meaning of the int return value are defined by the implementation -
// confirm.
280 int decodeGetInventoryInformationResponse(
281     std::span<const uint8_t> buf,
282     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
283     InventoryPropertyId propertyId, InventoryValue& value);
284 
// Declaration only; the definition is not in this header.
// instanceId: one-byte instance id for the request.
// portType: upstream or downstream port selector.
// upstreamPortNumber, portNumber, groupId: single-byte request parameters
// (they match the fields of QueryScalarGroupTelemetryV2Request).
// buf: writable byte span; by name, receives the encoded request.
// NOTE(review): the request struct has no dedicated port-type field, so how
// portType is encoded, and the meaning of the int return value, are defined
// by the implementation - confirm.
285 int encodeQueryScalarGroupTelemetryV2Request(
286     uint8_t instanceId, PciePortType portType, uint8_t upstreamPortNumber,
287     uint8_t portNumber, uint8_t groupId, std::span<uint8_t> buf);
288 
// Declaration only; the definition is not in this header.
// buf: read-only byte span holding the response message.
// cc, reasonCode, numTelemetryValues, telemetryValues: non-const references,
// presumably outputs filled in from the decoded message.
// NOTE(review): whether telemetryValues is cleared, resized or appended to,
// how numTelemetryValues relates to its size, and the meaning of the int
// return value are all defined by the implementation - confirm.
289 int decodeQueryScalarGroupTelemetryV2Response(
290     std::span<const uint8_t> buf,
291     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
292     size_t& numTelemetryValues, std::vector<uint32_t>& telemetryValues);
293 
// Declaration only; the definition is not in this header.
// instanceId: one-byte instance id for the request.
// buf: writable byte span; by name, receives the encoded request.
// NOTE(review): the meaning of the int return value is defined by the
// implementation - confirm.
294 int encodeListPciePortsRequest(uint8_t instanceId, std::span<uint8_t> buf);
295 
// Declaration only; the definition is not in this header.
// buf: read-only byte span holding the response message.
// cc, reasonCode, numUpstreamPorts, numDownstreamPorts: non-const references,
// presumably outputs filled in from the decoded message.
// NOTE(review): presumably numDownstreamPorts gets one entry per upstream
// port; that, and the meaning of the int return value, are defined by the
// implementation - confirm.
296 int decodeListPciePortsResponse(
297     std::span<const uint8_t> buf,
298     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
299     uint16_t& numUpstreamPorts, std::vector<uint8_t>& numDownstreamPorts);
300 
301 } // namespace gpu
302