// xref: /openbmc/dbus-sensors/src/nvidia-gpu/NvidiaGpuMctpVdm.hpp (revision e0b80e1e58bddcf218369f2f9e3ba2002b59b6f9)
/*
 * SPDX-FileCopyrightText: Copyright OpenBMC Authors
 * SPDX-License-Identifier: Apache-2.0
 */
5 
6 #pragma once
7 
8 #include <OcpMctpVdm.hpp>
9 
10 #include <array>
11 #include <cstddef>
12 #include <cstdint>
13 #include <span>
14 #include <string>
15 #include <variant>
16 #include <vector>
17 
18 namespace gpu
19 {
20 
/**
 * @brief Decoded inventory property value.
 *
 * Holds either a std::string or a raw byte vector (std::vector<uint8_t>).
 * It is the output type of decodeGetInventoryInformationResponse().
 */
using InventoryValue = std::variant<std::string, std::vector<uint8_t>>;

/**
 * @brief Number of bytes in GetInventoryInformationResponse::data.
 *
 * Declared `inline` so that this header-defined constant is a single entity
 * across all translation units instead of one internal-linkage copy per TU.
 */
inline constexpr size_t maxInventoryDataSize = 256;

/** @brief PCI vendor ID constant for NVIDIA (0x10de). */
inline constexpr uint16_t nvidiaPciVendorId = 0x10de;
25 
/**
 * @brief Message type codes, stored in one byte.
 *
 * By name, each enumerator pairs with one of the command enums declared in
 * this header (DeviceCapabilityDiscoveryCommands, PcieLinkCommands,
 * PlatformEnvironmentalCommands). Value 1 is not defined here.
 */
enum class MessageType : uint8_t
{
    DEVICE_CAPABILITY_DISCOVERY = 0,
    PCIE_LINK = 2,
    PLATFORM_ENVIRONMENTAL = 3
};
32 
/**
 * @brief Command codes of the device-capability-discovery group.
 *
 * Only the single command used by this header is listed.
 */
enum class DeviceCapabilityDiscoveryCommands : uint8_t
{
    QUERY_DEVICE_IDENTIFICATION = 0x09,
};
37 
/**
 * @brief Command codes of the platform-environmental group.
 *
 * The list is sparse: codes 0x01, 0x05, 0x07-0x0B and 0x0D-0x0E are not
 * defined here. encodeGetPowerDrawRequest() takes one of these values as its
 * commandCode argument.
 */
enum class PlatformEnvironmentalCommands : uint8_t
{
    GET_TEMPERATURE_READING = 0x00,
    READ_THERMAL_PARAMETERS = 0x02,
    GET_CURRENT_POWER_DRAW = 0x03,
    GET_MAX_OBSERVED_POWER = 0x04,
    GET_CURRENT_ENERGY_COUNTER = 0x06,
    GET_INVENTORY_INFORMATION = 0x0C,
    GET_VOLTAGE = 0x0F,
};
48 
/**
 * @brief Command codes of the PCIe-link group.
 *
 * NOTE(review): the enumerator uses CamelCase while the other command enums
 * in this header use UPPER_SNAKE_CASE; it is kept as-is because callers refer
 * to it by this name.
 */
enum class PcieLinkCommands : uint8_t
{
    QueryScalarGroupTelemetryV2 = 0x24,
};
53 
/**
 * @brief Device kind codes, stored in one byte.
 *
 * Presumably the values carried in
 * QueryDeviceIdentificationResponse::device_identification - confirm against
 * the decoder. Values 1, 3 and 4 are not defined here.
 */
enum class DeviceIdentification : uint8_t
{
    DEVICE_GPU = 0,
    DEVICE_PCIE = 2,
    DEVICE_SMA = 5
};
60 
/**
 * @brief Identifiers of inventory properties, numbered contiguously 0..36.
 *
 * decodeGetInventoryInformationResponse() takes a value of this type. The
 * request struct GetInventoryInformationRequest carries the identifier as a
 * plain uint8_t (property_id).
 */
enum class InventoryPropertyId : uint8_t
{
    BOARD_PART_NUMBER = 0,
    SERIAL_NUMBER = 1,
    MARKETING_NAME = 2,
    DEVICE_PART_NUMBER = 3,
    FRU_PART_NUMBER = 4,
    MEMORY_VENDOR = 5,
    MEMORY_PART_NUMBER = 6,
    MAX_MEMORY_CAPACITY = 7,
    BUILD_DATE = 8,
    FIRMWARE_VERSION = 9,
    DEVICE_GUID = 10,
    INFOROM_VERSION = 11,
    PRODUCT_LENGTH = 12,
    PRODUCT_WIDTH = 13,
    PRODUCT_HEIGHT = 14,
    RATED_DEVICE_POWER_LIMIT = 15,
    MIN_DEVICE_POWER_LIMIT = 16,
    MAX_DEVICE_POWER_LIMIT = 17,
    MAX_MODULE_POWER_LIMIT = 18,
    MIN_MODULE_POWER_LIMIT = 19,
    RATED_MODULE_POWER_LIMIT = 20,
    DEFAULT_BOOST_CLOCKS = 21,
    DEFAULT_BASE_CLOCKS = 22,
    DEFAULT_EDPP_SCALING = 23,
    MIN_EDPP_SCALING = 24,
    MAX_EDPP_SCALING = 25,
    MIN_GRAPHICS_CLOCK = 26,
    MAX_GRAPHICS_CLOCK = 27,
    MIN_MEMORY_CLOCK = 28,
    MAX_MEMORY_CLOCK = 29,
    INFINIBAND_GUID = 30,
    RACK_GUID = 31,
    RACK_SLOT_NUMBER = 32,
    COMPUTE_SLOT_INDEX = 33,
    NODE_INDEX = 34,
    GPU_NODE_ID = 35,
    NVLINK_PEER_TYPE = 36
};
101 
/**
 * @brief PCIe port direction selector.
 *
 * Passed to encodeQueryScalarGroupTelemetryV2Request(). How it is placed in
 * the request is decided by the encoder's implementation, which is not part
 * of this header.
 */
enum class PciePortType : uint8_t
{
    UPSTREAM = 0,
    DOWNSTREAM = 1,
};
107 
108 struct QueryDeviceIdentificationRequest
109 {
110     ocp::accelerator_management::CommonRequest hdr;
111 } __attribute__((packed));
112 
113 struct QueryDeviceIdentificationResponse
114 {
115     ocp::accelerator_management::CommonResponse hdr;
116     uint8_t device_identification;
117     uint8_t instance_id;
118 } __attribute__((packed));
119 
120 struct GetNumericSensorReadingRequest
121 {
122     ocp::accelerator_management::CommonRequest hdr;
123     uint8_t sensor_id;
124 } __attribute__((packed));
125 
126 using GetTemperatureReadingRequest = GetNumericSensorReadingRequest;
127 
128 using ReadThermalParametersRequest = GetNumericSensorReadingRequest;
129 
130 struct GetPowerDrawRequest
131 {
132     ocp::accelerator_management::CommonRequest hdr;
133     uint8_t sensorId;
134     uint8_t averagingInterval;
135 } __attribute__((packed));
136 
137 using GetCurrentEnergyCounterRequest = GetNumericSensorReadingRequest;
138 
139 using GetVoltageRequest = GetNumericSensorReadingRequest;
140 
141 struct QueryScalarGroupTelemetryV2Request
142 {
143     ocp::accelerator_management::CommonRequest hdr;
144     uint8_t upstreamPortNumber;
145     uint8_t portNumber;
146     uint8_t groupId;
147 } __attribute__((packed));
148 
149 struct GetTemperatureReadingResponse
150 {
151     ocp::accelerator_management::CommonResponse hdr;
152     int32_t reading;
153 } __attribute__((packed));
154 
155 struct ReadThermalParametersResponse
156 {
157     ocp::accelerator_management::CommonResponse hdr;
158     int32_t threshold;
159 } __attribute__((packed));
160 
161 struct GetPowerDrawResponse
162 {
163     ocp::accelerator_management::CommonResponse hdr;
164     uint32_t power;
165 } __attribute__((packed));
166 
167 struct GetCurrentEnergyCounterResponse
168 {
169     ocp::accelerator_management::CommonResponse hdr;
170     uint64_t energy;
171 } __attribute__((packed));
172 
173 struct GetVoltageResponse
174 {
175     ocp::accelerator_management::CommonResponse hdr;
176     uint32_t voltage;
177 } __attribute__((packed));
178 
179 struct GetInventoryInformationRequest
180 {
181     ocp::accelerator_management::CommonRequest hdr;
182     uint8_t property_id;
183 } __attribute__((packed));
184 
185 struct GetInventoryInformationResponse
186 {
187     ocp::accelerator_management::CommonResponse hdr;
188     std::array<uint8_t, maxInventoryDataSize> data;
189 } __attribute__((packed));
190 
191 int packHeader(const ocp::accelerator_management::BindingPciVidInfo& hdr,
192                ocp::accelerator_management::BindingPciVid& msg);
193 
194 int encodeQueryDeviceIdentificationRequest(uint8_t instanceId,
195                                            std::span<uint8_t> buf);
196 
197 int decodeQueryDeviceIdentificationResponse(
198     std::span<const uint8_t> buf,
199     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
200     uint8_t& deviceIdentification, uint8_t& deviceInstance);
201 
202 int encodeGetTemperatureReadingRequest(uint8_t instanceId, uint8_t sensorId,
203                                        std::span<uint8_t> buf);
204 
205 int decodeGetTemperatureReadingResponse(
206     std::span<const uint8_t> buf,
207     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
208     double& temperatureReading);
209 
210 int encodeReadThermalParametersRequest(uint8_t instanceId, uint8_t sensorId,
211                                        std::span<uint8_t> buf);
212 
213 int decodeReadThermalParametersResponse(
214     std::span<const uint8_t> buf,
215     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
216     int32_t& threshold);
217 
218 int encodeGetPowerDrawRequest(
219     PlatformEnvironmentalCommands commandCode, uint8_t instanceId,
220     uint8_t sensorId, uint8_t averagingInterval, std::span<uint8_t> buf);
221 
222 int decodeGetPowerDrawResponse(std::span<const uint8_t> buf,
223                                ocp::accelerator_management::CompletionCode& cc,
224                                uint16_t& reasonCode, uint32_t& power);
225 
226 int encodeGetCurrentEnergyCounterRequest(uint8_t instanceId, uint8_t sensorId,
227                                          std::span<uint8_t> buf);
228 
229 int decodeGetCurrentEnergyCounterResponse(
230     std::span<const uint8_t> buf,
231     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
232     uint64_t& energy);
233 
234 int encodeGetVoltageRequest(uint8_t instanceId, uint8_t sensorId,
235                             std::span<uint8_t> buf);
236 
237 int decodeGetVoltageResponse(std::span<const uint8_t> buf,
238                              ocp::accelerator_management::CompletionCode& cc,
239                              uint16_t& reasonCode, uint32_t& voltage);
240 
241 int encodeGetInventoryInformationRequest(uint8_t instanceId, uint8_t propertyId,
242                                          std::span<uint8_t> buf);
243 
244 int decodeGetInventoryInformationResponse(
245     std::span<const uint8_t> buf,
246     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
247     InventoryPropertyId propertyId, InventoryValue& value);
248 
249 int encodeQueryScalarGroupTelemetryV2Request(
250     uint8_t instanceId, PciePortType portType, uint8_t upstreamPortNumber,
251     uint8_t portNumber, uint8_t groupId, std::span<uint8_t> buf);
252 
253 int decodeQueryScalarGroupTelemetryV2Response(
254     std::span<const uint8_t> buf,
255     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
256     size_t& numTelemetryValues, std::vector<uint32_t>& telemetryValues);
257 
258 } // namespace gpu
259