xref: /openbmc/dbus-sensors/src/nvidia-gpu/NvidiaGpuMctpVdm.hpp (revision 6b7123225fc4a5180faf89190e9f64a7e248e697)
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
3  * AFFILIATES. All rights reserved.
4  * SPDX-License-Identifier: Apache-2.0
5  */
6 
7 #pragma once
8 
9 #include <OcpMctpVdm.hpp>
10 
11 #include <array>
12 #include <cstddef>
13 #include <cstdint>
14 #include <span>
15 #include <string>
16 #include <variant>
17 #include <vector>
18 
19 namespace gpu
20 {
21 
/// Value of one inventory property: either text or a raw byte sequence.
using InventoryValue = std::variant<std::string, std::vector<uint8_t>>;

/// Number of bytes in the data array of GetInventoryInformationResponse.
/// Declared `inline` so that every translation unit including this header
/// refers to one single definition instead of a per-TU internal copy.
inline constexpr std::size_t maxInventoryDataSize = 256;

/// PCI vendor ID assigned to NVIDIA.
inline constexpr uint16_t nvidiaPciVendorId = 0x10de;
26 
/// Message type values referenced by the messages declared in this header.
/// Each value fits in a single byte.
enum class MessageType : uint8_t
{
    DEVICE_CAPABILITY_DISCOVERY = 0x00,
    PLATFORM_ENVIRONMENTAL = 0x03,
};
32 
/// Command codes for device capability discovery.
/// NOTE(review): presumably these are the commands valid under
/// MessageType::DEVICE_CAPABILITY_DISCOVERY - confirm against the protocol
/// specification.
enum class DeviceCapabilityDiscoveryCommands : uint8_t
{
    QUERY_DEVICE_IDENTIFICATION = 0x09,
};
37 
/// Command codes for platform environmental queries (temperature, power,
/// energy, inventory, voltage).
/// NOTE(review): presumably these are the commands valid under
/// MessageType::PLATFORM_ENVIRONMENTAL - confirm against the protocol
/// specification. The numbering is intentionally sparse.
enum class PlatformEnvironmentalCommands : uint8_t
{
    GET_TEMPERATURE_READING = 0x00,
    READ_THERMAL_PARAMETERS = 0x02,
    GET_CURRENT_POWER_DRAW = 0x03,
    GET_MAX_OBSERVED_POWER = 0x04,
    GET_CURRENT_ENERGY_COUNTER = 0x06,
    GET_INVENTORY_INFORMATION = 0x0c,
    GET_VOLTAGE = 0x0f,
};
48 
/// Device kinds known to this header.
/// NOTE(review): presumably compared against the device_identification byte
/// of QueryDeviceIdentificationResponse - confirm with the callers.
enum class DeviceIdentification : uint8_t
{
    DEVICE_GPU = 0x00,
    DEVICE_SMA = 0x05,
};
54 
/// Identifiers of the inventory properties that can be requested.
/// Values are contiguous from 0 to 36; the section comments below group the
/// enumerators by name only and carry no protocol meaning.
enum class InventoryPropertyId : uint8_t
{
    // Part numbers, serial number and naming
    BOARD_PART_NUMBER = 0,
    SERIAL_NUMBER = 1,
    MARKETING_NAME = 2,
    DEVICE_PART_NUMBER = 3,
    FRU_PART_NUMBER = 4,

    // Memory
    MEMORY_VENDOR = 5,
    MEMORY_PART_NUMBER = 6,
    MAX_MEMORY_CAPACITY = 7,

    // Build date, versions and device GUID
    BUILD_DATE = 8,
    FIRMWARE_VERSION = 9,
    DEVICE_GUID = 10,
    INFOROM_VERSION = 11,

    // Product dimensions
    PRODUCT_LENGTH = 12,
    PRODUCT_WIDTH = 13,
    PRODUCT_HEIGHT = 14,

    // Device and module power limits
    RATED_DEVICE_POWER_LIMIT = 15,
    MIN_DEVICE_POWER_LIMIT = 16,
    MAX_DEVICE_POWER_LIMIT = 17,
    MAX_MODULE_POWER_LIMIT = 18,
    MIN_MODULE_POWER_LIMIT = 19,
    RATED_MODULE_POWER_LIMIT = 20,

    // Clocks and EDPP scaling
    DEFAULT_BOOST_CLOCKS = 21,
    DEFAULT_BASE_CLOCKS = 22,
    DEFAULT_EDPP_SCALING = 23,
    MIN_EDPP_SCALING = 24,
    MAX_EDPP_SCALING = 25,
    MIN_GRAPHICS_CLOCK = 26,
    MAX_GRAPHICS_CLOCK = 27,
    MIN_MEMORY_CLOCK = 28,
    MAX_MEMORY_CLOCK = 29,

    // Fabric GUIDs, rack/slot/node placement and NVLink
    INFINIBAND_GUID = 30,
    RACK_GUID = 31,
    RACK_SLOT_NUMBER = 32,
    COMPUTE_SLOT_INDEX = 33,
    NODE_INDEX = 34,
    GPU_NODE_ID = 35,
    NVLINK_PEER_TYPE = 36,
};
95 
96 struct QueryDeviceIdentificationRequest
97 {
98     ocp::accelerator_management::CommonRequest hdr;
99 } __attribute__((packed));
100 
101 struct QueryDeviceIdentificationResponse
102 {
103     ocp::accelerator_management::CommonResponse hdr;
104     uint8_t device_identification;
105     uint8_t instance_id;
106 } __attribute__((packed));
107 
108 struct GetNumericSensorReadingRequest
109 {
110     ocp::accelerator_management::CommonRequest hdr;
111     uint8_t sensor_id;
112 } __attribute__((packed));
113 
114 using GetTemperatureReadingRequest = GetNumericSensorReadingRequest;
115 
116 using ReadThermalParametersRequest = GetNumericSensorReadingRequest;
117 
118 struct GetPowerDrawRequest
119 {
120     ocp::accelerator_management::CommonRequest hdr;
121     uint8_t sensorId;
122     uint8_t averagingInterval;
123 } __attribute__((packed));
124 
125 using GetCurrentEnergyCounterRequest = GetNumericSensorReadingRequest;
126 
127 using GetVoltageRequest = GetNumericSensorReadingRequest;
128 
129 struct GetTemperatureReadingResponse
130 {
131     ocp::accelerator_management::CommonResponse hdr;
132     int32_t reading;
133 } __attribute__((packed));
134 
135 struct ReadThermalParametersResponse
136 {
137     ocp::accelerator_management::CommonResponse hdr;
138     int32_t threshold;
139 } __attribute__((packed));
140 
141 struct GetPowerDrawResponse
142 {
143     ocp::accelerator_management::CommonResponse hdr;
144     uint32_t power;
145 } __attribute__((packed));
146 
147 struct GetCurrentEnergyCounterResponse
148 {
149     ocp::accelerator_management::CommonResponse hdr;
150     uint64_t energy;
151 } __attribute__((packed));
152 
153 struct GetVoltageResponse
154 {
155     ocp::accelerator_management::CommonResponse hdr;
156     uint32_t voltage;
157 } __attribute__((packed));
158 
159 struct GetInventoryInformationRequest
160 {
161     ocp::accelerator_management::CommonRequest hdr;
162     uint8_t property_id;
163 } __attribute__((packed));
164 
165 struct GetInventoryInformationResponse
166 {
167     ocp::accelerator_management::CommonResponse hdr;
168     std::array<uint8_t, maxInventoryDataSize> data;
169 } __attribute__((packed));
170 
171 int packHeader(const ocp::accelerator_management::BindingPciVidInfo& hdr,
172                ocp::accelerator_management::BindingPciVid& msg);
173 
174 int encodeQueryDeviceIdentificationRequest(uint8_t instanceId,
175                                            std::span<uint8_t> buf);
176 
177 int decodeQueryDeviceIdentificationResponse(
178     std::span<const uint8_t> buf,
179     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
180     uint8_t& deviceIdentification, uint8_t& deviceInstance);
181 
182 int encodeGetTemperatureReadingRequest(uint8_t instanceId, uint8_t sensorId,
183                                        std::span<uint8_t> buf);
184 
185 int decodeGetTemperatureReadingResponse(
186     std::span<const uint8_t> buf,
187     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
188     double& temperatureReading);
189 
190 int encodeReadThermalParametersRequest(uint8_t instanceId, uint8_t sensorId,
191                                        std::span<uint8_t> buf);
192 
193 int decodeReadThermalParametersResponse(
194     std::span<const uint8_t> buf,
195     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
196     int32_t& threshold);
197 
198 int encodeGetPowerDrawRequest(
199     PlatformEnvironmentalCommands commandCode, uint8_t instanceId,
200     uint8_t sensorId, uint8_t averagingInterval, std::span<uint8_t> buf);
201 
202 int decodeGetPowerDrawResponse(std::span<const uint8_t> buf,
203                                ocp::accelerator_management::CompletionCode& cc,
204                                uint16_t& reasonCode, uint32_t& power);
205 
206 int encodeGetCurrentEnergyCounterRequest(uint8_t instanceId, uint8_t sensorId,
207                                          std::span<uint8_t> buf);
208 
209 int decodeGetCurrentEnergyCounterResponse(
210     std::span<const uint8_t> buf,
211     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
212     uint64_t& energy);
213 
214 int encodeGetVoltageRequest(uint8_t instanceId, uint8_t sensorId,
215                             std::span<uint8_t> buf);
216 
217 int decodeGetVoltageResponse(std::span<const uint8_t> buf,
218                              ocp::accelerator_management::CompletionCode& cc,
219                              uint16_t& reasonCode, uint32_t& voltage);
220 
221 int encodeGetInventoryInformationRequest(uint8_t instanceId, uint8_t propertyId,
222                                          std::span<uint8_t> buf);
223 
224 int decodeGetInventoryInformationResponse(
225     std::span<const uint8_t> buf,
226     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
227     InventoryPropertyId propertyId, InventoryValue& value);
228 
229 } // namespace gpu
230