1 /* 2 * SPDX-FileCopyrightText: Copyright OpenBMC Authors 3 * SPDX-License-Identifier: Apache-2.0 4 */ 5 6 #pragma once 7 8 #include <OcpMctpVdm.hpp> 9 10 #include <array> 11 #include <cstddef> 12 #include <cstdint> 13 #include <span> 14 #include <string> 15 #include <utility> 16 #include <variant> 17 #include <vector> 18 19 namespace gpu 20 { 21 22 using InventoryValue = std::variant<std::string, std::vector<uint8_t>>; 23 constexpr size_t maxInventoryDataSize = 256; 24 25 constexpr uint16_t nvidiaPciVendorId = 0x10de; 26 27 enum class MessageType : uint8_t 28 { 29 DEVICE_CAPABILITY_DISCOVERY = 0, 30 NETWORK_PORT = 1, 31 PCIE_LINK = 2, 32 PLATFORM_ENVIRONMENTAL = 3 33 }; 34 35 enum class DeviceCapabilityDiscoveryCommands : uint8_t 36 { 37 QUERY_DEVICE_IDENTIFICATION = 0x09, 38 }; 39 40 enum class PlatformEnvironmentalCommands : uint8_t 41 { 42 GET_TEMPERATURE_READING = 0x00, 43 READ_THERMAL_PARAMETERS = 0x02, 44 GET_CURRENT_POWER_DRAW = 0x03, 45 GET_MAX_OBSERVED_POWER = 0x04, 46 GET_CURRENT_ENERGY_COUNTER = 0x06, 47 GET_INVENTORY_INFORMATION = 0x0C, 48 GET_DRIVER_INFORMATION = 0x0E, 49 GET_VOLTAGE = 0x0F, 50 }; 51 52 enum class NetworkPortCommands : uint8_t 53 { 54 GetEthernetPortTelemetryCounters = 0x0F, 55 GetPortNetworkAddresses = 0x11, 56 }; 57 58 enum class PcieLinkCommands : uint8_t 59 { 60 ListPCIePorts = 0x07, 61 QueryScalarGroupTelemetryV2 = 0x24, 62 }; 63 64 enum class DeviceIdentification : uint8_t 65 { 66 DEVICE_GPU = 0, 67 DEVICE_PCIE = 2, 68 DEVICE_SMA = 5 69 }; 70 71 enum class InventoryPropertyId : uint8_t 72 { 73 BOARD_PART_NUMBER = 0, 74 SERIAL_NUMBER = 1, 75 MARKETING_NAME = 2, 76 DEVICE_PART_NUMBER = 3, 77 FRU_PART_NUMBER = 4, 78 MEMORY_VENDOR = 5, 79 MEMORY_PART_NUMBER = 6, 80 MAX_MEMORY_CAPACITY = 7, 81 BUILD_DATE = 8, 82 FIRMWARE_VERSION = 9, 83 DEVICE_GUID = 10, 84 INFOROM_VERSION = 11, 85 PRODUCT_LENGTH = 12, 86 PRODUCT_WIDTH = 13, 87 PRODUCT_HEIGHT = 14, 88 RATED_DEVICE_POWER_LIMIT = 15, 89 MIN_DEVICE_POWER_LIMIT = 16, 90 MAX_DEVICE_POWER_LIMIT = 17, 91 MAX_MODULE_POWER_LIMIT = 18, 92 MIN_MODULE_POWER_LIMIT = 19, 93 RATED_MODULE_POWER_LIMIT = 20, 94 DEFAULT_BOOST_CLOCKS = 21, 95 DEFAULT_BASE_CLOCKS = 22, 96 DEFAULT_EDPP_SCALING = 23, 97 MIN_EDPP_SCALING = 24, 98 MAX_EDPP_SCALING = 25, 99 MIN_GRAPHICS_CLOCK = 26, 100 MAX_GRAPHICS_CLOCK = 27, 101 MIN_MEMORY_CLOCK = 28, 102 MAX_MEMORY_CLOCK = 29, 103 INFINIBAND_GUID = 30, 104 RACK_GUID = 31, 105 RACK_SLOT_NUMBER = 32, 106 COMPUTE_SLOT_INDEX = 33, 107 NODE_INDEX = 34, 108 GPU_NODE_ID = 35, 109 NVLINK_PEER_TYPE = 36 110 }; 111 112 enum class PciePortType : uint8_t 113 { 114 UPSTREAM = 0, 115 DOWNSTREAM = 1, 116 }; 117 118 enum class DriverState : uint8_t 119 { 120 DRIVER_STATE_UNKNOWN = 0, 121 DRIVER_STATE_NOT_LOADED = 1, 122 DRIVER_STATE_LOADED = 2, 123 }; 124 125 enum class NetworkPortLinkType : uint8_t 126 { 127 ETHERNET = 0, 128 INFINIBAND = 1, 129 UNKNOWN = 0xFF, 130 }; 131 132 struct QueryDeviceIdentificationRequest 133 { 134 ocp::accelerator_management::CommonRequest hdr; 135 } __attribute__((packed)); 136 137 struct QueryDeviceIdentificationResponse 138 { 139 ocp::accelerator_management::CommonResponse hdr; 140 uint8_t device_identification; 141 uint8_t instance_id; 142 } __attribute__((packed)); 143 144 struct GetNumericSensorReadingRequest 145 { 146 ocp::accelerator_management::CommonRequest hdr; 147 uint8_t sensor_id; 148 } __attribute__((packed)); 149 150 using GetTemperatureReadingRequest = GetNumericSensorReadingRequest; 151 152 using ReadThermalParametersRequest = GetNumericSensorReadingRequest; 153 154 struct GetPowerDrawRequest 155 { 156 ocp::accelerator_management::CommonRequest hdr; 157 uint8_t sensorId; 158 uint8_t averagingInterval; 159 } __attribute__((packed)); 160 161 using GetCurrentEnergyCounterRequest = GetNumericSensorReadingRequest; 162 163 using GetVoltageRequest = GetNumericSensorReadingRequest; 164 165 struct QueryScalarGroupTelemetryV2Request 166 { 167 ocp::accelerator_management::CommonRequest hdr; 168 uint8_t upstreamPortNumber; 169 uint8_t portNumber; 170 uint8_t groupId; 171 } __attribute__((packed)); 172 173 struct GetPortNetworkAddressesRequest 174 { 175 ocp::accelerator_management::CommonRequest hdr; 176 uint16_t portNumber; 177 } __attribute__((packed)); 178 179 struct GetEthernetPortTelemetryCountersRequest 180 { 181 ocp::accelerator_management::CommonRequest hdr; 182 uint16_t portNumber; 183 } __attribute__((packed)); 184 185 struct GetTemperatureReadingResponse 186 { 187 ocp::accelerator_management::CommonResponse hdr; 188 int32_t reading; 189 } __attribute__((packed)); 190 191 struct ReadThermalParametersResponse 192 { 193 ocp::accelerator_management::CommonResponse hdr; 194 int32_t threshold; 195 } __attribute__((packed)); 196 197 struct GetPowerDrawResponse 198 { 199 ocp::accelerator_management::CommonResponse hdr; 200 uint32_t power; 201 } __attribute__((packed)); 202 203 struct GetCurrentEnergyCounterResponse 204 { 205 ocp::accelerator_management::CommonResponse hdr; 206 uint64_t energy; 207 } __attribute__((packed)); 208 209 struct GetVoltageResponse 210 { 211 ocp::accelerator_management::CommonResponse hdr; 212 uint32_t voltage; 213 } __attribute__((packed)); 214 215 struct ListPCIePortsResponse 216 { 217 ocp::accelerator_management::CommonResponse hdr; 218 uint16_t numUpstreamPorts; 219 } __attribute__((packed)); 220 221 struct ListPCIePortsDownstreamPortsData 222 { 223 uint8_t isInternal; 224 uint8_t count; 225 } __attribute__((packed)); 226 227 struct GetDriverInformationResponse 228 { 229 ocp::accelerator_management::CommonResponse hdr; 230 DriverState driverState; 231 char driverVersion; 232 } __attribute__((packed)); 233 234 struct GetInventoryInformationRequest 235 { 236 ocp::accelerator_management::CommonRequest hdr; 237 uint8_t property_id; 238 } __attribute__((packed)); 239 240 struct GetInventoryInformationResponse 241 { 242 ocp::accelerator_management::CommonResponse hdr; 243 std::array<uint8_t, maxInventoryDataSize> data; 244 } __attribute__((packed)); 245 246 int packHeader(const ocp::accelerator_management::BindingPciVidInfo& hdr, 247 ocp::accelerator_management::BindingPciVid& msg); 248 249 int encodeQueryDeviceIdentificationRequest(uint8_t instanceId, 250 std::span<uint8_t> buf); 251 252 int decodeQueryDeviceIdentificationResponse( 253 std::span<const uint8_t> buf, 254 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 255 uint8_t& deviceIdentification, uint8_t& deviceInstance); 256 257 int encodeGetTemperatureReadingRequest(uint8_t instanceId, uint8_t sensorId, 258 std::span<uint8_t> buf); 259 260 int decodeGetTemperatureReadingResponse( 261 std::span<const uint8_t> buf, 262 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 263 double& temperatureReading); 264 265 int encodeReadThermalParametersRequest(uint8_t instanceId, uint8_t sensorId, 266 std::span<uint8_t> buf); 267 268 int decodeReadThermalParametersResponse( 269 std::span<const uint8_t> buf, 270 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 271 int32_t& threshold); 272 273 int encodeGetPowerDrawRequest( 274 PlatformEnvironmentalCommands commandCode, uint8_t instanceId, 275 uint8_t sensorId, uint8_t averagingInterval, std::span<uint8_t> buf); 276 277 int decodeGetPowerDrawResponse(std::span<const uint8_t> buf, 278 ocp::accelerator_management::CompletionCode& cc, 279 uint16_t& reasonCode, uint32_t& power); 280 281 int encodeGetCurrentEnergyCounterRequest(uint8_t instanceId, uint8_t sensorId, 282 std::span<uint8_t> buf); 283 284 int decodeGetCurrentEnergyCounterResponse( 285 std::span<const uint8_t> buf, 286 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 287 uint64_t& energy); 288 289 int encodeGetVoltageRequest(uint8_t instanceId, uint8_t sensorId, 290 std::span<uint8_t> buf); 291 292 int decodeGetVoltageResponse(std::span<const uint8_t> buf, 293 ocp::accelerator_management::CompletionCode& cc, 294 uint16_t& reasonCode, uint32_t& voltage); 295 296 int encodeGetDriverInformationRequest(uint8_t instanceId, 297 std::span<uint8_t> buf); 298 299 int decodeGetDriverInformationResponse( 300 std::span<const uint8_t> buf, 301 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 302 DriverState& driverState, std::string& driverVersion); 303 304 int encodeGetInventoryInformationRequest(uint8_t instanceId, uint8_t propertyId, 305 std::span<uint8_t> buf); 306 307 int decodeGetInventoryInformationResponse( 308 std::span<const uint8_t> buf, 309 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 310 InventoryPropertyId propertyId, InventoryValue& value); 311 312 int encodeQueryScalarGroupTelemetryV2Request( 313 uint8_t instanceId, PciePortType portType, uint8_t upstreamPortNumber, 314 uint8_t portNumber, uint8_t groupId, std::span<uint8_t> buf); 315 316 int decodeQueryScalarGroupTelemetryV2Response( 317 std::span<const uint8_t> buf, 318 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 319 size_t& numTelemetryValues, std::vector<uint32_t>& telemetryValues); 320 321 int encodeListPciePortsRequest(uint8_t instanceId, std::span<uint8_t> buf); 322 323 int decodeListPciePortsResponse( 324 std::span<const uint8_t> buf, 325 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 326 uint16_t& numUpstreamPorts, std::vector<uint8_t>& numDownstreamPorts); 327 328 int encodeGetPortNetworkAddressesRequest( 329 uint8_t instanceId, uint16_t portNumber, std::span<uint8_t> buf); 330 331 int decodeGetPortNetworkAddressesResponse( 332 std::span<const uint8_t> buf, 333 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 334 NetworkPortLinkType& linkType, 335 std::vector<std::pair<uint8_t, uint64_t>>& addresses); 336 337 int encodeGetEthernetPortTelemetryCountersRequest( 338 uint8_t instanceId, uint16_t portNumber, std::span<uint8_t> buf); 339 340 int decodeGetEthernetPortTelemetryCountersResponse( 341 std::span<const uint8_t> buf, 342 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 343 std::vector<std::pair<uint8_t, uint64_t>>& telemetryValues); 344 } // namespace gpu 345