1 /* 2 * SPDX-FileCopyrightText: Copyright OpenBMC Authors 3 * SPDX-License-Identifier: Apache-2.0 4 */ 5 6 #pragma once 7 8 #include <OcpMctpVdm.hpp> 9 10 #include <array> 11 #include <cstddef> 12 #include <cstdint> 13 #include <span> 14 #include <string> 15 #include <utility> 16 #include <variant> 17 #include <vector> 18 19 namespace gpu 20 { 21 22 using InventoryValue = 23 std::variant<std::string, std::vector<uint8_t>, uint32_t>; 24 constexpr size_t maxInventoryDataSize = 256; 25 26 constexpr uint16_t nvidiaPciVendorId = 0x10de; 27 28 enum class MessageType : uint8_t 29 { 30 DEVICE_CAPABILITY_DISCOVERY = 0, 31 NETWORK_PORT = 1, 32 PCIE_LINK = 2, 33 PLATFORM_ENVIRONMENTAL = 3 34 }; 35 36 enum class DeviceCapabilityDiscoveryCommands : uint8_t 37 { 38 QUERY_DEVICE_IDENTIFICATION = 0x09, 39 }; 40 41 enum class PlatformEnvironmentalCommands : uint8_t 42 { 43 GET_TEMPERATURE_READING = 0x00, 44 READ_THERMAL_PARAMETERS = 0x02, 45 GET_CURRENT_POWER_DRAW = 0x03, 46 GET_MAX_OBSERVED_POWER = 0x04, 47 GET_CURRENT_ENERGY_COUNTER = 0x06, 48 GET_INVENTORY_INFORMATION = 0x0C, 49 GET_DRIVER_INFORMATION = 0x0E, 50 GET_VOLTAGE = 0x0F, 51 }; 52 53 enum class NetworkPortCommands : uint8_t 54 { 55 GetEthernetPortTelemetryCounters = 0x0F, 56 GetPortNetworkAddresses = 0x11, 57 }; 58 59 enum class PcieLinkCommands : uint8_t 60 { 61 ListPCIePorts = 0x07, 62 QueryScalarGroupTelemetryV2 = 0x24, 63 }; 64 65 enum class DeviceIdentification : uint8_t 66 { 67 DEVICE_GPU = 0, 68 DEVICE_PCIE = 2, 69 DEVICE_SMA = 5 70 }; 71 72 enum class InventoryPropertyId : uint8_t 73 { 74 BOARD_PART_NUMBER = 0, 75 SERIAL_NUMBER = 1, 76 MARKETING_NAME = 2, 77 DEVICE_PART_NUMBER = 3, 78 FRU_PART_NUMBER = 4, 79 MEMORY_VENDOR = 5, 80 MEMORY_PART_NUMBER = 6, 81 MAX_MEMORY_CAPACITY = 7, 82 BUILD_DATE = 8, 83 FIRMWARE_VERSION = 9, 84 DEVICE_GUID = 10, 85 INFOROM_VERSION = 11, 86 PRODUCT_LENGTH = 12, 87 PRODUCT_WIDTH = 13, 88 PRODUCT_HEIGHT = 14, 89 RATED_DEVICE_POWER_LIMIT = 15, 90 MIN_DEVICE_POWER_LIMIT = 16, 91 MAX_DEVICE_POWER_LIMIT = 17, 92 MAX_MODULE_POWER_LIMIT = 18, 93 MIN_MODULE_POWER_LIMIT = 19, 94 RATED_MODULE_POWER_LIMIT = 20, 95 DEFAULT_BOOST_CLOCKS = 21, 96 DEFAULT_BASE_CLOCKS = 22, 97 DEFAULT_EDPP_SCALING = 23, 98 MIN_EDPP_SCALING = 24, 99 MAX_EDPP_SCALING = 25, 100 MIN_GRAPHICS_CLOCK = 26, 101 MAX_GRAPHICS_CLOCK = 27, 102 MIN_MEMORY_CLOCK = 28, 103 MAX_MEMORY_CLOCK = 29, 104 INFINIBAND_GUID = 30, 105 RACK_GUID = 31, 106 RACK_SLOT_NUMBER = 32, 107 COMPUTE_SLOT_INDEX = 33, 108 NODE_INDEX = 34, 109 GPU_NODE_ID = 35, 110 NVLINK_PEER_TYPE = 36 111 }; 112 113 enum class PciePortType : uint8_t 114 { 115 UPSTREAM = 0, 116 DOWNSTREAM = 1, 117 }; 118 119 enum class DriverState : uint8_t 120 { 121 DRIVER_STATE_UNKNOWN = 0, 122 DRIVER_STATE_NOT_LOADED = 1, 123 DRIVER_STATE_LOADED = 2, 124 }; 125 126 enum class NetworkPortLinkType : uint8_t 127 { 128 ETHERNET = 0, 129 INFINIBAND = 1, 130 UNKNOWN = 0xFF, 131 }; 132 133 struct QueryDeviceIdentificationRequest 134 { 135 ocp::accelerator_management::CommonRequest hdr; 136 } __attribute__((packed)); 137 138 struct QueryDeviceIdentificationResponse 139 { 140 ocp::accelerator_management::CommonResponse hdr; 141 uint8_t device_identification; 142 uint8_t instance_id; 143 } __attribute__((packed)); 144 145 struct GetNumericSensorReadingRequest 146 { 147 ocp::accelerator_management::CommonRequest hdr; 148 uint8_t sensor_id; 149 } __attribute__((packed)); 150 151 using GetTemperatureReadingRequest = GetNumericSensorReadingRequest; 152 153 using ReadThermalParametersRequest = GetNumericSensorReadingRequest; 154 155 struct GetPowerDrawRequest 156 { 157 ocp::accelerator_management::CommonRequest hdr; 158 uint8_t sensorId; 159 uint8_t averagingInterval; 160 } __attribute__((packed)); 161 162 using GetCurrentEnergyCounterRequest = GetNumericSensorReadingRequest; 163 164 using GetVoltageRequest = GetNumericSensorReadingRequest; 165 166 struct QueryScalarGroupTelemetryV2Request 167 { 168 ocp::accelerator_management::CommonRequest hdr; 169 uint8_t upstreamPortNumber; 170 uint8_t portNumber; 171 uint8_t groupId; 172 } __attribute__((packed)); 173 174 struct GetPortNetworkAddressesRequest 175 { 176 ocp::accelerator_management::CommonRequest hdr; 177 uint16_t portNumber; 178 } __attribute__((packed)); 179 180 struct GetEthernetPortTelemetryCountersRequest 181 { 182 ocp::accelerator_management::CommonRequest hdr; 183 uint16_t portNumber; 184 } __attribute__((packed)); 185 186 struct GetTemperatureReadingResponse 187 { 188 ocp::accelerator_management::CommonResponse hdr; 189 int32_t reading; 190 } __attribute__((packed)); 191 192 struct ReadThermalParametersResponse 193 { 194 ocp::accelerator_management::CommonResponse hdr; 195 int32_t threshold; 196 } __attribute__((packed)); 197 198 struct GetPowerDrawResponse 199 { 200 ocp::accelerator_management::CommonResponse hdr; 201 uint32_t power; 202 } __attribute__((packed)); 203 204 struct GetCurrentEnergyCounterResponse 205 { 206 ocp::accelerator_management::CommonResponse hdr; 207 uint64_t energy; 208 } __attribute__((packed)); 209 210 struct GetVoltageResponse 211 { 212 ocp::accelerator_management::CommonResponse hdr; 213 uint32_t voltage; 214 } __attribute__((packed)); 215 216 struct ListPCIePortsResponse 217 { 218 ocp::accelerator_management::CommonResponse hdr; 219 uint16_t numUpstreamPorts; 220 } __attribute__((packed)); 221 222 struct ListPCIePortsDownstreamPortsData 223 { 224 uint8_t isInternal; 225 uint8_t count; 226 } __attribute__((packed)); 227 228 struct GetDriverInformationResponse 229 { 230 ocp::accelerator_management::CommonResponse hdr; 231 DriverState driverState; 232 char driverVersion; 233 } __attribute__((packed)); 234 235 struct GetInventoryInformationRequest 236 { 237 ocp::accelerator_management::CommonRequest hdr; 238 uint8_t property_id; 239 } __attribute__((packed)); 240 241 struct GetInventoryInformationResponse 242 { 243 ocp::accelerator_management::CommonResponse hdr; 244 std::array<uint8_t, maxInventoryDataSize> data; 245 } __attribute__((packed)); 246 247 int packHeader(const ocp::accelerator_management::BindingPciVidInfo& hdr, 248 ocp::accelerator_management::BindingPciVid& msg); 249 250 int encodeQueryDeviceIdentificationRequest(uint8_t instanceId, 251 std::span<uint8_t> buf); 252 253 int decodeQueryDeviceIdentificationResponse( 254 std::span<const uint8_t> buf, 255 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 256 uint8_t& deviceIdentification, uint8_t& deviceInstance); 257 258 int encodeGetTemperatureReadingRequest(uint8_t instanceId, uint8_t sensorId, 259 std::span<uint8_t> buf); 260 261 int decodeGetTemperatureReadingResponse( 262 std::span<const uint8_t> buf, 263 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 264 double& temperatureReading); 265 266 int encodeReadThermalParametersRequest(uint8_t instanceId, uint8_t sensorId, 267 std::span<uint8_t> buf); 268 269 int decodeReadThermalParametersResponse( 270 std::span<const uint8_t> buf, 271 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 272 int32_t& threshold); 273 274 int encodeGetPowerDrawRequest( 275 PlatformEnvironmentalCommands commandCode, uint8_t instanceId, 276 uint8_t sensorId, uint8_t averagingInterval, std::span<uint8_t> buf); 277 278 int decodeGetPowerDrawResponse(std::span<const uint8_t> buf, 279 ocp::accelerator_management::CompletionCode& cc, 280 uint16_t& reasonCode, uint32_t& power); 281 282 int encodeGetCurrentEnergyCounterRequest(uint8_t instanceId, uint8_t sensorId, 283 std::span<uint8_t> buf); 284 285 int decodeGetCurrentEnergyCounterResponse( 286 std::span<const uint8_t> buf, 287 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 288 uint64_t& energy); 289 290 int encodeGetVoltageRequest(uint8_t instanceId, uint8_t sensorId, 291 std::span<uint8_t> buf); 292 293 int decodeGetVoltageResponse(std::span<const uint8_t> buf, 294 ocp::accelerator_management::CompletionCode& cc, 295 uint16_t& reasonCode, uint32_t& voltage); 296 297 int encodeGetDriverInformationRequest(uint8_t instanceId, 298 std::span<uint8_t> buf); 299 300 int decodeGetDriverInformationResponse( 301 std::span<const uint8_t> buf, 302 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 303 DriverState& driverState, std::string& driverVersion); 304 305 int encodeGetInventoryInformationRequest(uint8_t instanceId, uint8_t propertyId, 306 std::span<uint8_t> buf); 307 308 int decodeGetInventoryInformationResponse( 309 std::span<const uint8_t> buf, 310 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 311 InventoryPropertyId propertyId, InventoryValue& value); 312 313 int encodeQueryScalarGroupTelemetryV2Request( 314 uint8_t instanceId, PciePortType portType, uint8_t upstreamPortNumber, 315 uint8_t portNumber, uint8_t groupId, std::span<uint8_t> buf); 316 317 int decodeQueryScalarGroupTelemetryV2Response( 318 std::span<const uint8_t> buf, 319 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 320 size_t& numTelemetryValues, std::vector<uint32_t>& telemetryValues); 321 322 int encodeListPciePortsRequest(uint8_t instanceId, std::span<uint8_t> buf); 323 324 int decodeListPciePortsResponse( 325 std::span<const uint8_t> buf, 326 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 327 uint16_t& numUpstreamPorts, std::vector<uint8_t>& numDownstreamPorts); 328 329 int encodeGetPortNetworkAddressesRequest( 330 uint8_t instanceId, uint16_t portNumber, std::span<uint8_t> buf); 331 332 int decodeGetPortNetworkAddressesResponse( 333 std::span<const uint8_t> buf, 334 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 335 NetworkPortLinkType& linkType, 336 std::vector<std::pair<uint8_t, uint64_t>>& addresses); 337 338 int encodeGetEthernetPortTelemetryCountersRequest( 339 uint8_t instanceId, uint16_t portNumber, std::span<uint8_t> buf); 340 341 int decodeGetEthernetPortTelemetryCountersResponse( 342 std::span<const uint8_t> buf, 343 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 344 std::vector<std::pair<uint8_t, uint64_t>>& telemetryValues); 345 } // namespace gpu 346