1 /* 2 * SPDX-FileCopyrightText: Copyright OpenBMC Authors 3 * SPDX-License-Identifier: Apache-2.0 4 */ 5 6 #pragma once 7 8 #include <OcpMctpVdm.hpp> 9 10 #include <array> 11 #include <cstddef> 12 #include <cstdint> 13 #include <span> 14 #include <string> 15 #include <variant> 16 #include <vector> 17 18 namespace gpu 19 { 20 21 using InventoryValue = std::variant<std::string, std::vector<uint8_t>>; 22 constexpr size_t maxInventoryDataSize = 256; 23 24 constexpr uint16_t nvidiaPciVendorId = 0x10de; 25 26 enum class MessageType : uint8_t 27 { 28 DEVICE_CAPABILITY_DISCOVERY = 0, 29 PCIE_LINK = 2, 30 PLATFORM_ENVIRONMENTAL = 3 31 }; 32 33 enum class DeviceCapabilityDiscoveryCommands : uint8_t 34 { 35 QUERY_DEVICE_IDENTIFICATION = 0x09, 36 }; 37 38 enum class PlatformEnvironmentalCommands : uint8_t 39 { 40 GET_TEMPERATURE_READING = 0x00, 41 READ_THERMAL_PARAMETERS = 0x02, 42 GET_CURRENT_POWER_DRAW = 0x03, 43 GET_MAX_OBSERVED_POWER = 0x04, 44 GET_CURRENT_ENERGY_COUNTER = 0x06, 45 GET_INVENTORY_INFORMATION = 0x0C, 46 GET_DRIVER_INFORMATION = 0x0E, 47 GET_VOLTAGE = 0x0F, 48 }; 49 50 enum class PcieLinkCommands : uint8_t 51 { 52 ListPCIePorts = 0x07, 53 QueryScalarGroupTelemetryV2 = 0x24, 54 }; 55 56 enum class DeviceIdentification : uint8_t 57 { 58 DEVICE_GPU = 0, 59 DEVICE_PCIE = 2, 60 DEVICE_SMA = 5 61 }; 62 63 enum class InventoryPropertyId : uint8_t 64 { 65 BOARD_PART_NUMBER = 0, 66 SERIAL_NUMBER = 1, 67 MARKETING_NAME = 2, 68 DEVICE_PART_NUMBER = 3, 69 FRU_PART_NUMBER = 4, 70 MEMORY_VENDOR = 5, 71 MEMORY_PART_NUMBER = 6, 72 MAX_MEMORY_CAPACITY = 7, 73 BUILD_DATE = 8, 74 FIRMWARE_VERSION = 9, 75 DEVICE_GUID = 10, 76 INFOROM_VERSION = 11, 77 PRODUCT_LENGTH = 12, 78 PRODUCT_WIDTH = 13, 79 PRODUCT_HEIGHT = 14, 80 RATED_DEVICE_POWER_LIMIT = 15, 81 MIN_DEVICE_POWER_LIMIT = 16, 82 MAX_DEVICE_POWER_LIMIT = 17, 83 MAX_MODULE_POWER_LIMIT = 18, 84 MIN_MODULE_POWER_LIMIT = 19, 85 RATED_MODULE_POWER_LIMIT = 20, 86 DEFAULT_BOOST_CLOCKS = 21, 87 DEFAULT_BASE_CLOCKS = 22, 88 DEFAULT_EDPP_SCALING = 23, 89 MIN_EDPP_SCALING = 24, 90 MAX_EDPP_SCALING = 25, 91 MIN_GRAPHICS_CLOCK = 26, 92 MAX_GRAPHICS_CLOCK = 27, 93 MIN_MEMORY_CLOCK = 28, 94 MAX_MEMORY_CLOCK = 29, 95 INFINIBAND_GUID = 30, 96 RACK_GUID = 31, 97 RACK_SLOT_NUMBER = 32, 98 COMPUTE_SLOT_INDEX = 33, 99 NODE_INDEX = 34, 100 GPU_NODE_ID = 35, 101 NVLINK_PEER_TYPE = 36 102 }; 103 104 enum class PciePortType : uint8_t 105 { 106 UPSTREAM = 0, 107 DOWNSTREAM = 1, 108 }; 109 110 enum class DriverState : uint8_t 111 { 112 DRIVER_STATE_UNKNOWN = 0, 113 DRIVER_STATE_NOT_LOADED = 1, 114 DRIVER_STATE_LOADED = 2, 115 }; 116 117 struct QueryDeviceIdentificationRequest 118 { 119 ocp::accelerator_management::CommonRequest hdr; 120 } __attribute__((packed)); 121 122 struct QueryDeviceIdentificationResponse 123 { 124 ocp::accelerator_management::CommonResponse hdr; 125 uint8_t device_identification; 126 uint8_t instance_id; 127 } __attribute__((packed)); 128 129 struct GetNumericSensorReadingRequest 130 { 131 ocp::accelerator_management::CommonRequest hdr; 132 uint8_t sensor_id; 133 } __attribute__((packed)); 134 135 using GetTemperatureReadingRequest = GetNumericSensorReadingRequest; 136 137 using ReadThermalParametersRequest = GetNumericSensorReadingRequest; 138 139 struct GetPowerDrawRequest 140 { 141 ocp::accelerator_management::CommonRequest hdr; 142 uint8_t sensorId; 143 uint8_t averagingInterval; 144 } __attribute__((packed)); 145 146 using GetCurrentEnergyCounterRequest = GetNumericSensorReadingRequest; 147 148 using GetVoltageRequest = GetNumericSensorReadingRequest; 149 150 struct QueryScalarGroupTelemetryV2Request 151 { 152 ocp::accelerator_management::CommonRequest hdr; 153 uint8_t upstreamPortNumber; 154 uint8_t portNumber; 155 uint8_t groupId; 156 } __attribute__((packed)); 157 158 struct GetTemperatureReadingResponse 159 { 160 ocp::accelerator_management::CommonResponse hdr; 161 int32_t reading; 162 } __attribute__((packed)); 163 164 struct ReadThermalParametersResponse 165 { 166 ocp::accelerator_management::CommonResponse hdr; 167 int32_t threshold; 168 } __attribute__((packed)); 169 170 struct GetPowerDrawResponse 171 { 172 ocp::accelerator_management::CommonResponse hdr; 173 uint32_t power; 174 } __attribute__((packed)); 175 176 struct GetCurrentEnergyCounterResponse 177 { 178 ocp::accelerator_management::CommonResponse hdr; 179 uint64_t energy; 180 } __attribute__((packed)); 181 182 struct GetVoltageResponse 183 { 184 ocp::accelerator_management::CommonResponse hdr; 185 uint32_t voltage; 186 } __attribute__((packed)); 187 188 struct ListPCIePortsResponse 189 { 190 ocp::accelerator_management::CommonResponse hdr; 191 uint16_t numUpstreamPorts; 192 } __attribute__((packed)); 193 194 struct ListPCIePortsDownstreamPortsData 195 { 196 uint8_t isInternal; 197 uint8_t count; 198 } __attribute__((packed)); 199 200 struct GetDriverInformationResponse 201 { 202 ocp::accelerator_management::CommonResponse hdr; 203 DriverState driverState; 204 char driverVersion; 205 } __attribute__((packed)); 206 207 struct GetInventoryInformationRequest 208 { 209 ocp::accelerator_management::CommonRequest hdr; 210 uint8_t property_id; 211 } __attribute__((packed)); 212 213 struct GetInventoryInformationResponse 214 { 215 ocp::accelerator_management::CommonResponse hdr; 216 std::array<uint8_t, maxInventoryDataSize> data; 217 } __attribute__((packed)); 218 219 int packHeader(const ocp::accelerator_management::BindingPciVidInfo& hdr, 220 ocp::accelerator_management::BindingPciVid& msg); 221 222 int encodeQueryDeviceIdentificationRequest(uint8_t instanceId, 223 std::span<uint8_t> buf); 224 225 int decodeQueryDeviceIdentificationResponse( 226 std::span<const uint8_t> buf, 227 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 228 uint8_t& deviceIdentification, uint8_t& deviceInstance); 229 230 int encodeGetTemperatureReadingRequest(uint8_t instanceId, uint8_t sensorId, 231 std::span<uint8_t> buf); 232 233 int decodeGetTemperatureReadingResponse( 234 std::span<const uint8_t> buf, 235 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 236 double& temperatureReading); 237 238 int encodeReadThermalParametersRequest(uint8_t instanceId, uint8_t sensorId, 239 std::span<uint8_t> buf); 240 241 int decodeReadThermalParametersResponse( 242 std::span<const uint8_t> buf, 243 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 244 int32_t& threshold); 245 246 int encodeGetPowerDrawRequest( 247 PlatformEnvironmentalCommands commandCode, uint8_t instanceId, 248 uint8_t sensorId, uint8_t averagingInterval, std::span<uint8_t> buf); 249 250 int decodeGetPowerDrawResponse(std::span<const uint8_t> buf, 251 ocp::accelerator_management::CompletionCode& cc, 252 uint16_t& reasonCode, uint32_t& power); 253 254 int encodeGetCurrentEnergyCounterRequest(uint8_t instanceId, uint8_t sensorId, 255 std::span<uint8_t> buf); 256 257 int decodeGetCurrentEnergyCounterResponse( 258 std::span<const uint8_t> buf, 259 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 260 uint64_t& energy); 261 262 int encodeGetVoltageRequest(uint8_t instanceId, uint8_t sensorId, 263 std::span<uint8_t> buf); 264 265 int decodeGetVoltageResponse(std::span<const uint8_t> buf, 266 ocp::accelerator_management::CompletionCode& cc, 267 uint16_t& reasonCode, uint32_t& voltage); 268 269 int encodeGetDriverInformationRequest(uint8_t instanceId, 270 std::span<uint8_t> buf); 271 272 int decodeGetDriverInformationResponse( 273 std::span<const uint8_t> buf, 274 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 275 DriverState& driverState, std::string& driverVersion); 276 277 int encodeGetInventoryInformationRequest(uint8_t instanceId, uint8_t propertyId, 278 std::span<uint8_t> buf); 279 280 int decodeGetInventoryInformationResponse( 281 std::span<const uint8_t> buf, 282 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 283 InventoryPropertyId propertyId, InventoryValue& value); 284 285 int encodeQueryScalarGroupTelemetryV2Request( 286 uint8_t instanceId, PciePortType portType, uint8_t upstreamPortNumber, 287 uint8_t portNumber, uint8_t groupId, std::span<uint8_t> buf); 288 289 int decodeQueryScalarGroupTelemetryV2Response( 290 std::span<const uint8_t> buf, 291 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 292 size_t& numTelemetryValues, std::vector<uint32_t>& telemetryValues); 293 294 int encodeListPciePortsRequest(uint8_t instanceId, std::span<uint8_t> buf); 295 296 int decodeListPciePortsResponse( 297 std::span<const uint8_t> buf, 298 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 299 uint16_t& numUpstreamPorts, std::vector<uint8_t>& numDownstreamPorts); 300 301 } // namespace gpu 302