1 /* 2 * SPDX-FileCopyrightText: Copyright OpenBMC Authors 3 * SPDX-License-Identifier: Apache-2.0 4 */ 5 6 #pragma once 7 8 #include <OcpMctpVdm.hpp> 9 10 #include <array> 11 #include <cstddef> 12 #include <cstdint> 13 #include <span> 14 #include <string> 15 #include <variant> 16 #include <vector> 17 18 namespace gpu 19 { 20 21 using InventoryValue = std::variant<std::string, std::vector<uint8_t>>; 22 constexpr size_t maxInventoryDataSize = 256; 23 24 constexpr uint16_t nvidiaPciVendorId = 0x10de; 25 26 enum class MessageType : uint8_t 27 { 28 DEVICE_CAPABILITY_DISCOVERY = 0, 29 PCIE_LINK = 2, 30 PLATFORM_ENVIRONMENTAL = 3 31 }; 32 33 enum class DeviceCapabilityDiscoveryCommands : uint8_t 34 { 35 QUERY_DEVICE_IDENTIFICATION = 0x09, 36 }; 37 38 enum class PlatformEnvironmentalCommands : uint8_t 39 { 40 GET_TEMPERATURE_READING = 0x00, 41 READ_THERMAL_PARAMETERS = 0x02, 42 GET_CURRENT_POWER_DRAW = 0x03, 43 GET_MAX_OBSERVED_POWER = 0x04, 44 GET_CURRENT_ENERGY_COUNTER = 0x06, 45 GET_INVENTORY_INFORMATION = 0x0C, 46 GET_VOLTAGE = 0x0F, 47 }; 48 49 enum class PcieLinkCommands : uint8_t 50 { 51 ListPCIePorts = 0x07, 52 QueryScalarGroupTelemetryV2 = 0x24, 53 }; 54 55 enum class DeviceIdentification : uint8_t 56 { 57 DEVICE_GPU = 0, 58 DEVICE_PCIE = 2, 59 DEVICE_SMA = 5 60 }; 61 62 enum class InventoryPropertyId : uint8_t 63 { 64 BOARD_PART_NUMBER = 0, 65 SERIAL_NUMBER = 1, 66 MARKETING_NAME = 2, 67 DEVICE_PART_NUMBER = 3, 68 FRU_PART_NUMBER = 4, 69 MEMORY_VENDOR = 5, 70 MEMORY_PART_NUMBER = 6, 71 MAX_MEMORY_CAPACITY = 7, 72 BUILD_DATE = 8, 73 FIRMWARE_VERSION = 9, 74 DEVICE_GUID = 10, 75 INFOROM_VERSION = 11, 76 PRODUCT_LENGTH = 12, 77 PRODUCT_WIDTH = 13, 78 PRODUCT_HEIGHT = 14, 79 RATED_DEVICE_POWER_LIMIT = 15, 80 MIN_DEVICE_POWER_LIMIT = 16, 81 MAX_DEVICE_POWER_LIMIT = 17, 82 MAX_MODULE_POWER_LIMIT = 18, 83 MIN_MODULE_POWER_LIMIT = 19, 84 RATED_MODULE_POWER_LIMIT = 20, 85 DEFAULT_BOOST_CLOCKS = 21, 86 DEFAULT_BASE_CLOCKS = 22, 87 DEFAULT_EDPP_SCALING = 23, 88 MIN_EDPP_SCALING = 24, 89 MAX_EDPP_SCALING = 25, 90 MIN_GRAPHICS_CLOCK = 26, 91 MAX_GRAPHICS_CLOCK = 27, 92 MIN_MEMORY_CLOCK = 28, 93 MAX_MEMORY_CLOCK = 29, 94 INFINIBAND_GUID = 30, 95 RACK_GUID = 31, 96 RACK_SLOT_NUMBER = 32, 97 COMPUTE_SLOT_INDEX = 33, 98 NODE_INDEX = 34, 99 GPU_NODE_ID = 35, 100 NVLINK_PEER_TYPE = 36 101 }; 102 103 enum class PciePortType : uint8_t 104 { 105 UPSTREAM = 0, 106 DOWNSTREAM = 1, 107 }; 108 109 struct QueryDeviceIdentificationRequest 110 { 111 ocp::accelerator_management::CommonRequest hdr; 112 } __attribute__((packed)); 113 114 struct QueryDeviceIdentificationResponse 115 { 116 ocp::accelerator_management::CommonResponse hdr; 117 uint8_t device_identification; 118 uint8_t instance_id; 119 } __attribute__((packed)); 120 121 struct GetNumericSensorReadingRequest 122 { 123 ocp::accelerator_management::CommonRequest hdr; 124 uint8_t sensor_id; 125 } __attribute__((packed)); 126 127 using GetTemperatureReadingRequest = GetNumericSensorReadingRequest; 128 129 using ReadThermalParametersRequest = GetNumericSensorReadingRequest; 130 131 struct GetPowerDrawRequest 132 { 133 ocp::accelerator_management::CommonRequest hdr; 134 uint8_t sensorId; 135 uint8_t averagingInterval; 136 } __attribute__((packed)); 137 138 using GetCurrentEnergyCounterRequest = GetNumericSensorReadingRequest; 139 140 using GetVoltageRequest = GetNumericSensorReadingRequest; 141 142 struct QueryScalarGroupTelemetryV2Request 143 { 144 ocp::accelerator_management::CommonRequest hdr; 145 uint8_t upstreamPortNumber; 146 uint8_t portNumber; 147 uint8_t groupId; 148 } __attribute__((packed)); 149 150 struct GetTemperatureReadingResponse 151 { 152 ocp::accelerator_management::CommonResponse hdr; 153 int32_t reading; 154 } __attribute__((packed)); 155 156 struct ReadThermalParametersResponse 157 { 158 ocp::accelerator_management::CommonResponse hdr; 159 int32_t threshold; 160 } __attribute__((packed)); 161 162 struct GetPowerDrawResponse 163 { 164 ocp::accelerator_management::CommonResponse hdr; 165 uint32_t power; 166 } __attribute__((packed)); 167 168 struct GetCurrentEnergyCounterResponse 169 { 170 ocp::accelerator_management::CommonResponse hdr; 171 uint64_t energy; 172 } __attribute__((packed)); 173 174 struct GetVoltageResponse 175 { 176 ocp::accelerator_management::CommonResponse hdr; 177 uint32_t voltage; 178 } __attribute__((packed)); 179 180 struct ListPCIePortsResponse 181 { 182 ocp::accelerator_management::CommonResponse hdr; 183 uint16_t numUpstreamPorts; 184 } __attribute__((packed)); 185 186 struct ListPCIePortsDownstreamPortsData 187 { 188 uint8_t isInternal; 189 uint8_t count; 190 } __attribute__((packed)); 191 192 struct GetInventoryInformationRequest 193 { 194 ocp::accelerator_management::CommonRequest hdr; 195 uint8_t property_id; 196 } __attribute__((packed)); 197 198 struct GetInventoryInformationResponse 199 { 200 ocp::accelerator_management::CommonResponse hdr; 201 std::array<uint8_t, maxInventoryDataSize> data; 202 } __attribute__((packed)); 203 204 int packHeader(const ocp::accelerator_management::BindingPciVidInfo& hdr, 205 ocp::accelerator_management::BindingPciVid& msg); 206 207 int encodeQueryDeviceIdentificationRequest(uint8_t instanceId, 208 std::span<uint8_t> buf); 209 210 int decodeQueryDeviceIdentificationResponse( 211 std::span<const uint8_t> buf, 212 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 213 uint8_t& deviceIdentification, uint8_t& deviceInstance); 214 215 int encodeGetTemperatureReadingRequest(uint8_t instanceId, uint8_t sensorId, 216 std::span<uint8_t> buf); 217 218 int decodeGetTemperatureReadingResponse( 219 std::span<const uint8_t> buf, 220 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 221 double& temperatureReading); 222 223 int encodeReadThermalParametersRequest(uint8_t instanceId, uint8_t sensorId, 224 std::span<uint8_t> buf); 225 226 int decodeReadThermalParametersResponse( 227 std::span<const uint8_t> buf, 228 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 229 int32_t& threshold); 230 231 int encodeGetPowerDrawRequest( 232 PlatformEnvironmentalCommands commandCode, uint8_t instanceId, 233 uint8_t sensorId, uint8_t averagingInterval, std::span<uint8_t> buf); 234 235 int decodeGetPowerDrawResponse(std::span<const uint8_t> buf, 236 ocp::accelerator_management::CompletionCode& cc, 237 uint16_t& reasonCode, uint32_t& power); 238 239 int encodeGetCurrentEnergyCounterRequest(uint8_t instanceId, uint8_t sensorId, 240 std::span<uint8_t> buf); 241 242 int decodeGetCurrentEnergyCounterResponse( 243 std::span<const uint8_t> buf, 244 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 245 uint64_t& energy); 246 247 int encodeGetVoltageRequest(uint8_t instanceId, uint8_t sensorId, 248 std::span<uint8_t> buf); 249 250 int decodeGetVoltageResponse(std::span<const uint8_t> buf, 251 ocp::accelerator_management::CompletionCode& cc, 252 uint16_t& reasonCode, uint32_t& voltage); 253 254 int encodeGetInventoryInformationRequest(uint8_t instanceId, uint8_t propertyId, 255 std::span<uint8_t> buf); 256 257 int decodeGetInventoryInformationResponse( 258 std::span<const uint8_t> buf, 259 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 260 InventoryPropertyId propertyId, InventoryValue& value); 261 262 int encodeQueryScalarGroupTelemetryV2Request( 263 uint8_t instanceId, PciePortType portType, uint8_t upstreamPortNumber, 264 uint8_t portNumber, uint8_t groupId, std::span<uint8_t> buf); 265 266 int decodeQueryScalarGroupTelemetryV2Response( 267 std::span<const uint8_t> buf, 268 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 269 size_t& numTelemetryValues, std::vector<uint32_t>& telemetryValues); 270 271 int encodeListPciePortsRequest(uint8_t instanceId, std::span<uint8_t> buf); 272 273 int decodeListPciePortsResponse( 274 std::span<const uint8_t> buf, 275 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 276 uint16_t& numUpstreamPorts, std::vector<uint8_t>& numDownstreamPorts); 277 278 } // namespace gpu 279