1 /* 2 * SPDX-FileCopyrightText: Copyright OpenBMC Authors 3 * SPDX-License-Identifier: Apache-2.0 4 */ 5 6 #pragma once 7 8 #include <OcpMctpVdm.hpp> 9 10 #include <array> 11 #include <cstddef> 12 #include <cstdint> 13 #include <span> 14 #include <string> 15 #include <variant> 16 #include <vector> 17 18 namespace gpu 19 { 20 21 using InventoryValue = std::variant<std::string, std::vector<uint8_t>>; 22 constexpr size_t maxInventoryDataSize = 256; 23 24 constexpr uint16_t nvidiaPciVendorId = 0x10de; 25 26 enum class MessageType : uint8_t 27 { 28 DEVICE_CAPABILITY_DISCOVERY = 0, 29 PCIE_LINK = 2, 30 PLATFORM_ENVIRONMENTAL = 3 31 }; 32 33 enum class DeviceCapabilityDiscoveryCommands : uint8_t 34 { 35 QUERY_DEVICE_IDENTIFICATION = 0x09, 36 }; 37 38 enum class PlatformEnvironmentalCommands : uint8_t 39 { 40 GET_TEMPERATURE_READING = 0x00, 41 READ_THERMAL_PARAMETERS = 0x02, 42 GET_CURRENT_POWER_DRAW = 0x03, 43 GET_MAX_OBSERVED_POWER = 0x04, 44 GET_CURRENT_ENERGY_COUNTER = 0x06, 45 GET_INVENTORY_INFORMATION = 0x0C, 46 GET_VOLTAGE = 0x0F, 47 }; 48 49 enum class PcieLinkCommands : uint8_t 50 { 51 QueryScalarGroupTelemetryV2 = 0x24, 52 }; 53 54 enum class DeviceIdentification : uint8_t 55 { 56 DEVICE_GPU = 0, 57 DEVICE_PCIE = 2, 58 DEVICE_SMA = 5 59 }; 60 61 enum class InventoryPropertyId : uint8_t 62 { 63 BOARD_PART_NUMBER = 0, 64 SERIAL_NUMBER = 1, 65 MARKETING_NAME = 2, 66 DEVICE_PART_NUMBER = 3, 67 FRU_PART_NUMBER = 4, 68 MEMORY_VENDOR = 5, 69 MEMORY_PART_NUMBER = 6, 70 MAX_MEMORY_CAPACITY = 7, 71 BUILD_DATE = 8, 72 FIRMWARE_VERSION = 9, 73 DEVICE_GUID = 10, 74 INFOROM_VERSION = 11, 75 PRODUCT_LENGTH = 12, 76 PRODUCT_WIDTH = 13, 77 PRODUCT_HEIGHT = 14, 78 RATED_DEVICE_POWER_LIMIT = 15, 79 MIN_DEVICE_POWER_LIMIT = 16, 80 MAX_DEVICE_POWER_LIMIT = 17, 81 MAX_MODULE_POWER_LIMIT = 18, 82 MIN_MODULE_POWER_LIMIT = 19, 83 RATED_MODULE_POWER_LIMIT = 20, 84 DEFAULT_BOOST_CLOCKS = 21, 85 DEFAULT_BASE_CLOCKS = 22, 86 DEFAULT_EDPP_SCALING = 23, 87 MIN_EDPP_SCALING = 24, 88 MAX_EDPP_SCALING = 25, 89 MIN_GRAPHICS_CLOCK = 26, 90 MAX_GRAPHICS_CLOCK = 27, 91 MIN_MEMORY_CLOCK = 28, 92 MAX_MEMORY_CLOCK = 29, 93 INFINIBAND_GUID = 30, 94 RACK_GUID = 31, 95 RACK_SLOT_NUMBER = 32, 96 COMPUTE_SLOT_INDEX = 33, 97 NODE_INDEX = 34, 98 GPU_NODE_ID = 35, 99 NVLINK_PEER_TYPE = 36 100 }; 101 102 enum class PciePortType : uint8_t 103 { 104 UPSTREAM = 0, 105 DOWNSTREAM = 1, 106 }; 107 108 struct QueryDeviceIdentificationRequest 109 { 110 ocp::accelerator_management::CommonRequest hdr; 111 } __attribute__((packed)); 112 113 struct QueryDeviceIdentificationResponse 114 { 115 ocp::accelerator_management::CommonResponse hdr; 116 uint8_t device_identification; 117 uint8_t instance_id; 118 } __attribute__((packed)); 119 120 struct GetNumericSensorReadingRequest 121 { 122 ocp::accelerator_management::CommonRequest hdr; 123 uint8_t sensor_id; 124 } __attribute__((packed)); 125 126 using GetTemperatureReadingRequest = GetNumericSensorReadingRequest; 127 128 using ReadThermalParametersRequest = GetNumericSensorReadingRequest; 129 130 struct GetPowerDrawRequest 131 { 132 ocp::accelerator_management::CommonRequest hdr; 133 uint8_t sensorId; 134 uint8_t averagingInterval; 135 } __attribute__((packed)); 136 137 using GetCurrentEnergyCounterRequest = GetNumericSensorReadingRequest; 138 139 using GetVoltageRequest = GetNumericSensorReadingRequest; 140 141 struct QueryScalarGroupTelemetryV2Request 142 { 143 ocp::accelerator_management::CommonRequest hdr; 144 uint8_t upstreamPortNumber; 145 uint8_t portNumber; 146 uint8_t groupId; 147 } __attribute__((packed)); 148 149 struct GetTemperatureReadingResponse 150 { 151 ocp::accelerator_management::CommonResponse hdr; 152 int32_t reading; 153 } __attribute__((packed)); 154 155 struct ReadThermalParametersResponse 156 { 157 ocp::accelerator_management::CommonResponse hdr; 158 int32_t threshold; 159 } __attribute__((packed)); 160 161 struct GetPowerDrawResponse 162 { 163 ocp::accelerator_management::CommonResponse hdr; 164 uint32_t power; 165 } __attribute__((packed)); 166 167 struct GetCurrentEnergyCounterResponse 168 { 169 ocp::accelerator_management::CommonResponse hdr; 170 uint64_t energy; 171 } __attribute__((packed)); 172 173 struct GetVoltageResponse 174 { 175 ocp::accelerator_management::CommonResponse hdr; 176 uint32_t voltage; 177 } __attribute__((packed)); 178 179 struct GetInventoryInformationRequest 180 { 181 ocp::accelerator_management::CommonRequest hdr; 182 uint8_t property_id; 183 } __attribute__((packed)); 184 185 struct GetInventoryInformationResponse 186 { 187 ocp::accelerator_management::CommonResponse hdr; 188 std::array<uint8_t, maxInventoryDataSize> data; 189 } __attribute__((packed)); 190 191 int packHeader(const ocp::accelerator_management::BindingPciVidInfo& hdr, 192 ocp::accelerator_management::BindingPciVid& msg); 193 194 int encodeQueryDeviceIdentificationRequest(uint8_t instanceId, 195 std::span<uint8_t> buf); 196 197 int decodeQueryDeviceIdentificationResponse( 198 std::span<const uint8_t> buf, 199 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 200 uint8_t& deviceIdentification, uint8_t& deviceInstance); 201 202 int encodeGetTemperatureReadingRequest(uint8_t instanceId, uint8_t sensorId, 203 std::span<uint8_t> buf); 204 205 int decodeGetTemperatureReadingResponse( 206 std::span<const uint8_t> buf, 207 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 208 double& temperatureReading); 209 210 int encodeReadThermalParametersRequest(uint8_t instanceId, uint8_t sensorId, 211 std::span<uint8_t> buf); 212 213 int decodeReadThermalParametersResponse( 214 std::span<const uint8_t> buf, 215 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 216 int32_t& threshold); 217 218 int encodeGetPowerDrawRequest( 219 PlatformEnvironmentalCommands commandCode, uint8_t instanceId, 220 uint8_t sensorId, uint8_t averagingInterval, std::span<uint8_t> buf); 221 222 int decodeGetPowerDrawResponse(std::span<const uint8_t> buf, 223 ocp::accelerator_management::CompletionCode& cc, 224 uint16_t& reasonCode, uint32_t& power); 225 226 int encodeGetCurrentEnergyCounterRequest(uint8_t instanceId, uint8_t sensorId, 227 std::span<uint8_t> buf); 228 229 int decodeGetCurrentEnergyCounterResponse( 230 std::span<const uint8_t> buf, 231 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 232 uint64_t& energy); 233 234 int encodeGetVoltageRequest(uint8_t instanceId, uint8_t sensorId, 235 std::span<uint8_t> buf); 236 237 int decodeGetVoltageResponse(std::span<const uint8_t> buf, 238 ocp::accelerator_management::CompletionCode& cc, 239 uint16_t& reasonCode, uint32_t& voltage); 240 241 int encodeGetInventoryInformationRequest(uint8_t instanceId, uint8_t propertyId, 242 std::span<uint8_t> buf); 243 244 int decodeGetInventoryInformationResponse( 245 std::span<const uint8_t> buf, 246 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 247 InventoryPropertyId propertyId, InventoryValue& value); 248 249 int encodeQueryScalarGroupTelemetryV2Request( 250 uint8_t instanceId, PciePortType portType, uint8_t upstreamPortNumber, 251 uint8_t portNumber, uint8_t groupId, std::span<uint8_t> buf); 252 253 int decodeQueryScalarGroupTelemetryV2Response( 254 std::span<const uint8_t> buf, 255 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 256 size_t& numTelemetryValues, std::vector<uint32_t>& telemetryValues); 257 258 } // namespace gpu 259