1 /* 2 * SPDX-FileCopyrightText: Copyright OpenBMC Authors 3 * SPDX-License-Identifier: Apache-2.0 4 */ 5 6 #pragma once 7 8 #include <OcpMctpVdm.hpp> 9 10 #include <array> 11 #include <cstddef> 12 #include <cstdint> 13 #include <span> 14 #include <string> 15 #include <variant> 16 #include <vector> 17 18 namespace gpu 19 { 20 21 using InventoryValue = std::variant<std::string, std::vector<uint8_t>>; 22 constexpr size_t maxInventoryDataSize = 256; 23 24 constexpr uint16_t nvidiaPciVendorId = 0x10de; 25 26 enum class MessageType : uint8_t 27 { 28 DEVICE_CAPABILITY_DISCOVERY = 0, 29 PLATFORM_ENVIRONMENTAL = 3 30 }; 31 32 enum class DeviceCapabilityDiscoveryCommands : uint8_t 33 { 34 QUERY_DEVICE_IDENTIFICATION = 0x09, 35 }; 36 37 enum class PlatformEnvironmentalCommands : uint8_t 38 { 39 GET_TEMPERATURE_READING = 0x00, 40 READ_THERMAL_PARAMETERS = 0x02, 41 GET_CURRENT_POWER_DRAW = 0x03, 42 GET_MAX_OBSERVED_POWER = 0x04, 43 GET_CURRENT_ENERGY_COUNTER = 0x06, 44 GET_INVENTORY_INFORMATION = 0x0C, 45 GET_VOLTAGE = 0x0F, 46 }; 47 48 enum class DeviceIdentification : uint8_t 49 { 50 DEVICE_GPU = 0, 51 DEVICE_SMA = 5 52 }; 53 54 enum class InventoryPropertyId : uint8_t 55 { 56 BOARD_PART_NUMBER = 0, 57 SERIAL_NUMBER = 1, 58 MARKETING_NAME = 2, 59 DEVICE_PART_NUMBER = 3, 60 FRU_PART_NUMBER = 4, 61 MEMORY_VENDOR = 5, 62 MEMORY_PART_NUMBER = 6, 63 MAX_MEMORY_CAPACITY = 7, 64 BUILD_DATE = 8, 65 FIRMWARE_VERSION = 9, 66 DEVICE_GUID = 10, 67 INFOROM_VERSION = 11, 68 PRODUCT_LENGTH = 12, 69 PRODUCT_WIDTH = 13, 70 PRODUCT_HEIGHT = 14, 71 RATED_DEVICE_POWER_LIMIT = 15, 72 MIN_DEVICE_POWER_LIMIT = 16, 73 MAX_DEVICE_POWER_LIMIT = 17, 74 MAX_MODULE_POWER_LIMIT = 18, 75 MIN_MODULE_POWER_LIMIT = 19, 76 RATED_MODULE_POWER_LIMIT = 20, 77 DEFAULT_BOOST_CLOCKS = 21, 78 DEFAULT_BASE_CLOCKS = 22, 79 DEFAULT_EDPP_SCALING = 23, 80 MIN_EDPP_SCALING = 24, 81 MAX_EDPP_SCALING = 25, 82 MIN_GRAPHICS_CLOCK = 26, 83 MAX_GRAPHICS_CLOCK = 27, 84 MIN_MEMORY_CLOCK = 28, 85 MAX_MEMORY_CLOCK = 29, 86 INFINIBAND_GUID = 30, 87 RACK_GUID = 31, 88 RACK_SLOT_NUMBER = 32, 89 COMPUTE_SLOT_INDEX = 33, 90 NODE_INDEX = 34, 91 GPU_NODE_ID = 35, 92 NVLINK_PEER_TYPE = 36 93 }; 94 95 struct QueryDeviceIdentificationRequest 96 { 97 ocp::accelerator_management::CommonRequest hdr; 98 } __attribute__((packed)); 99 100 struct QueryDeviceIdentificationResponse 101 { 102 ocp::accelerator_management::CommonResponse hdr; 103 uint8_t device_identification; 104 uint8_t instance_id; 105 } __attribute__((packed)); 106 107 struct GetNumericSensorReadingRequest 108 { 109 ocp::accelerator_management::CommonRequest hdr; 110 uint8_t sensor_id; 111 } __attribute__((packed)); 112 113 using GetTemperatureReadingRequest = GetNumericSensorReadingRequest; 114 115 using ReadThermalParametersRequest = GetNumericSensorReadingRequest; 116 117 struct GetPowerDrawRequest 118 { 119 ocp::accelerator_management::CommonRequest hdr; 120 uint8_t sensorId; 121 uint8_t averagingInterval; 122 } __attribute__((packed)); 123 124 using GetCurrentEnergyCounterRequest = GetNumericSensorReadingRequest; 125 126 using GetVoltageRequest = GetNumericSensorReadingRequest; 127 128 struct GetTemperatureReadingResponse 129 { 130 ocp::accelerator_management::CommonResponse hdr; 131 int32_t reading; 132 } __attribute__((packed)); 133 134 struct ReadThermalParametersResponse 135 { 136 ocp::accelerator_management::CommonResponse hdr; 137 int32_t threshold; 138 } __attribute__((packed)); 139 140 struct GetPowerDrawResponse 141 { 142 ocp::accelerator_management::CommonResponse hdr; 143 uint32_t power; 144 } __attribute__((packed)); 145 146 struct GetCurrentEnergyCounterResponse 147 { 148 ocp::accelerator_management::CommonResponse hdr; 149 uint64_t energy; 150 } __attribute__((packed)); 151 152 struct GetVoltageResponse 153 { 154 ocp::accelerator_management::CommonResponse hdr; 155 uint32_t voltage; 156 } __attribute__((packed)); 157 158 struct GetInventoryInformationRequest 159 { 160 ocp::accelerator_management::CommonRequest hdr; 161 uint8_t property_id; 162 } __attribute__((packed)); 163 164 struct GetInventoryInformationResponse 165 { 166 ocp::accelerator_management::CommonResponse hdr; 167 std::array<uint8_t, maxInventoryDataSize> data; 168 } __attribute__((packed)); 169 170 int packHeader(const ocp::accelerator_management::BindingPciVidInfo& hdr, 171 ocp::accelerator_management::BindingPciVid& msg); 172 173 int encodeQueryDeviceIdentificationRequest(uint8_t instanceId, 174 std::span<uint8_t> buf); 175 176 int decodeQueryDeviceIdentificationResponse( 177 std::span<const uint8_t> buf, 178 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 179 uint8_t& deviceIdentification, uint8_t& deviceInstance); 180 181 int encodeGetTemperatureReadingRequest(uint8_t instanceId, uint8_t sensorId, 182 std::span<uint8_t> buf); 183 184 int decodeGetTemperatureReadingResponse( 185 std::span<const uint8_t> buf, 186 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 187 double& temperatureReading); 188 189 int encodeReadThermalParametersRequest(uint8_t instanceId, uint8_t sensorId, 190 std::span<uint8_t> buf); 191 192 int decodeReadThermalParametersResponse( 193 std::span<const uint8_t> buf, 194 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 195 int32_t& threshold); 196 197 int encodeGetPowerDrawRequest( 198 PlatformEnvironmentalCommands commandCode, uint8_t instanceId, 199 uint8_t sensorId, uint8_t averagingInterval, std::span<uint8_t> buf); 200 201 int decodeGetPowerDrawResponse(std::span<const uint8_t> buf, 202 ocp::accelerator_management::CompletionCode& cc, 203 uint16_t& reasonCode, uint32_t& power); 204 205 int encodeGetCurrentEnergyCounterRequest(uint8_t instanceId, uint8_t sensorId, 206 std::span<uint8_t> buf); 207 208 int decodeGetCurrentEnergyCounterResponse( 209 std::span<const uint8_t> buf, 210 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 211 uint64_t& energy); 212 213 int encodeGetVoltageRequest(uint8_t instanceId, uint8_t sensorId, 214 std::span<uint8_t> buf); 215 216 int decodeGetVoltageResponse(std::span<const uint8_t> buf, 217 ocp::accelerator_management::CompletionCode& cc, 218 uint16_t& reasonCode, uint32_t& voltage); 219 220 int encodeGetInventoryInformationRequest(uint8_t instanceId, uint8_t propertyId, 221 std::span<uint8_t> buf); 222 223 int decodeGetInventoryInformationResponse( 224 std::span<const uint8_t> buf, 225 ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode, 226 InventoryPropertyId propertyId, InventoryValue& value); 227 228 } // namespace gpu 229