xref: /openbmc/dbus-sensors/src/nvidia-gpu/NvidiaGpuMctpVdm.cpp (revision 86786b6c21320f3da413b9deed07d2f5360edabd)
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
3  * AFFILIATES. All rights reserved.
4  * SPDX-License-Identifier: Apache-2.0
5  */
6 
7 #include "NvidiaGpuMctpVdm.hpp"
8 
9 #include "OcpMctpVdm.hpp"
10 
11 #include <endian.h>
12 
13 #include <cerrno>
14 #include <cstdint>
15 #include <span>
16 #include <vector>
17 
18 namespace gpu
19 {
20 // These functions encode/decode data communicated over the network
21 // The use of reinterpret_cast enables direct memory access to raw byte buffers
22 // without doing unnecessary data copying
23 // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast)
packHeader(const ocp::accelerator_management::BindingPciVidInfo & hdr,ocp::accelerator_management::BindingPciVid & msg)24 int packHeader(const ocp::accelerator_management::BindingPciVidInfo& hdr,
25                ocp::accelerator_management::BindingPciVid& msg)
26 {
27     return ocp::accelerator_management::packHeader(nvidiaPciVendorId, hdr, msg);
28 }
29 
encodeQueryDeviceIdentificationRequest(uint8_t instanceId,const std::span<uint8_t> buf)30 int encodeQueryDeviceIdentificationRequest(uint8_t instanceId,
31                                            const std::span<uint8_t> buf)
32 {
33     if (buf.size() < sizeof(QueryDeviceIdentificationRequest))
34     {
35         return EINVAL;
36     }
37 
38     auto* msg = reinterpret_cast<QueryDeviceIdentificationRequest*>(buf.data());
39 
40     ocp::accelerator_management::BindingPciVidInfo header{};
41 
42     header.ocp_accelerator_management_msg_type =
43         static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
44     header.instance_id = instanceId &
45                          ocp::accelerator_management::instanceIdBitMask;
46     header.msg_type =
47         static_cast<uint8_t>(MessageType::DEVICE_CAPABILITY_DISCOVERY);
48 
49     auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
50 
51     if (rc != 0)
52     {
53         return rc;
54     }
55 
56     msg->hdr.command = static_cast<uint8_t>(
57         DeviceCapabilityDiscoveryCommands::QUERY_DEVICE_IDENTIFICATION);
58     msg->hdr.data_size = 0;
59 
60     return 0;
61 }
62 
decodeQueryDeviceIdentificationResponse(const std::span<const uint8_t> buf,ocp::accelerator_management::CompletionCode & cc,uint16_t & reasonCode,uint8_t & deviceIdentification,uint8_t & deviceInstance)63 int decodeQueryDeviceIdentificationResponse(
64     const std::span<const uint8_t> buf,
65     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
66     uint8_t& deviceIdentification, uint8_t& deviceInstance)
67 {
68     auto rc =
69         ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
70 
71     if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
72     {
73         return rc;
74     }
75 
76     if (buf.size() < sizeof(QueryDeviceIdentificationResponse))
77     {
78         return EINVAL;
79     }
80 
81     const auto* response =
82         reinterpret_cast<const QueryDeviceIdentificationResponse*>(buf.data());
83 
84     deviceIdentification = response->device_identification;
85     deviceInstance = response->instance_id;
86 
87     return 0;
88 }
89 
encodeGetTemperatureReadingRequest(uint8_t instanceId,uint8_t sensorId,std::span<uint8_t> buf)90 int encodeGetTemperatureReadingRequest(uint8_t instanceId, uint8_t sensorId,
91                                        std::span<uint8_t> buf)
92 {
93     if (buf.size() < sizeof(GetTemperatureReadingRequest))
94     {
95         return EINVAL;
96     }
97 
98     auto* msg = reinterpret_cast<GetTemperatureReadingRequest*>(buf.data());
99 
100     ocp::accelerator_management::BindingPciVidInfo header{};
101     header.ocp_accelerator_management_msg_type =
102         static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
103     header.instance_id = instanceId &
104                          ocp::accelerator_management::instanceIdBitMask;
105     header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
106 
107     auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
108 
109     if (rc != 0)
110     {
111         return rc;
112     }
113 
114     msg->hdr.command = static_cast<uint8_t>(
115         PlatformEnvironmentalCommands::GET_TEMPERATURE_READING);
116     msg->hdr.data_size = sizeof(sensorId);
117     msg->sensor_id = sensorId;
118 
119     return 0;
120 }
121 
decodeGetTemperatureReadingResponse(const std::span<const uint8_t> buf,ocp::accelerator_management::CompletionCode & cc,uint16_t & reasonCode,double & temperatureReading)122 int decodeGetTemperatureReadingResponse(
123     const std::span<const uint8_t> buf,
124     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
125     double& temperatureReading)
126 {
127     auto rc =
128         ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
129 
130     if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
131     {
132         return rc;
133     }
134 
135     if (buf.size() < sizeof(GetTemperatureReadingResponse))
136     {
137         return EINVAL;
138     }
139 
140     const auto* response =
141         reinterpret_cast<const GetTemperatureReadingResponse*>(buf.data());
142 
143     uint16_t dataSize = le16toh(response->hdr.data_size);
144 
145     if (dataSize != sizeof(int32_t))
146     {
147         return EINVAL;
148     }
149 
150     int32_t reading = le32toh(response->reading);
151     temperatureReading = reading / static_cast<double>(1 << 8);
152 
153     return 0;
154 }
155 
encodeReadThermalParametersRequest(uint8_t instanceId,uint8_t sensorId,std::span<uint8_t> buf)156 int encodeReadThermalParametersRequest(uint8_t instanceId, uint8_t sensorId,
157                                        std::span<uint8_t> buf)
158 {
159     if (buf.size() < sizeof(ReadThermalParametersRequest))
160     {
161         return EINVAL;
162     }
163 
164     auto* msg = reinterpret_cast<ReadThermalParametersRequest*>(buf.data());
165 
166     ocp::accelerator_management::BindingPciVidInfo header{};
167     header.ocp_accelerator_management_msg_type =
168         static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
169     header.instance_id = instanceId &
170                          ocp::accelerator_management::instanceIdBitMask;
171     header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
172 
173     auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
174 
175     if (rc != 0)
176     {
177         return rc;
178     }
179 
180     msg->hdr.command = static_cast<uint8_t>(
181         PlatformEnvironmentalCommands::READ_THERMAL_PARAMETERS);
182     msg->hdr.data_size = sizeof(sensorId);
183     msg->sensor_id = sensorId;
184 
185     return 0;
186 }
187 
decodeReadThermalParametersResponse(std::span<const uint8_t> buf,ocp::accelerator_management::CompletionCode & cc,uint16_t & reasonCode,int32_t & threshold)188 int decodeReadThermalParametersResponse(
189     std::span<const uint8_t> buf,
190     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
191     int32_t& threshold)
192 {
193     auto rc =
194         ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
195 
196     if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
197     {
198         return rc;
199     }
200 
201     if (buf.size() < sizeof(ReadThermalParametersResponse))
202     {
203         return EINVAL;
204     }
205 
206     const auto* response =
207         reinterpret_cast<const ReadThermalParametersResponse*>(buf.data());
208 
209     uint16_t dataSize = le16toh(response->hdr.data_size);
210 
211     if (dataSize != sizeof(int32_t))
212     {
213         return EINVAL;
214     }
215 
216     threshold = le32toh(response->threshold);
217 
218     return 0;
219 }
220 
encodeGetCurrentPowerDrawRequest(uint8_t instanceId,uint8_t sensorId,uint8_t averagingInterval,std::span<uint8_t> buf)221 int encodeGetCurrentPowerDrawRequest(uint8_t instanceId, uint8_t sensorId,
222                                      uint8_t averagingInterval,
223                                      std::span<uint8_t> buf)
224 {
225     if (buf.size() < sizeof(GetCurrentPowerDrawRequest))
226     {
227         return EINVAL;
228     }
229 
230     auto* msg = reinterpret_cast<GetCurrentPowerDrawRequest*>(buf.data());
231 
232     ocp::accelerator_management::BindingPciVidInfo header{};
233     header.ocp_accelerator_management_msg_type =
234         static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
235     header.instance_id = instanceId &
236                          ocp::accelerator_management::instanceIdBitMask;
237     header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
238 
239     auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
240 
241     if (rc != 0)
242     {
243         return rc;
244     }
245 
246     msg->hdr.command = static_cast<uint8_t>(
247         PlatformEnvironmentalCommands::GET_CURRENT_POWER_DRAW);
248     msg->hdr.data_size = sizeof(sensorId) + sizeof(averagingInterval);
249     msg->sensorId = sensorId;
250     msg->averagingInterval = averagingInterval;
251 
252     return 0;
253 }
254 
decodeGetCurrentPowerDrawResponse(std::span<const uint8_t> buf,ocp::accelerator_management::CompletionCode & cc,uint16_t & reasonCode,uint32_t & power)255 int decodeGetCurrentPowerDrawResponse(
256     std::span<const uint8_t> buf,
257     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
258     uint32_t& power)
259 {
260     auto rc =
261         ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
262 
263     if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
264     {
265         return rc;
266     }
267 
268     if (buf.size() < sizeof(GetCurrentPowerDrawResponse))
269     {
270         return EINVAL;
271     }
272 
273     const auto* response =
274         reinterpret_cast<const GetCurrentPowerDrawResponse*>(buf.data());
275 
276     const uint16_t dataSize = le16toh(response->hdr.data_size);
277 
278     if (dataSize != sizeof(uint32_t))
279     {
280         return EINVAL;
281     }
282 
283     power = le32toh(response->power);
284 
285     return 0;
286 }
287 
encodeGetCurrentEnergyCounterRequest(uint8_t instanceId,uint8_t sensorId,std::span<uint8_t> buf)288 int encodeGetCurrentEnergyCounterRequest(uint8_t instanceId, uint8_t sensorId,
289                                          std::span<uint8_t> buf)
290 {
291     if (buf.size() < sizeof(GetTemperatureReadingRequest))
292     {
293         return EINVAL;
294     }
295 
296     auto* msg = reinterpret_cast<GetCurrentEnergyCounterRequest*>(buf.data());
297 
298     ocp::accelerator_management::BindingPciVidInfo header{};
299     header.ocp_accelerator_management_msg_type =
300         static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
301     header.instance_id = instanceId &
302                          ocp::accelerator_management::instanceIdBitMask;
303     header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
304 
305     auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
306 
307     if (rc != 0)
308     {
309         return rc;
310     }
311 
312     msg->hdr.command = static_cast<uint8_t>(
313         PlatformEnvironmentalCommands::GET_CURRENT_ENERGY_COUNTER);
314     msg->hdr.data_size = sizeof(sensorId);
315     msg->sensor_id = sensorId;
316 
317     return 0;
318 }
319 
decodeGetCurrentEnergyCounterResponse(std::span<const uint8_t> buf,ocp::accelerator_management::CompletionCode & cc,uint16_t & reasonCode,uint64_t & energy)320 int decodeGetCurrentEnergyCounterResponse(
321     std::span<const uint8_t> buf,
322     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
323     uint64_t& energy)
324 {
325     auto rc =
326         ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
327 
328     if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
329     {
330         return rc;
331     }
332 
333     if (buf.size() < sizeof(GetCurrentPowerDrawResponse))
334     {
335         return EINVAL;
336     }
337 
338     const auto* response =
339         reinterpret_cast<const GetCurrentEnergyCounterResponse*>(buf.data());
340 
341     const uint16_t dataSize = le16toh(response->hdr.data_size);
342 
343     if (dataSize != sizeof(uint64_t))
344     {
345         return EINVAL;
346     }
347 
348     energy = le32toh(response->energy);
349 
350     return 0;
351 }
352 
encodeGetVoltageRequest(uint8_t instanceId,uint8_t sensorId,std::span<uint8_t> buf)353 int encodeGetVoltageRequest(uint8_t instanceId, uint8_t sensorId,
354                             std::span<uint8_t> buf)
355 {
356     if (buf.size() < sizeof(GetVoltageRequest))
357     {
358         return EINVAL;
359     }
360 
361     auto* msg = reinterpret_cast<GetVoltageRequest*>(buf.data());
362 
363     ocp::accelerator_management::BindingPciVidInfo header{};
364     header.ocp_accelerator_management_msg_type =
365         static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
366     header.instance_id = instanceId &
367                          ocp::accelerator_management::instanceIdBitMask;
368     header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
369 
370     auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
371 
372     if (rc != 0)
373     {
374         return rc;
375     }
376 
377     msg->hdr.command =
378         static_cast<uint8_t>(PlatformEnvironmentalCommands::GET_VOLTAGE);
379     msg->hdr.data_size = sizeof(sensorId);
380     msg->sensor_id = sensorId;
381 
382     return 0;
383 }
384 
decodeGetVoltageResponse(std::span<const uint8_t> buf,ocp::accelerator_management::CompletionCode & cc,uint16_t & reasonCode,uint32_t & voltage)385 int decodeGetVoltageResponse(std::span<const uint8_t> buf,
386                              ocp::accelerator_management::CompletionCode& cc,
387                              uint16_t& reasonCode, uint32_t& voltage)
388 {
389     auto rc =
390         ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
391 
392     if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
393     {
394         return rc;
395     }
396 
397     if (buf.size() < sizeof(GetVoltageResponse))
398     {
399         return EINVAL;
400     }
401 
402     const auto* response =
403         reinterpret_cast<const GetVoltageResponse*>(buf.data());
404 
405     const uint16_t dataSize = le16toh(response->hdr.data_size);
406 
407     if (dataSize != sizeof(uint32_t))
408     {
409         return EINVAL;
410     }
411 
412     voltage = le32toh(response->voltage);
413 
414     return 0;
415 }
416 
encodeGetInventoryInformationRequest(uint8_t instanceId,uint8_t propertyId,std::span<uint8_t> buf)417 int encodeGetInventoryInformationRequest(uint8_t instanceId, uint8_t propertyId,
418                                          std::span<uint8_t> buf)
419 {
420     if (buf.size() < sizeof(GetInventoryInformationRequest))
421     {
422         return EINVAL;
423     }
424 
425     auto* msg = reinterpret_cast<GetInventoryInformationRequest*>(buf.data());
426 
427     ocp::accelerator_management::BindingPciVidInfo header{};
428     header.ocp_accelerator_management_msg_type =
429         static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
430     header.instance_id = instanceId &
431                          ocp::accelerator_management::instanceIdBitMask;
432     header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
433 
434     auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
435 
436     if (rc != 0)
437     {
438         return rc;
439     }
440 
441     msg->hdr.command = static_cast<uint8_t>(
442         PlatformEnvironmentalCommands::GET_INVENTORY_INFORMATION);
443     msg->hdr.data_size = sizeof(propertyId);
444     msg->property_id = propertyId;
445 
446     return 0;
447 }
448 
decodeGetInventoryInformationResponse(std::span<const uint8_t> buf,ocp::accelerator_management::CompletionCode & cc,uint16_t & reasonCode,InventoryPropertyId propertyId,InventoryValue & value)449 int decodeGetInventoryInformationResponse(
450     std::span<const uint8_t> buf,
451     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
452     InventoryPropertyId propertyId, InventoryValue& value)
453 {
454     auto rc =
455         ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
456     if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
457     {
458         return rc;
459     }
460     // Expect at least one byte of inventory response data after common response
461     if (buf.size() < (sizeof(ocp::accelerator_management::CommonResponse) + 1))
462     {
463         return EINVAL;
464     }
465 
466     const auto* response =
467         reinterpret_cast<const GetInventoryInformationResponse*>(buf.data());
468     uint16_t dataSize = le16toh(response->hdr.data_size);
469 
470     if (dataSize == 0 || dataSize > maxInventoryDataSize)
471     {
472         return EINVAL;
473     }
474 
475     const uint8_t* dataPtr = response->data.data();
476 
477     switch (propertyId)
478     {
479         case InventoryPropertyId::BOARD_PART_NUMBER:
480         case InventoryPropertyId::SERIAL_NUMBER:
481         case InventoryPropertyId::MARKETING_NAME:
482         case InventoryPropertyId::DEVICE_PART_NUMBER:
483             value =
484                 std::string(reinterpret_cast<const char*>(dataPtr), dataSize);
485             break;
486         case InventoryPropertyId::DEVICE_GUID:
487             value = std::vector<uint8_t>(dataPtr, dataPtr + dataSize);
488             break;
489         default:
490             return EINVAL;
491     }
492     return 0;
493 }
494 
495 // NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast)
496 } // namespace gpu
497