xref: /openbmc/dbus-sensors/src/nvidia-gpu/NvidiaGpuMctpVdm.cpp (revision 6b7123225fc4a5180faf89190e9f64a7e248e697)
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
3  * AFFILIATES. All rights reserved.
4  * SPDX-License-Identifier: Apache-2.0
5  */
6 
7 #include "NvidiaGpuMctpVdm.hpp"
8 
9 #include "OcpMctpVdm.hpp"
10 
11 #include <endian.h>
12 
13 #include <cerrno>
14 #include <cstdint>
15 #include <span>
16 #include <vector>
17 
18 namespace gpu
19 {
20 // These functions encode/decode data communicated over the network
21 // The use of reinterpret_cast enables direct memory access to raw byte buffers
22 // without doing unnecessary data copying
23 // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast)
packHeader(const ocp::accelerator_management::BindingPciVidInfo & hdr,ocp::accelerator_management::BindingPciVid & msg)24 int packHeader(const ocp::accelerator_management::BindingPciVidInfo& hdr,
25                ocp::accelerator_management::BindingPciVid& msg)
26 {
27     return ocp::accelerator_management::packHeader(nvidiaPciVendorId, hdr, msg);
28 }
29 
encodeQueryDeviceIdentificationRequest(uint8_t instanceId,const std::span<uint8_t> buf)30 int encodeQueryDeviceIdentificationRequest(uint8_t instanceId,
31                                            const std::span<uint8_t> buf)
32 {
33     if (buf.size() < sizeof(QueryDeviceIdentificationRequest))
34     {
35         return EINVAL;
36     }
37 
38     auto* msg = reinterpret_cast<QueryDeviceIdentificationRequest*>(buf.data());
39 
40     ocp::accelerator_management::BindingPciVidInfo header{};
41 
42     header.ocp_accelerator_management_msg_type =
43         static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
44     header.instance_id = instanceId &
45                          ocp::accelerator_management::instanceIdBitMask;
46     header.msg_type =
47         static_cast<uint8_t>(MessageType::DEVICE_CAPABILITY_DISCOVERY);
48 
49     auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
50 
51     if (rc != 0)
52     {
53         return rc;
54     }
55 
56     msg->hdr.command = static_cast<uint8_t>(
57         DeviceCapabilityDiscoveryCommands::QUERY_DEVICE_IDENTIFICATION);
58     msg->hdr.data_size = 0;
59 
60     return 0;
61 }
62 
decodeQueryDeviceIdentificationResponse(const std::span<const uint8_t> buf,ocp::accelerator_management::CompletionCode & cc,uint16_t & reasonCode,uint8_t & deviceIdentification,uint8_t & deviceInstance)63 int decodeQueryDeviceIdentificationResponse(
64     const std::span<const uint8_t> buf,
65     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
66     uint8_t& deviceIdentification, uint8_t& deviceInstance)
67 {
68     auto rc =
69         ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
70 
71     if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
72     {
73         return rc;
74     }
75 
76     if (buf.size() < sizeof(QueryDeviceIdentificationResponse))
77     {
78         return EINVAL;
79     }
80 
81     const auto* response =
82         reinterpret_cast<const QueryDeviceIdentificationResponse*>(buf.data());
83 
84     deviceIdentification = response->device_identification;
85     deviceInstance = response->instance_id;
86 
87     return 0;
88 }
89 
encodeGetTemperatureReadingRequest(uint8_t instanceId,uint8_t sensorId,std::span<uint8_t> buf)90 int encodeGetTemperatureReadingRequest(uint8_t instanceId, uint8_t sensorId,
91                                        std::span<uint8_t> buf)
92 {
93     if (buf.size() < sizeof(GetTemperatureReadingRequest))
94     {
95         return EINVAL;
96     }
97 
98     auto* msg = reinterpret_cast<GetTemperatureReadingRequest*>(buf.data());
99 
100     ocp::accelerator_management::BindingPciVidInfo header{};
101     header.ocp_accelerator_management_msg_type =
102         static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
103     header.instance_id = instanceId &
104                          ocp::accelerator_management::instanceIdBitMask;
105     header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
106 
107     auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
108 
109     if (rc != 0)
110     {
111         return rc;
112     }
113 
114     msg->hdr.command = static_cast<uint8_t>(
115         PlatformEnvironmentalCommands::GET_TEMPERATURE_READING);
116     msg->hdr.data_size = sizeof(sensorId);
117     msg->sensor_id = sensorId;
118 
119     return 0;
120 }
121 
decodeGetTemperatureReadingResponse(const std::span<const uint8_t> buf,ocp::accelerator_management::CompletionCode & cc,uint16_t & reasonCode,double & temperatureReading)122 int decodeGetTemperatureReadingResponse(
123     const std::span<const uint8_t> buf,
124     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
125     double& temperatureReading)
126 {
127     auto rc =
128         ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
129 
130     if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
131     {
132         return rc;
133     }
134 
135     if (buf.size() < sizeof(GetTemperatureReadingResponse))
136     {
137         return EINVAL;
138     }
139 
140     const auto* response =
141         reinterpret_cast<const GetTemperatureReadingResponse*>(buf.data());
142 
143     uint16_t dataSize = le16toh(response->hdr.data_size);
144 
145     if (dataSize != sizeof(int32_t))
146     {
147         return EINVAL;
148     }
149 
150     int32_t reading = le32toh(response->reading);
151     temperatureReading = reading / static_cast<double>(1 << 8);
152 
153     return 0;
154 }
155 
encodeReadThermalParametersRequest(uint8_t instanceId,uint8_t sensorId,std::span<uint8_t> buf)156 int encodeReadThermalParametersRequest(uint8_t instanceId, uint8_t sensorId,
157                                        std::span<uint8_t> buf)
158 {
159     if (buf.size() < sizeof(ReadThermalParametersRequest))
160     {
161         return EINVAL;
162     }
163 
164     auto* msg = reinterpret_cast<ReadThermalParametersRequest*>(buf.data());
165 
166     ocp::accelerator_management::BindingPciVidInfo header{};
167     header.ocp_accelerator_management_msg_type =
168         static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
169     header.instance_id = instanceId &
170                          ocp::accelerator_management::instanceIdBitMask;
171     header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
172 
173     auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
174 
175     if (rc != 0)
176     {
177         return rc;
178     }
179 
180     msg->hdr.command = static_cast<uint8_t>(
181         PlatformEnvironmentalCommands::READ_THERMAL_PARAMETERS);
182     msg->hdr.data_size = sizeof(sensorId);
183     msg->sensor_id = sensorId;
184 
185     return 0;
186 }
187 
decodeReadThermalParametersResponse(std::span<const uint8_t> buf,ocp::accelerator_management::CompletionCode & cc,uint16_t & reasonCode,int32_t & threshold)188 int decodeReadThermalParametersResponse(
189     std::span<const uint8_t> buf,
190     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
191     int32_t& threshold)
192 {
193     auto rc =
194         ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
195 
196     if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
197     {
198         return rc;
199     }
200 
201     if (buf.size() < sizeof(ReadThermalParametersResponse))
202     {
203         return EINVAL;
204     }
205 
206     const auto* response =
207         reinterpret_cast<const ReadThermalParametersResponse*>(buf.data());
208 
209     uint16_t dataSize = le16toh(response->hdr.data_size);
210 
211     if (dataSize != sizeof(int32_t))
212     {
213         return EINVAL;
214     }
215 
216     threshold = le32toh(response->threshold);
217 
218     return 0;
219 }
220 
encodeGetPowerDrawRequest(PlatformEnvironmentalCommands commandCode,uint8_t instanceId,uint8_t sensorId,uint8_t averagingInterval,std::span<uint8_t> buf)221 int encodeGetPowerDrawRequest(PlatformEnvironmentalCommands commandCode,
222                               uint8_t instanceId, uint8_t sensorId,
223                               uint8_t averagingInterval, std::span<uint8_t> buf)
224 {
225     if (buf.size() < sizeof(GetPowerDrawRequest))
226     {
227         return EINVAL;
228     }
229 
230     auto* msg = reinterpret_cast<GetPowerDrawRequest*>(buf.data());
231 
232     ocp::accelerator_management::BindingPciVidInfo header{};
233     header.ocp_accelerator_management_msg_type =
234         static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
235     header.instance_id = instanceId &
236                          ocp::accelerator_management::instanceIdBitMask;
237     header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
238 
239     auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
240 
241     if (rc != 0)
242     {
243         return rc;
244     }
245 
246     msg->hdr.command = static_cast<uint8_t>(commandCode);
247     msg->hdr.data_size = sizeof(sensorId) + sizeof(averagingInterval);
248     msg->sensorId = sensorId;
249     msg->averagingInterval = averagingInterval;
250 
251     return 0;
252 }
253 
decodeGetPowerDrawResponse(std::span<const uint8_t> buf,ocp::accelerator_management::CompletionCode & cc,uint16_t & reasonCode,uint32_t & power)254 int decodeGetPowerDrawResponse(std::span<const uint8_t> buf,
255                                ocp::accelerator_management::CompletionCode& cc,
256                                uint16_t& reasonCode, uint32_t& power)
257 {
258     auto rc =
259         ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
260 
261     if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
262     {
263         return rc;
264     }
265 
266     if (buf.size() < sizeof(GetPowerDrawResponse))
267     {
268         return EINVAL;
269     }
270 
271     const auto* response =
272         reinterpret_cast<const GetPowerDrawResponse*>(buf.data());
273 
274     const uint16_t dataSize = le16toh(response->hdr.data_size);
275 
276     if (dataSize != sizeof(uint32_t))
277     {
278         return EINVAL;
279     }
280 
281     power = le32toh(response->power);
282 
283     return 0;
284 }
285 
encodeGetCurrentEnergyCounterRequest(uint8_t instanceId,uint8_t sensorId,std::span<uint8_t> buf)286 int encodeGetCurrentEnergyCounterRequest(uint8_t instanceId, uint8_t sensorId,
287                                          std::span<uint8_t> buf)
288 {
289     if (buf.size() < sizeof(GetTemperatureReadingRequest))
290     {
291         return EINVAL;
292     }
293 
294     auto* msg = reinterpret_cast<GetCurrentEnergyCounterRequest*>(buf.data());
295 
296     ocp::accelerator_management::BindingPciVidInfo header{};
297     header.ocp_accelerator_management_msg_type =
298         static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
299     header.instance_id = instanceId &
300                          ocp::accelerator_management::instanceIdBitMask;
301     header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
302 
303     auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
304 
305     if (rc != 0)
306     {
307         return rc;
308     }
309 
310     msg->hdr.command = static_cast<uint8_t>(
311         PlatformEnvironmentalCommands::GET_CURRENT_ENERGY_COUNTER);
312     msg->hdr.data_size = sizeof(sensorId);
313     msg->sensor_id = sensorId;
314 
315     return 0;
316 }
317 
decodeGetCurrentEnergyCounterResponse(std::span<const uint8_t> buf,ocp::accelerator_management::CompletionCode & cc,uint16_t & reasonCode,uint64_t & energy)318 int decodeGetCurrentEnergyCounterResponse(
319     std::span<const uint8_t> buf,
320     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
321     uint64_t& energy)
322 {
323     auto rc =
324         ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
325 
326     if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
327     {
328         return rc;
329     }
330 
331     if (buf.size() < sizeof(GetPowerDrawResponse))
332     {
333         return EINVAL;
334     }
335 
336     const auto* response =
337         reinterpret_cast<const GetCurrentEnergyCounterResponse*>(buf.data());
338 
339     const uint16_t dataSize = le16toh(response->hdr.data_size);
340 
341     if (dataSize != sizeof(uint64_t))
342     {
343         return EINVAL;
344     }
345 
346     energy = le32toh(response->energy);
347 
348     return 0;
349 }
350 
encodeGetVoltageRequest(uint8_t instanceId,uint8_t sensorId,std::span<uint8_t> buf)351 int encodeGetVoltageRequest(uint8_t instanceId, uint8_t sensorId,
352                             std::span<uint8_t> buf)
353 {
354     if (buf.size() < sizeof(GetVoltageRequest))
355     {
356         return EINVAL;
357     }
358 
359     auto* msg = reinterpret_cast<GetVoltageRequest*>(buf.data());
360 
361     ocp::accelerator_management::BindingPciVidInfo header{};
362     header.ocp_accelerator_management_msg_type =
363         static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
364     header.instance_id = instanceId &
365                          ocp::accelerator_management::instanceIdBitMask;
366     header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
367 
368     auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
369 
370     if (rc != 0)
371     {
372         return rc;
373     }
374 
375     msg->hdr.command =
376         static_cast<uint8_t>(PlatformEnvironmentalCommands::GET_VOLTAGE);
377     msg->hdr.data_size = sizeof(sensorId);
378     msg->sensor_id = sensorId;
379 
380     return 0;
381 }
382 
decodeGetVoltageResponse(std::span<const uint8_t> buf,ocp::accelerator_management::CompletionCode & cc,uint16_t & reasonCode,uint32_t & voltage)383 int decodeGetVoltageResponse(std::span<const uint8_t> buf,
384                              ocp::accelerator_management::CompletionCode& cc,
385                              uint16_t& reasonCode, uint32_t& voltage)
386 {
387     auto rc =
388         ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
389 
390     if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
391     {
392         return rc;
393     }
394 
395     if (buf.size() < sizeof(GetVoltageResponse))
396     {
397         return EINVAL;
398     }
399 
400     const auto* response =
401         reinterpret_cast<const GetVoltageResponse*>(buf.data());
402 
403     const uint16_t dataSize = le16toh(response->hdr.data_size);
404 
405     if (dataSize != sizeof(uint32_t))
406     {
407         return EINVAL;
408     }
409 
410     voltage = le32toh(response->voltage);
411 
412     return 0;
413 }
414 
encodeGetInventoryInformationRequest(uint8_t instanceId,uint8_t propertyId,std::span<uint8_t> buf)415 int encodeGetInventoryInformationRequest(uint8_t instanceId, uint8_t propertyId,
416                                          std::span<uint8_t> buf)
417 {
418     if (buf.size() < sizeof(GetInventoryInformationRequest))
419     {
420         return EINVAL;
421     }
422 
423     auto* msg = reinterpret_cast<GetInventoryInformationRequest*>(buf.data());
424 
425     ocp::accelerator_management::BindingPciVidInfo header{};
426     header.ocp_accelerator_management_msg_type =
427         static_cast<uint8_t>(ocp::accelerator_management::MessageType::REQUEST);
428     header.instance_id = instanceId &
429                          ocp::accelerator_management::instanceIdBitMask;
430     header.msg_type = static_cast<uint8_t>(MessageType::PLATFORM_ENVIRONMENTAL);
431 
432     auto rc = packHeader(header, msg->hdr.msgHdr.hdr);
433 
434     if (rc != 0)
435     {
436         return rc;
437     }
438 
439     msg->hdr.command = static_cast<uint8_t>(
440         PlatformEnvironmentalCommands::GET_INVENTORY_INFORMATION);
441     msg->hdr.data_size = sizeof(propertyId);
442     msg->property_id = propertyId;
443 
444     return 0;
445 }
446 
decodeGetInventoryInformationResponse(std::span<const uint8_t> buf,ocp::accelerator_management::CompletionCode & cc,uint16_t & reasonCode,InventoryPropertyId propertyId,InventoryValue & value)447 int decodeGetInventoryInformationResponse(
448     std::span<const uint8_t> buf,
449     ocp::accelerator_management::CompletionCode& cc, uint16_t& reasonCode,
450     InventoryPropertyId propertyId, InventoryValue& value)
451 {
452     auto rc =
453         ocp::accelerator_management::decodeReasonCodeAndCC(buf, cc, reasonCode);
454     if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
455     {
456         return rc;
457     }
458     // Expect at least one byte of inventory response data after common response
459     if (buf.size() < (sizeof(ocp::accelerator_management::CommonResponse) + 1))
460     {
461         return EINVAL;
462     }
463 
464     const auto* response =
465         reinterpret_cast<const GetInventoryInformationResponse*>(buf.data());
466     uint16_t dataSize = le16toh(response->hdr.data_size);
467 
468     if (dataSize == 0 || dataSize > maxInventoryDataSize)
469     {
470         return EINVAL;
471     }
472 
473     const uint8_t* dataPtr = response->data.data();
474 
475     switch (propertyId)
476     {
477         case InventoryPropertyId::BOARD_PART_NUMBER:
478         case InventoryPropertyId::SERIAL_NUMBER:
479         case InventoryPropertyId::MARKETING_NAME:
480         case InventoryPropertyId::DEVICE_PART_NUMBER:
481             value =
482                 std::string(reinterpret_cast<const char*>(dataPtr), dataSize);
483             break;
484         case InventoryPropertyId::DEVICE_GUID:
485             value = std::vector<uint8_t>(dataPtr, dataPtr + dataSize);
486             break;
487         default:
488             return EINVAL;
489     }
490     return 0;
491 }
492 
493 // NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast)
494 } // namespace gpu
495