xref: /openbmc/dbus-sensors/src/nvidia-gpu/NvidiaGpuThresholds.cpp (revision 5e7deccd14dcac790028a6641291cc019c1c4e52)
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
3  * AFFILIATES. All rights reserved.
4  * SPDX-License-Identifier: Apache-2.0
5  */
6 
7 #include "NvidiaGpuThresholds.hpp"
8 
9 #include <MctpRequester.hpp>
10 #include <NvidiaGpuMctpVdm.hpp>
11 #include <OcpMctpVdm.hpp>
12 #include <phosphor-logging/lg2.hpp>
13 
14 #include <array>
15 #include <cerrno>
16 #include <cstddef>
17 #include <cstdint>
18 #include <functional>
19 #include <memory>
20 #include <span>
21 #include <vector>
22 
processReadThermalParameterResponse(const std::function<void (uint8_t,int32_t)> & callback,const std::span<const uint8_t> respMsg,int sendRecvMsgResult)23 void processReadThermalParameterResponse(
24     const std::function<void(uint8_t, int32_t)>& callback,
25     const std::span<const uint8_t> respMsg, int sendRecvMsgResult)
26 {
27     if (sendRecvMsgResult != 0)
28     {
29         lg2::error(
30             "Error reading thermal parameter: sending message over MCTP failed, rc={RC}",
31             "RC", sendRecvMsgResult);
32         callback(EPROTO, 0);
33         return;
34     }
35 
36     ocp::accelerator_management::CompletionCode cc{};
37     uint16_t reasonCode = 0;
38     int32_t threshold = 0;
39 
40     auto rc = gpu::decodeReadThermalParametersResponse(respMsg, cc, reasonCode,
41                                                        threshold);
42 
43     if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
44     {
45         lg2::error(
46             "Error reading thermal parameter: decode failed, rc={RC}, cc={CC}, reasonCode={RESC}",
47             "RC", rc, "CC", cc, "RESC", reasonCode);
48         callback(EPROTO, 0);
49         return;
50     }
51 
52     callback(0, threshold);
53 };
54 
readThermalParameter(uint8_t eid,uint8_t id,mctp::MctpRequester & mctpRequester,const std::function<void (uint8_t,int32_t)> & callback)55 void readThermalParameter(uint8_t eid, uint8_t id,
56                           mctp::MctpRequester& mctpRequester,
57                           const std::function<void(uint8_t, int32_t)>& callback)
58 {
59     auto reqMsg = std::make_shared<
60         std::array<uint8_t, sizeof(gpu::ReadThermalParametersRequest)>>();
61 
62     auto respMsg = std::make_shared<
63         std::array<uint8_t, sizeof(gpu::ReadThermalParametersResponse)>>();
64 
65     auto rc = gpu::encodeReadThermalParametersRequest(0, id, *reqMsg);
66     if (rc != 0)
67     {
68         lg2::error(
69             "Error reading thermal parameter for eid {EID} and parameter id {PID} : encode failed. rc={RC}",
70             "EID", eid, "PID", id, "RC", rc);
71         callback(rc, 0);
72         return;
73     }
74 
75     mctpRequester.sendRecvMsg(
76         eid, *reqMsg, *respMsg,
77         [reqMsg, respMsg, callback](int sendRecvMsgResult) {
78             processReadThermalParameterResponse(callback, *respMsg,
79                                                 sendRecvMsgResult);
80         });
81 }
82 
readThermalParameterCallback(uint8_t eid,const std::shared_ptr<std::vector<uint8_t>> & ids,mctp::MctpRequester & mctpRequester,const std::function<void (uint8_t,std::vector<int32_t>)> & callback,size_t index,const std::shared_ptr<std::vector<int32_t>> & thresholds,uint8_t rc,int32_t threshold)83 void readThermalParameterCallback(
84     uint8_t eid, const std::shared_ptr<std::vector<uint8_t>>& ids,
85     mctp::MctpRequester& mctpRequester,
86     const std::function<void(uint8_t, std::vector<int32_t>)>& callback,
87     size_t index, const std::shared_ptr<std::vector<int32_t>>& thresholds,
88     uint8_t rc, int32_t threshold)
89 {
90     if (rc != 0)
91     {
92         lg2::error(
93             "Error reading thermal parameter for eid {EID} and parameter id {PID}. rc={RC}",
94             "EID", eid, "PID", (*ids)[index], "RC", rc);
95         callback(rc, *thresholds);
96         return;
97     }
98 
99     thresholds->push_back(threshold);
100 
101     ++index;
102     if (index == ids->size())
103     {
104         callback(rc, *thresholds);
105     }
106     else
107     {
108         readThermalParameter(eid, (*ids)[index], mctpRequester,
109                              std::bind_front(readThermalParameterCallback, eid,
110                                              ids, std::ref(mctpRequester),
111                                              callback, index, thresholds));
112     }
113 }
114 
readThermalParameters(uint8_t eid,const std::vector<uint8_t> & ids,mctp::MctpRequester & mctpRequester,const std::function<void (uint8_t,std::vector<int32_t>)> & callback)115 void readThermalParameters(
116     uint8_t eid, const std::vector<uint8_t>& ids,
117     mctp::MctpRequester& mctpRequester,
118     const std::function<void(uint8_t, std::vector<int32_t>)>& callback)
119 {
120     auto thresholds = std::make_shared<std::vector<int32_t>>();
121     size_t index = 0;
122 
123     readThermalParameter(
124         eid, ids[index], mctpRequester,
125         std::bind_front(readThermalParameterCallback, eid,
126                         std::make_shared<std::vector<uint8_t>>(ids),
127                         std::ref(mctpRequester), callback, index, thresholds));
128 }
129