1 /*
2 * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
3 * AFFILIATES. All rights reserved.
4 * SPDX-License-Identifier: Apache-2.0
5 */
6
7 #include "NvidiaGpuThresholds.hpp"
8
9 #include <MctpRequester.hpp>
10 #include <NvidiaGpuMctpVdm.hpp>
11 #include <OcpMctpVdm.hpp>
12 #include <phosphor-logging/lg2.hpp>
13
14 #include <array>
15 #include <cerrno>
16 #include <cstddef>
17 #include <cstdint>
18 #include <functional>
19 #include <memory>
20 #include <span>
21 #include <vector>
22
processReadThermalParameterResponse(const std::function<void (uint8_t,int32_t)> & callback,const std::span<const uint8_t> respMsg,int sendRecvMsgResult)23 void processReadThermalParameterResponse(
24 const std::function<void(uint8_t, int32_t)>& callback,
25 const std::span<const uint8_t> respMsg, int sendRecvMsgResult)
26 {
27 if (sendRecvMsgResult != 0)
28 {
29 lg2::error(
30 "Error reading thermal parameter: sending message over MCTP failed, rc={RC}",
31 "RC", sendRecvMsgResult);
32 callback(EPROTO, 0);
33 return;
34 }
35
36 ocp::accelerator_management::CompletionCode cc{};
37 uint16_t reasonCode = 0;
38 int32_t threshold = 0;
39
40 auto rc = gpu::decodeReadThermalParametersResponse(respMsg, cc, reasonCode,
41 threshold);
42
43 if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
44 {
45 lg2::error(
46 "Error reading thermal parameter: decode failed, rc={RC}, cc={CC}, reasonCode={RESC}",
47 "RC", rc, "CC", cc, "RESC", reasonCode);
48 callback(EPROTO, 0);
49 return;
50 }
51
52 callback(0, threshold);
53 };
54
readThermalParameter(uint8_t eid,uint8_t id,mctp::MctpRequester & mctpRequester,const std::function<void (uint8_t,int32_t)> & callback)55 void readThermalParameter(uint8_t eid, uint8_t id,
56 mctp::MctpRequester& mctpRequester,
57 const std::function<void(uint8_t, int32_t)>& callback)
58 {
59 auto reqMsg = std::make_shared<
60 std::array<uint8_t, sizeof(gpu::ReadThermalParametersRequest)>>();
61
62 auto respMsg = std::make_shared<
63 std::array<uint8_t, sizeof(gpu::ReadThermalParametersResponse)>>();
64
65 auto rc = gpu::encodeReadThermalParametersRequest(0, id, *reqMsg);
66 if (rc != 0)
67 {
68 lg2::error(
69 "Error reading thermal parameter for eid {EID} and parameter id {PID} : encode failed. rc={RC}",
70 "EID", eid, "PID", id, "RC", rc);
71 callback(rc, 0);
72 return;
73 }
74
75 mctpRequester.sendRecvMsg(
76 eid, *reqMsg, *respMsg,
77 [reqMsg, respMsg, callback](int sendRecvMsgResult) {
78 processReadThermalParameterResponse(callback, *respMsg,
79 sendRecvMsgResult);
80 });
81 }
82
readThermalParameterCallback(uint8_t eid,const std::shared_ptr<std::vector<uint8_t>> & ids,mctp::MctpRequester & mctpRequester,const std::function<void (uint8_t,std::vector<int32_t>)> & callback,size_t index,const std::shared_ptr<std::vector<int32_t>> & thresholds,uint8_t rc,int32_t threshold)83 void readThermalParameterCallback(
84 uint8_t eid, const std::shared_ptr<std::vector<uint8_t>>& ids,
85 mctp::MctpRequester& mctpRequester,
86 const std::function<void(uint8_t, std::vector<int32_t>)>& callback,
87 size_t index, const std::shared_ptr<std::vector<int32_t>>& thresholds,
88 uint8_t rc, int32_t threshold)
89 {
90 if (rc != 0)
91 {
92 lg2::error(
93 "Error reading thermal parameter for eid {EID} and parameter id {PID}. rc={RC}",
94 "EID", eid, "PID", (*ids)[index], "RC", rc);
95 callback(rc, *thresholds);
96 return;
97 }
98
99 thresholds->push_back(threshold);
100
101 ++index;
102 if (index == ids->size())
103 {
104 callback(rc, *thresholds);
105 }
106 else
107 {
108 readThermalParameter(eid, (*ids)[index], mctpRequester,
109 std::bind_front(readThermalParameterCallback, eid,
110 ids, std::ref(mctpRequester),
111 callback, index, thresholds));
112 }
113 }
114
readThermalParameters(uint8_t eid,const std::vector<uint8_t> & ids,mctp::MctpRequester & mctpRequester,const std::function<void (uint8_t,std::vector<int32_t>)> & callback)115 void readThermalParameters(
116 uint8_t eid, const std::vector<uint8_t>& ids,
117 mctp::MctpRequester& mctpRequester,
118 const std::function<void(uint8_t, std::vector<int32_t>)>& callback)
119 {
120 auto thresholds = std::make_shared<std::vector<int32_t>>();
121 size_t index = 0;
122
123 readThermalParameter(
124 eid, ids[index], mctpRequester,
125 std::bind_front(readThermalParameterCallback, eid,
126 std::make_shared<std::vector<uint8_t>>(ids),
127 std::ref(mctpRequester), callback, index, thresholds));
128 }
129