xref: /openbmc/dbus-sensors/src/nvidia-gpu/NvidiaEthPort.cpp (revision 7427aeef4225bf23715539b195a23bce10865265)
1*7427aeefSHarshit Aghera /*
2*7427aeefSHarshit Aghera  * SPDX-FileCopyrightText: Copyright OpenBMC Authors
3*7427aeefSHarshit Aghera  * SPDX-License-Identifier: Apache-2.0
4*7427aeefSHarshit Aghera  */
5*7427aeefSHarshit Aghera 
6*7427aeefSHarshit Aghera #include "NvidiaEthPort.hpp"
7*7427aeefSHarshit Aghera 
8*7427aeefSHarshit Aghera #include "NvidiaUtils.hpp"
9*7427aeefSHarshit Aghera #include "Utils.hpp"
10*7427aeefSHarshit Aghera 
11*7427aeefSHarshit Aghera #include <bits/basic_string.h>
12*7427aeefSHarshit Aghera 
13*7427aeefSHarshit Aghera #include <MctpRequester.hpp>
14*7427aeefSHarshit Aghera #include <NvidiaGpuMctpVdm.hpp>
15*7427aeefSHarshit Aghera #include <NvidiaPcieDevice.hpp>
16*7427aeefSHarshit Aghera #include <OcpMctpVdm.hpp>
17*7427aeefSHarshit Aghera #include <phosphor-logging/lg2.hpp>
18*7427aeefSHarshit Aghera #include <sdbusplus/asio/connection.hpp>
19*7427aeefSHarshit Aghera #include <sdbusplus/asio/object_server.hpp>
20*7427aeefSHarshit Aghera #include <sdbusplus/message/native_types.hpp>
21*7427aeefSHarshit Aghera 
22*7427aeefSHarshit Aghera #include <array>
23*7427aeefSHarshit Aghera #include <cstdint>
24*7427aeefSHarshit Aghera #include <format>
25*7427aeefSHarshit Aghera #include <functional>
26*7427aeefSHarshit Aghera #include <memory>
27*7427aeefSHarshit Aghera #include <span>
28*7427aeefSHarshit Aghera #include <string>
29*7427aeefSHarshit Aghera #include <system_error>
30*7427aeefSHarshit Aghera #include <utility>
31*7427aeefSHarshit Aghera #include <vector>
32*7427aeefSHarshit Aghera 
33*7427aeefSHarshit Aghera using std::string;
34*7427aeefSHarshit Aghera 
35*7427aeefSHarshit Aghera using namespace std::literals;
36*7427aeefSHarshit Aghera 
/**
 * @brief Publish the D-Bus objects that represent one Ethernet port.
 *
 * Creates the port inventory object together with an association to its
 * parent device, then creates one Metric.Value object (plus an association
 * back to the port) for every entry of the telemetry table below. A failing
 * initialize() call is logged and construction carries on.
 *
 * @param conn          D-Bus connection; kept in the conn member.
 * @param mctpRequester Requester kept in the mctpRequester member, which
 *                      update() uses to send telemetry requests.
 * @param name          Port name; last element of the port object path and
 *                      part of every metric object path.
 * @param deviceName    Name of the owning device; used in the device, port
 *                      and metric object paths.
 * @param path          Kept in the path member; not otherwise used here.
 * @param eid           Endpoint ID handed to the requester by update() and
 *                      included in log messages.
 * @param portNumber    Port number encoded into telemetry requests by
 *                      update() and included in log messages.
 * @param objectServer  Object server on which all interfaces are created.
 */
NvidiaEthPortMetrics::NvidiaEthPortMetrics(
    std::shared_ptr<sdbusplus::asio::connection>& conn,
    mctp::MctpRequester& mctpRequester, const std::string& name,
    const std::string& deviceName, const std::string& path, uint8_t eid,
    uint16_t portNumber, sdbusplus::asio::object_server& objectServer) :
    eid(eid), portNumber(portNumber), path(path), conn(conn),
    mctpRequester(mctpRequester)
{
    // Telemetry tag -> metric object path suffix. The tag doubles as the index
    // into metricValueInterface / metricAssociationInterfaces, which is how
    // processResponse() finds the interface for a decoded counter.
    static constexpr std::array<std::pair<uint8_t, const char*>, 21>
        telemetryMetrics = {{
            {0, "/nic/rx_bytes"},
            {1, "/nic/tx_bytes"},
            {2, "/nic/rx_unicast_frames"},
            {3, "/nic/rx_multicast_frames"},
            {4, "/nic/rx_broadcast_frames"},
            {5, "/nic/tx_unicast_frames"},
            {6, "/nic/tx_multicast_frames"},
            {7, "/nic/tx_broadcast_frames"},
            {8, "/nic/rx_fcs_errors"},
            {9, "/nic/rx_frame_alignment_errors"},
            {10, "/nic/rx_false_carrier_errors"},
            {11, "/nic/rx_undersize_frames"},
            {12, "/nic/rx_oversize_frames"},
            {13, "/nic/rx_pause_xon_frames"},
            {14, "/nic/rx_pause_xoff_frames"},
            {15, "/nic/tx_pause_xon_frames"},
            {16, "/nic/tx_pause_xoff_frames"},
            {17, "/nic/tx_single_collisions"},
            {18, "/nic/tx_multiple_collisions"},
            {19, "/nic/tx_late_collisions"},
            {20, "/nic/tx_excessive_collisions"},
        }};

    const sdbusplus::message::object_path deviceDbusPath =
        sdbusplus::message::object_path(nicPathPrefix) / deviceName;

    // The port object lives directly underneath its device object.
    const sdbusplus::message::object_path portDbusPath = deviceDbusPath / name;

    const std::string metricsDbusPathPrefix =
        metricPath + std::format("port_{}_{}", deviceName, name);

    portInterface = objectServer.add_interface(
        portDbusPath, "xyz.openbmc_project.Inventory.Connector.Port");

    // Port -> device association.
    std::vector<Association> portAssociations;
    portAssociations.emplace_back("connected_to", "connecting",
                                  deviceDbusPath);

    associationInterface =
        objectServer.add_interface(portDbusPath, association::interface);

    associationInterface->register_property("Associations", portAssociations);

    for (const auto& [tag, metricName] : telemetryMetrics)
    {
        // Both interfaces of a metric share one object path; build it once.
        const std::string metricDbusPath = metricsDbusPathPrefix + metricName;

        metricValueInterface[tag] = objectServer.add_interface(
            metricDbusPath, "xyz.openbmc_project.Metric.Value");

        metricValueInterface[tag]->register_property(
            "Unit", "xyz.openbmc_project.Metric.Value.Unit.Count"s);
        metricValueInterface[tag]->register_property("Value", 0.0);

        // Metric -> port association. Named differently from
        // portAssociations so the outer variable is not shadowed.
        std::vector<Association> metricAssociations;
        metricAssociations.emplace_back("measuring", "measured_by",
                                        portDbusPath);

        metricAssociationInterfaces[tag] =
            objectServer.add_interface(metricDbusPath, association::interface);
        metricAssociationInterfaces[tag]->register_property(
            "Associations", metricAssociations);

        if (!metricValueInterface[tag]->initialize())
        {
            lg2::error(
                "Error initializing Ethernet Port Metric Interface for EID={EID}, PortNumber={PN}, Metric={MN}",
                "EID", eid, "PN", portNumber, "MN", metricName);
        }

        if (!metricAssociationInterfaces[tag]->initialize())
        {
            lg2::error(
                "Error initializing Ethernet Port Metric Association Interface for EID={EID}, PortNumber={PN}, Metric={MN}",
                "EID", eid, "PN", portNumber, "MN", metricName);
        }
    }

    // The port's own interfaces are initialized last, after every metric
    // object has been set up.
    if (!portInterface->initialize())
    {
        lg2::error(
            "Error initializing Ethernet Port Interface for EID={EID}, PortNumber={PN}",
            "EID", eid, "PN", portNumber);
    }

    if (!associationInterface->initialize())
    {
        lg2::error(
            "Error initializing Association Interface for Ethernet Port for EID={EID}, PortNumber={PN}",
            "EID", eid, "PN", portNumber);
    }
}
136*7427aeefSHarshit Aghera 
processResponse(const std::error_code & sendRecvMsgResult,std::span<const uint8_t> response)137*7427aeefSHarshit Aghera void NvidiaEthPortMetrics::processResponse(
138*7427aeefSHarshit Aghera     const std::error_code& sendRecvMsgResult, std::span<const uint8_t> response)
139*7427aeefSHarshit Aghera {
140*7427aeefSHarshit Aghera     if (sendRecvMsgResult)
141*7427aeefSHarshit Aghera     {
142*7427aeefSHarshit Aghera         lg2::error(
143*7427aeefSHarshit Aghera             "Error updating Ethernet Port Metrics: sending message over MCTP failed, "
144*7427aeefSHarshit Aghera             "rc={RC}, EID={EID}, PortNumber={PN}",
145*7427aeefSHarshit Aghera             "RC", sendRecvMsgResult.message(), "EID", eid, "PN", portNumber);
146*7427aeefSHarshit Aghera         return;
147*7427aeefSHarshit Aghera     }
148*7427aeefSHarshit Aghera 
149*7427aeefSHarshit Aghera     ocp::accelerator_management::CompletionCode cc{};
150*7427aeefSHarshit Aghera     uint16_t reasonCode = 0;
151*7427aeefSHarshit Aghera     std::vector<std::pair<uint8_t, uint64_t>> telemetryValues;
152*7427aeefSHarshit Aghera 
153*7427aeefSHarshit Aghera     const int rc = gpu::decodeGetEthernetPortTelemetryCountersResponse(
154*7427aeefSHarshit Aghera         response, cc, reasonCode, telemetryValues);
155*7427aeefSHarshit Aghera 
156*7427aeefSHarshit Aghera     if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
157*7427aeefSHarshit Aghera     {
158*7427aeefSHarshit Aghera         lg2::error(
159*7427aeefSHarshit Aghera             "Error updating Ethernet Port Metrics: decode failed, "
160*7427aeefSHarshit Aghera             "rc={RC}, cc={CC}, reasonCode={RESC}, EID={EID}, PortNumber={PN}",
161*7427aeefSHarshit Aghera             "RC", rc, "CC", static_cast<uint8_t>(cc), "RESC", reasonCode, "EID",
162*7427aeefSHarshit Aghera             eid, "PN", portNumber);
163*7427aeefSHarshit Aghera         return;
164*7427aeefSHarshit Aghera     }
165*7427aeefSHarshit Aghera 
166*7427aeefSHarshit Aghera     for (const auto& [tag, value] : telemetryValues)
167*7427aeefSHarshit Aghera     {
168*7427aeefSHarshit Aghera         if (tag < maxTelemetryValues && metricValueInterface[tag])
169*7427aeefSHarshit Aghera         {
170*7427aeefSHarshit Aghera             metricValueInterface[tag]->set_property("Value",
171*7427aeefSHarshit Aghera                                                     static_cast<double>(value));
172*7427aeefSHarshit Aghera         }
173*7427aeefSHarshit Aghera     }
174*7427aeefSHarshit Aghera }
175*7427aeefSHarshit Aghera 
update()176*7427aeefSHarshit Aghera void NvidiaEthPortMetrics::update()
177*7427aeefSHarshit Aghera {
178*7427aeefSHarshit Aghera     const int rc = gpu::encodeGetEthernetPortTelemetryCountersRequest(
179*7427aeefSHarshit Aghera         0, portNumber, request);
180*7427aeefSHarshit Aghera 
181*7427aeefSHarshit Aghera     if (rc != 0)
182*7427aeefSHarshit Aghera     {
183*7427aeefSHarshit Aghera         lg2::error(
184*7427aeefSHarshit Aghera             "Error updating Ethernet Port Metrics: encode failed, rc={RC}, EID={EID}, PortNumber={PN}",
185*7427aeefSHarshit Aghera             "RC", rc, "EID", eid, "PN", portNumber);
186*7427aeefSHarshit Aghera         return;
187*7427aeefSHarshit Aghera     }
188*7427aeefSHarshit Aghera 
189*7427aeefSHarshit Aghera     mctpRequester.sendRecvMsg(
190*7427aeefSHarshit Aghera         eid, request,
191*7427aeefSHarshit Aghera         [weak{weak_from_this()}](const std::error_code& ec,
192*7427aeefSHarshit Aghera                                  std::span<const uint8_t> buffer) {
193*7427aeefSHarshit Aghera             std::shared_ptr<NvidiaEthPortMetrics> self = weak.lock();
194*7427aeefSHarshit Aghera             if (!self)
195*7427aeefSHarshit Aghera             {
196*7427aeefSHarshit Aghera                 lg2::error("Invalid reference to NvidiaEthPortMetrics");
197*7427aeefSHarshit Aghera                 return;
198*7427aeefSHarshit Aghera             }
199*7427aeefSHarshit Aghera             self->processResponse(ec, buffer);
200*7427aeefSHarshit Aghera         });
201*7427aeefSHarshit Aghera }
202