/*
 * SPDX-FileCopyrightText: Copyright OpenBMC Authors
 * SPDX-License-Identifier: Apache-2.0
 */
5*e0b80e1eSHarshit Aghera
6*e0b80e1eSHarshit Aghera #include "NvidiaPcieInterface.hpp"
7*e0b80e1eSHarshit Aghera
8*e0b80e1eSHarshit Aghera #include "Utils.hpp"
9*e0b80e1eSHarshit Aghera
10*e0b80e1eSHarshit Aghera #include <bits/basic_string.h>
11*e0b80e1eSHarshit Aghera
12*e0b80e1eSHarshit Aghera #include <MctpRequester.hpp>
13*e0b80e1eSHarshit Aghera #include <NvidiaGpuMctpVdm.hpp>
14*e0b80e1eSHarshit Aghera #include <NvidiaPcieDevice.hpp>
15*e0b80e1eSHarshit Aghera #include <OcpMctpVdm.hpp>
16*e0b80e1eSHarshit Aghera #include <phosphor-logging/lg2.hpp>
17*e0b80e1eSHarshit Aghera #include <sdbusplus/asio/connection.hpp>
18*e0b80e1eSHarshit Aghera #include <sdbusplus/asio/object_server.hpp>
19*e0b80e1eSHarshit Aghera
20*e0b80e1eSHarshit Aghera #include <cmath>
21*e0b80e1eSHarshit Aghera #include <cstddef>
22*e0b80e1eSHarshit Aghera #include <cstdint>
23*e0b80e1eSHarshit Aghera #include <functional>
24*e0b80e1eSHarshit Aghera #include <limits>
25*e0b80e1eSHarshit Aghera #include <memory>
26*e0b80e1eSHarshit Aghera #include <span>
27*e0b80e1eSHarshit Aghera #include <string>
28*e0b80e1eSHarshit Aghera #include <system_error>
29*e0b80e1eSHarshit Aghera #include <vector>
30*e0b80e1eSHarshit Aghera
31*e0b80e1eSHarshit Aghera using std::string;
32*e0b80e1eSHarshit Aghera
33*e0b80e1eSHarshit Aghera using namespace std::literals;
34*e0b80e1eSHarshit Aghera
NvidiaPcieInterface(std::shared_ptr<sdbusplus::asio::connection> & conn,mctp::MctpRequester & mctpRequester,const std::string & name,const std::string & path,uint8_t eid,sdbusplus::asio::object_server & objectServer)35*e0b80e1eSHarshit Aghera NvidiaPcieInterface::NvidiaPcieInterface(
36*e0b80e1eSHarshit Aghera std::shared_ptr<sdbusplus::asio::connection>& conn,
37*e0b80e1eSHarshit Aghera mctp::MctpRequester& mctpRequester, const std::string& name,
38*e0b80e1eSHarshit Aghera const std::string& path, uint8_t eid,
39*e0b80e1eSHarshit Aghera sdbusplus::asio::object_server& objectServer) :
40*e0b80e1eSHarshit Aghera eid(eid), path(path), conn(conn), mctpRequester(mctpRequester)
41*e0b80e1eSHarshit Aghera {
42*e0b80e1eSHarshit Aghera const std::string dbusPath = pcieDevicePathPrefix + escapeName(name);
43*e0b80e1eSHarshit Aghera
44*e0b80e1eSHarshit Aghera pcieDeviceInterface = objectServer.add_interface(
45*e0b80e1eSHarshit Aghera dbusPath, "xyz.openbmc_project.Inventory.Item.PCIeDevice");
46*e0b80e1eSHarshit Aghera
47*e0b80e1eSHarshit Aghera switchInterface = objectServer.add_interface(
48*e0b80e1eSHarshit Aghera dbusPath, "xyz.openbmc_project.Inventory.Item.PCIeSwitch");
49*e0b80e1eSHarshit Aghera
50*e0b80e1eSHarshit Aghera pcieDeviceInterface->register_property(
51*e0b80e1eSHarshit Aghera "GenerationInUse",
52*e0b80e1eSHarshit Aghera std::string(
53*e0b80e1eSHarshit Aghera "xyz.openbmc_project.Inventory.Item.PCIeSlot.Generations.Unknown"));
54*e0b80e1eSHarshit Aghera
55*e0b80e1eSHarshit Aghera pcieDeviceInterface->register_property("LanesInUse",
56*e0b80e1eSHarshit Aghera std::numeric_limits<size_t>::max());
57*e0b80e1eSHarshit Aghera
58*e0b80e1eSHarshit Aghera pcieDeviceInterface->register_property(
59*e0b80e1eSHarshit Aghera "GenerationSupported",
60*e0b80e1eSHarshit Aghera std::string(
61*e0b80e1eSHarshit Aghera "xyz.openbmc_project.Inventory.Item.PCIeSlot.Generations.Unknown"));
62*e0b80e1eSHarshit Aghera
63*e0b80e1eSHarshit Aghera pcieDeviceInterface->register_property("MaxLanes", static_cast<size_t>(0));
64*e0b80e1eSHarshit Aghera
65*e0b80e1eSHarshit Aghera if (!pcieDeviceInterface->initialize())
66*e0b80e1eSHarshit Aghera {
67*e0b80e1eSHarshit Aghera lg2::error("Error initializing PCIe Device Interface for EID={EID}",
68*e0b80e1eSHarshit Aghera "EID", eid);
69*e0b80e1eSHarshit Aghera }
70*e0b80e1eSHarshit Aghera
71*e0b80e1eSHarshit Aghera if (!switchInterface->initialize())
72*e0b80e1eSHarshit Aghera {
73*e0b80e1eSHarshit Aghera lg2::error("Error initializing Switch Interface for EID={EID}", "EID",
74*e0b80e1eSHarshit Aghera eid);
75*e0b80e1eSHarshit Aghera }
76*e0b80e1eSHarshit Aghera }
77*e0b80e1eSHarshit Aghera
mapPcieGeneration(uint32_t value)78*e0b80e1eSHarshit Aghera string NvidiaPcieInterface::mapPcieGeneration(uint32_t value)
79*e0b80e1eSHarshit Aghera {
80*e0b80e1eSHarshit Aghera switch (value)
81*e0b80e1eSHarshit Aghera {
82*e0b80e1eSHarshit Aghera case 1:
83*e0b80e1eSHarshit Aghera return "xyz.openbmc_project.Inventory.Item.PCIeSlot.Generations.Gen1";
84*e0b80e1eSHarshit Aghera case 2:
85*e0b80e1eSHarshit Aghera return "xyz.openbmc_project.Inventory.Item.PCIeSlot.Generations.Gen2";
86*e0b80e1eSHarshit Aghera case 3:
87*e0b80e1eSHarshit Aghera return "xyz.openbmc_project.Inventory.Item.PCIeSlot.Generations.Gen3";
88*e0b80e1eSHarshit Aghera case 4:
89*e0b80e1eSHarshit Aghera return "xyz.openbmc_project.Inventory.Item.PCIeSlot.Generations.Gen4";
90*e0b80e1eSHarshit Aghera case 5:
91*e0b80e1eSHarshit Aghera return "xyz.openbmc_project.Inventory.Item.PCIeSlot.Generations.Gen5";
92*e0b80e1eSHarshit Aghera case 6:
93*e0b80e1eSHarshit Aghera return "xyz.openbmc_project.Inventory.Item.PCIeSlot.Generations.Gen6";
94*e0b80e1eSHarshit Aghera default:
95*e0b80e1eSHarshit Aghera return "xyz.openbmc_project.Inventory.Item.PCIeSlot.Generations.Unknown";
96*e0b80e1eSHarshit Aghera }
97*e0b80e1eSHarshit Aghera }
98*e0b80e1eSHarshit Aghera
decodeLinkWidth(uint32_t value)99*e0b80e1eSHarshit Aghera size_t NvidiaPcieInterface::decodeLinkWidth(uint32_t value)
100*e0b80e1eSHarshit Aghera {
101*e0b80e1eSHarshit Aghera return (value > 0) ? pow(2, value - 1) : 0;
102*e0b80e1eSHarshit Aghera }
103*e0b80e1eSHarshit Aghera
processResponse(const std::error_code & ec,std::span<const uint8_t> response)104*e0b80e1eSHarshit Aghera void NvidiaPcieInterface::processResponse(const std::error_code& ec,
105*e0b80e1eSHarshit Aghera std::span<const uint8_t> response)
106*e0b80e1eSHarshit Aghera {
107*e0b80e1eSHarshit Aghera if (ec)
108*e0b80e1eSHarshit Aghera {
109*e0b80e1eSHarshit Aghera lg2::error(
110*e0b80e1eSHarshit Aghera "Error updating PCIe Interface: sending message over MCTP failed, "
111*e0b80e1eSHarshit Aghera "rc={RC}, EID={EID}",
112*e0b80e1eSHarshit Aghera "RC", ec.value(), "EID", eid);
113*e0b80e1eSHarshit Aghera return;
114*e0b80e1eSHarshit Aghera }
115*e0b80e1eSHarshit Aghera
116*e0b80e1eSHarshit Aghera ocp::accelerator_management::CompletionCode cc{};
117*e0b80e1eSHarshit Aghera uint16_t reasonCode = 0;
118*e0b80e1eSHarshit Aghera size_t numTelemetryValue = 0;
119*e0b80e1eSHarshit Aghera
120*e0b80e1eSHarshit Aghera auto rc = gpu::decodeQueryScalarGroupTelemetryV2Response(
121*e0b80e1eSHarshit Aghera response, cc, reasonCode, numTelemetryValue, telemetryValues);
122*e0b80e1eSHarshit Aghera
123*e0b80e1eSHarshit Aghera if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS)
124*e0b80e1eSHarshit Aghera {
125*e0b80e1eSHarshit Aghera lg2::error("Error updating PCIe Interface: decode failed, "
126*e0b80e1eSHarshit Aghera "rc={RC}, cc={CC}, reasonCode={RESC}, EID={EID}",
127*e0b80e1eSHarshit Aghera "RC", rc, "CC", static_cast<uint8_t>(cc), "RESC", reasonCode,
128*e0b80e1eSHarshit Aghera "EID", eid);
129*e0b80e1eSHarshit Aghera return;
130*e0b80e1eSHarshit Aghera }
131*e0b80e1eSHarshit Aghera
132*e0b80e1eSHarshit Aghera if (!telemetryValues.empty())
133*e0b80e1eSHarshit Aghera {
134*e0b80e1eSHarshit Aghera pcieDeviceInterface->set_property(
135*e0b80e1eSHarshit Aghera "GenerationInUse", mapPcieGeneration(telemetryValues[0]));
136*e0b80e1eSHarshit Aghera }
137*e0b80e1eSHarshit Aghera
138*e0b80e1eSHarshit Aghera if (telemetryValues.size() > 1)
139*e0b80e1eSHarshit Aghera {
140*e0b80e1eSHarshit Aghera pcieDeviceInterface->set_property(
141*e0b80e1eSHarshit Aghera "LanesInUse",
142*e0b80e1eSHarshit Aghera decodeLinkWidth(static_cast<size_t>(telemetryValues[1])));
143*e0b80e1eSHarshit Aghera }
144*e0b80e1eSHarshit Aghera
145*e0b80e1eSHarshit Aghera if (telemetryValues.size() > 3)
146*e0b80e1eSHarshit Aghera {
147*e0b80e1eSHarshit Aghera pcieDeviceInterface->set_property(
148*e0b80e1eSHarshit Aghera "GenerationSupported", mapPcieGeneration(telemetryValues[3]));
149*e0b80e1eSHarshit Aghera }
150*e0b80e1eSHarshit Aghera
151*e0b80e1eSHarshit Aghera if (telemetryValues.size() > 4)
152*e0b80e1eSHarshit Aghera {
153*e0b80e1eSHarshit Aghera pcieDeviceInterface->set_property(
154*e0b80e1eSHarshit Aghera "MaxLanes",
155*e0b80e1eSHarshit Aghera decodeLinkWidth(static_cast<size_t>(telemetryValues[4])));
156*e0b80e1eSHarshit Aghera }
157*e0b80e1eSHarshit Aghera }
158*e0b80e1eSHarshit Aghera
update()159*e0b80e1eSHarshit Aghera void NvidiaPcieInterface::update()
160*e0b80e1eSHarshit Aghera {
161*e0b80e1eSHarshit Aghera auto rc =
162*e0b80e1eSHarshit Aghera gpu::encodeQueryScalarGroupTelemetryV2Request(0, {}, 0, 0, 1, request);
163*e0b80e1eSHarshit Aghera
164*e0b80e1eSHarshit Aghera if (rc != 0)
165*e0b80e1eSHarshit Aghera {
166*e0b80e1eSHarshit Aghera lg2::error("Error updating PCIe Interface: failed, rc={RC}, EID={EID}",
167*e0b80e1eSHarshit Aghera "RC", rc, "EID", eid);
168*e0b80e1eSHarshit Aghera return;
169*e0b80e1eSHarshit Aghera }
170*e0b80e1eSHarshit Aghera
171*e0b80e1eSHarshit Aghera mctpRequester.sendRecvMsg(
172*e0b80e1eSHarshit Aghera eid, request,
173*e0b80e1eSHarshit Aghera [weak{weak_from_this()}](const std::error_code& ec,
174*e0b80e1eSHarshit Aghera std::span<const uint8_t> buffer) {
175*e0b80e1eSHarshit Aghera std::shared_ptr<NvidiaPcieInterface> self = weak.lock();
176*e0b80e1eSHarshit Aghera if (!self)
177*e0b80e1eSHarshit Aghera {
178*e0b80e1eSHarshit Aghera lg2::error(
179*e0b80e1eSHarshit Aghera "Invalid reference to NvidiaPcieInterface for EID {EID}",
180*e0b80e1eSHarshit Aghera "EID", self->eid);
181*e0b80e1eSHarshit Aghera return;
182*e0b80e1eSHarshit Aghera }
183*e0b80e1eSHarshit Aghera self->processResponse(ec, buffer);
184*e0b80e1eSHarshit Aghera });
185*e0b80e1eSHarshit Aghera }
186