/* * SPDX-FileCopyrightText: Copyright OpenBMC Authors * SPDX-License-Identifier: Apache-2.0 */ #include "NvidiaPciePortMetrics.hpp" #include "NvidiaUtils.hpp" #include "Utils.hpp" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using std::string; using namespace std::literals; constexpr const char* metricInterface = "xyz.openbmc_project.Metric.Value"; NvidiaPciePortMetrics::NvidiaPciePortMetrics( std::shared_ptr& conn, mctp::MctpRequester& mctpRequester, const std::string& name, const std::string& pcieDeviceName, const std::string& path, uint8_t eid, gpu::PciePortType portType, uint8_t upstreamPortNumber, uint8_t portNumber, sdbusplus::asio::object_server& objectServer, uint8_t scalarGroupId, const std::vector& metricsInfo) : eid(eid), portType(portType), upstreamPortNumber(upstreamPortNumber), portNumber(portNumber), scalarGroupId(scalarGroupId), path(path), conn(conn), mctpRequester(mctpRequester) { const std::string metricsDbusPathPrefix = metricPath + std::format("port_{}_{}", pcieDeviceName, name); const sdbusplus::message::object_path portDbusPath = sdbusplus::message::object_path(pcieDevicePathPrefix) / pcieDeviceName / name; for (const auto& [id, name] : metricsInfo) { const std::string metricsDbusPath = metricsDbusPathPrefix + name; metricValueInterfaces[id] = objectServer.add_interface(metricsDbusPath, metricInterface); metricValueInterfaces[id]->register_property( "Unit", "xyz.openbmc_project.Metric.Value.Unit.Count"s); metricValueInterfaces[id]->register_property("Value", 0.0); std::vector associations; associations.emplace_back("measuring", "measured_by", portDbusPath); metricAssociationInterfaces[id] = objectServer.add_interface(metricsDbusPath, association::interface); metricAssociationInterfaces[id]->register_property("Associations", associations); if (!metricValueInterfaces[id]->initialize()) { lg2::error( "Error initializing PCIe Port Metric Interface for EID={EID}, " "PortType={PT}, PortNumber={PN}, ScalarGroup={SG}, Metric={MN}", "EID", eid, "PT", static_cast(portType), "PN", portNumber, "EID", eid, "PN", portNumber, "SG", scalarGroupId, "MN", name); } if (!metricAssociationInterfaces[id]->initialize()) { lg2::error( "Error initializing PCIe Port Metric Association Interface for EID={EID}, " "PortType={PT}, PortNumber={PN}, ScalarGroup={SG}, Metric={MN}", "EID", eid, "PT", static_cast(portType), "PN", portNumber, "EID", eid, "PN", portNumber, "SG", scalarGroupId, "MN", name); } } } void NvidiaPciePortMetrics::processResponse( const std::error_code& sendRecvMsgResult, std::span response) { if (sendRecvMsgResult) { lg2::error( "Error updating PCIe Port Metrics: sending message over MCTP failed, " "rc={RC}, EID={EID}, PortType={PT}, PortNumber={PN}, ScalarGroup={SG}", "RC", sendRecvMsgResult.message(), "EID", eid, "PT", static_cast(portType), "PN", portNumber, "SG", scalarGroupId); return; } ocp::accelerator_management::CompletionCode cc{}; uint16_t reasonCode = 0; size_t numTelemetryValue = 0; int rc = gpu::decodeQueryScalarGroupTelemetryV2Response( response, cc, reasonCode, numTelemetryValue, telemetryValues); if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS) { lg2::error( "Error updating PCIe Port Errors: decode failed, " "rc={RC}, cc={CC}, reasonCode={RESC}, EID={EID}, PortType={PT}, PortNumber={PN}, ScalarGroup={SG}", "RC", rc, "CC", static_cast(cc), "RESC", reasonCode, "EID", eid, "PT", static_cast(portType), "PN", portNumber, "SG", scalarGroupId); return; } for (size_t i = 0; i < numTelemetryValue; ++i) { if (metricValueInterfaces[i] != nullptr) { metricValueInterfaces[i]->set_property( "Value", static_cast(telemetryValues[i])); } } } void NvidiaPciePortMetrics::update() { auto rc = gpu::encodeQueryScalarGroupTelemetryV2Request( 0, portType, upstreamPortNumber, portNumber, scalarGroupId, request); if (rc != 0) { lg2::error( "Error updating PCIe Port Errors: encode failed, rc={RC}, EID={EID}, PortType={PT}, PortNumber={PN}, ScalarGroup={SG}", "RC", rc, "EID", eid, "PT", static_cast(portType), "PN", portNumber, "SG", scalarGroupId); return; } mctpRequester.sendRecvMsg( eid, request, [weak{weak_from_this()}](const std::error_code& ec, std::span buffer) { std::shared_ptr self = weak.lock(); if (!self) { lg2::error("Invalid reference to NvidiaPciePortErrors"); return; } self->processResponse(ec, buffer); }); } std::shared_ptr makeNvidiaPciePortErrors( std::shared_ptr& conn, mctp::MctpRequester& mctpRequester, const std::string& name, const std::string& pcieDeviceName, const std::string& path, uint8_t eid, gpu::PciePortType portType, uint8_t upstreamPortNumber, uint8_t portNumber, sdbusplus::asio::object_server& objectServer) { static constexpr uint8_t nvidiaPciePortErrorScalarGroupId = 2; return std::make_shared( conn, mctpRequester, name, pcieDeviceName, path, eid, portType, upstreamPortNumber, portNumber, objectServer, nvidiaPciePortErrorScalarGroupId, std::vector{ {0, "/pcie/non_fatal_error_count"}, {1, "/pcie/fatal_error_count"}, {2, "/pcie/unsupported_request_count"}, {3, "/pcie/correctable_error_count"}, }); } std::shared_ptr makeNvidiaPciePortCounters( std::shared_ptr& conn, mctp::MctpRequester& mctpRequester, const std::string& name, const std::string& pcieDeviceName, const std::string& path, uint8_t eid, gpu::PciePortType portType, uint8_t upstreamPortNumber, uint8_t portNumber, sdbusplus::asio::object_server& objectServer) { static constexpr uint8_t nvidiaPciePortCounterScalarGroupId = 4; return std::make_shared( conn, mctpRequester, name, pcieDeviceName, path, eid, portType, upstreamPortNumber, portNumber, objectServer, nvidiaPciePortCounterScalarGroupId, std::vector{ {1, "/pcie/nak_received_count"}, {2, "/pcie/nak_sent_count"}, {4, "/pcie/replay_rollover_count"}, {6, "/pcie/replay_count"}, }); } std::shared_ptr makeNvidiaPciePortL0ToRecoveryCount( std::shared_ptr& conn, mctp::MctpRequester& mctpRequester, const std::string& name, const std::string& pcieDeviceName, const std::string& path, uint8_t eid, gpu::PciePortType portType, uint8_t upstreamPortNumber, uint8_t portNumber, sdbusplus::asio::object_server& objectServer) { static constexpr uint8_t nvidiaPciePortL0ToRecoveryCountScalarGroupId = 3; return std::make_shared( conn, mctpRequester, name, pcieDeviceName, path, eid, portType, upstreamPortNumber, portNumber, objectServer, nvidiaPciePortL0ToRecoveryCountScalarGroupId, std::vector{ {0, "/pcie/l0_to_recovery_count"}, }); }