xref: /openbmc/smbios-mdr/src/speed_select.cpp (revision a30229e1)
1 // Copyright (c) 2020 Intel Corporation
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "speed_select.hpp"
16 
17 #include "cpuinfo.hpp"
18 #include "cpuinfo_utils.hpp"
19 
20 #include <peci.h>
21 
22 #include <boost/asio/steady_timer.hpp>
23 #include <xyz/openbmc_project/Common/Device/error.hpp>
24 #include <xyz/openbmc_project/Common/error.hpp>
25 #include <xyz/openbmc_project/Control/Processor/CurrentOperatingConfig/server.hpp>
26 #include <xyz/openbmc_project/Inventory/Item/Cpu/OperatingConfig/server.hpp>
27 
28 #include <algorithm>
29 #include <iostream>
30 #include <memory>
31 #include <stdexcept>
32 #include <string>
33 
34 namespace cpu_info
35 {
36 namespace sst
37 {
38 
// Overload stream insertion for uint8_t so that the numeric value is printed
// rather than the ASCII character it would otherwise be treated as. Nothing in
// this file ever wants to print a single raw character.
std::ostream& operator<<(std::ostream& os, uint8_t value)
{
    const int numericValue = value;
    os << numericValue;
    return os;
}
45 
46 bool checkPECIStatus(EPECIStatus libStatus, uint8_t completionCode)
47 {
48     if (libStatus != PECI_CC_SUCCESS || completionCode != PECI_DEV_CC_SUCCESS)
49     {
50         std::cerr << "PECI command failed."
51                   << " Driver Status = " << libStatus << ","
52                   << " Completion Code = " << completionCode << '\n';
53         return false;
54     }
55     return true;
56 }
57 
58 static std::vector<BackendProvider>& getProviders()
59 {
60     static auto* providers = new std::vector<BackendProvider>;
61     return *providers;
62 }
63 
64 void registerBackend(BackendProvider providerFn)
65 {
66     getProviders().push_back(providerFn);
67 }
68 
69 std::unique_ptr<SSTInterface> getInstance(uint8_t address, CPUModel model)
70 {
71     DEBUG_PRINT << "Searching for provider for " << address << ", model "
72                 << std::hex << model << '\n';
73     for (const auto& provider : getProviders())
74     {
75         try
76         {
77             auto interface = provider(address, model);
78             DEBUG_PRINT << "returned " << interface << '\n';
79             if (interface)
80             {
81                 return interface;
82             }
83         }
84         catch (...)
85         {}
86     }
87     DEBUG_PRINT << "No supported backends found\n";
88     return nullptr;
89 }
90 
// sdbusplus server object type for the
// xyz.openbmc_project.Control.Processor.CurrentOperatingConfig interface.
// Used as the base class of CPUConfig.
using BaseCurrentOperatingConfig =
    sdbusplus::server::object_t<sdbusplus::xyz::openbmc_project::Control::
                                    Processor::server::CurrentOperatingConfig>;

// sdbusplus server object type for the
// xyz.openbmc_project.Inventory.Item.Cpu.OperatingConfig interface.
// Used as the base class of OperatingConfig.
using BaseOperatingConfig =
    sdbusplus::server::object_t<sdbusplus::xyz::openbmc_project::Inventory::
                                    Item::Cpu::server::OperatingConfig>;
98 
99 class OperatingConfig : public BaseOperatingConfig
100 {
101   public:
102     std::string path;
103     unsigned int level;
104 
105   public:
106     using BaseOperatingConfig::BaseOperatingConfig;
107     OperatingConfig(sdbusplus::bus::bus& bus, unsigned int level_,
108                     std::string path_) :
109         BaseOperatingConfig(bus, path_.c_str(), action::defer_emit),
110         path(std::move(path_)), level(level_)
111     {}
112 };
113 
114 class CPUConfig : public BaseCurrentOperatingConfig
115 {
116   private:
117     /** Objects describing all available SST configs - not modifiable. */
118     std::vector<std::unique_ptr<OperatingConfig>> availConfigs;
119     sdbusplus::bus::bus& bus;
120     const uint8_t peciAddress;
121     const std::string path; ///< D-Bus path of CPU object
122     const CPUModel cpuModel;
123 
124     // Keep mutable copies of the properties so we can cache values that we
125     // retrieve in the getters. We don't want to throw an error on a D-Bus
126     // get-property call (extra error handling in clients), so by caching we can
127     // hide any temporary hiccup in PECI communication.
128     // These values can be changed by in-band software so we have to do a full
129     // PECI read on every get-property, and can't assume that values will change
130     // only when set-property is done.
131     mutable unsigned int currentLevel;
132     mutable bool bfEnabled;
133 
134     /**
135      * Enforce common pre-conditions for D-Bus set property handlers.
136      */
137     void setPropertyCheckOrThrow(SSTInterface& sst)
138     {
139         if (!sst.supportsControl())
140         {
141             throw sdbusplus::xyz::openbmc_project::Common::Error::NotAllowed();
142         }
143         if (hostState != HostState::postComplete || !sst.ready())
144         {
145             throw sdbusplus::xyz::openbmc_project::Common::Error::Unavailable();
146         }
147     }
148 
149   public:
150     CPUConfig(sdbusplus::bus::bus& bus_, uint8_t index, CPUModel model) :
151         BaseCurrentOperatingConfig(bus_, generatePath(index).c_str(),
152                                    action::defer_emit),
153         bus(bus_), peciAddress(index + MIN_CLIENT_ADDR),
154         path(generatePath(index)), cpuModel(model), currentLevel(0),
155         bfEnabled(false)
156     {}
157 
158     //
159     // D-Bus Property Overrides
160     //
161 
162     sdbusplus::message::object_path appliedConfig() const override
163     {
164         DEBUG_PRINT << "Reading AppliedConfig\n";
165         // If CPU is powered off, return power-up default value of Level 0.
166         unsigned int level = 0;
167         if (hostState != HostState::off)
168         {
169             // Otherwise, try to read current state
170             auto sst = getInstance(peciAddress, cpuModel);
171             if (!sst)
172             {
173                 std::cerr << __func__
174                           << ": Failed to get SST provider instance\n";
175             }
176             else
177             {
178                 try
179                 {
180                     currentLevel = sst->currentLevel();
181                 }
182                 catch (const PECIError& error)
183                 {
184                     std::cerr << "Failed to get SST-PP level: " << error.what()
185                               << "\n";
186                 }
187             }
188             level = currentLevel;
189         }
190         return generateConfigPath(level);
191     }
192 
193     bool baseSpeedPriorityEnabled() const override
194     {
195         DEBUG_PRINT << "Reading BaseSpeedPriorityEnabled\n";
196         bool enabled = false;
197         if (hostState != HostState::off)
198         {
199             auto sst = getInstance(peciAddress, cpuModel);
200             if (!sst)
201             {
202                 std::cerr << __func__
203                           << ": Failed to get SST provider instance\n";
204             }
205             else
206             {
207                 try
208                 {
209                     bfEnabled = sst->bfEnabled(currentLevel);
210                 }
211                 catch (const PECIError& error)
212                 {
213                     std::cerr << "Failed to get SST-BF status: " << error.what()
214                               << "\n";
215                 }
216             }
217             enabled = bfEnabled;
218         }
219         return enabled;
220     }
221 
222     sdbusplus::message::object_path
223         appliedConfig(sdbusplus::message::object_path value) override
224     {
225         DEBUG_PRINT << "Writing AppliedConfig\n";
226         const OperatingConfig* newConfig = nullptr;
227         for (const auto& config : availConfigs)
228         {
229             if (config->path == value.str)
230             {
231                 newConfig = config.get();
232             }
233         }
234 
235         if (newConfig == nullptr)
236         {
237             throw sdbusplus::xyz::openbmc_project::Common::Error::
238                 InvalidArgument();
239         }
240 
241         auto sst = getInstance(peciAddress, cpuModel);
242         if (!sst)
243         {
244             std::cerr << __func__ << ": Failed to get SST provider instance\n";
245             return sdbusplus::message::object_path();
246         }
247         setPropertyCheckOrThrow(*sst);
248         try
249         {
250             sst->setCurrentLevel(newConfig->level);
251             currentLevel = newConfig->level;
252         }
253         catch (const PECIError& error)
254         {
255             std::cerr << "Failed to set new SST-PP level: " << error.what()
256                       << "\n";
257             throw sdbusplus::xyz::openbmc_project::Common::Device::Error::
258                 WriteFailure();
259         }
260 
261         // return value not used
262         return sdbusplus::message::object_path();
263     }
264 
265     bool baseSpeedPriorityEnabled(bool value) override
266     {
267         DEBUG_PRINT << "Writing BaseSpeedPriorityEnabled\n";
268         auto sst = getInstance(peciAddress, cpuModel);
269         if (!sst)
270         {
271             std::cerr << __func__ << ": Failed to get SST provider instance\n";
272             return false;
273         }
274         setPropertyCheckOrThrow(*sst);
275         try
276         {
277             sst->setBfEnabled(value);
278         }
279         catch (const PECIError& error)
280         {
281             std::cerr << "Failed to set SST-BF status: " << error.what()
282                       << "\n";
283             throw sdbusplus::xyz::openbmc_project::Common::Device::Error::
284                 WriteFailure();
285         }
286 
287         // return value not used
288         return false;
289     }
290 
291     //
292     // Additions
293     //
294 
295     OperatingConfig& newConfig(unsigned int level)
296     {
297         availConfigs.emplace_back(std::make_unique<OperatingConfig>(
298             bus, level, generateConfigPath(level)));
299         return *availConfigs.back();
300     }
301 
302     std::string generateConfigPath(unsigned int level) const
303     {
304         return path + "/config" + std::to_string(level);
305     }
306 
307     /**
308      * Emit the interface added signals which were deferred. This is required
309      * for ObjectMapper to pick up the objects, if we initially defered the
310      * signal emitting.
311      */
312     void finalize()
313     {
314         emit_added();
315         for (auto& config : availConfigs)
316         {
317             config->emit_added();
318         }
319     }
320 
321     static std::string generatePath(int index)
322     {
323         return cpuPath + std::to_string(index);
324     }
325 };
326 
327 /**
328  * Retrieve the SST parameters for a single config and fill the values into the
329  * properties on the D-Bus interface.
330  *
331  * @param[in,out]   sst         Interface to SST backend.
332  * @param[in]       level       Config TDP level to retrieve.
333  * @param[out]      config      D-Bus interface to update.
334  */
335 static void getSingleConfig(SSTInterface& sst, unsigned int level,
336                             OperatingConfig& config)
337 {
338     config.powerLimit(sst.tdp(level));
339 
340     config.availableCoreCount(sst.coreCount(level));
341 
342     config.baseSpeed(sst.p1Freq(level));
343 
344     config.maxSpeed(sst.p0Freq(level));
345 
346     config.maxJunctionTemperature(sst.prochotTemp(level));
347 
348     // Construct BaseSpeedPrioritySettings
349     std::vector<std::tuple<uint32_t, std::vector<uint32_t>>> baseSpeeds;
350     if (sst.bfSupported(level))
351     {
352         std::vector<uint32_t> totalCoreList, loFreqCoreList, hiFreqCoreList;
353         totalCoreList = sst.enabledCoreList(level);
354         hiFreqCoreList = sst.bfHighPriorityCoreList(level);
355         std::set_difference(
356             totalCoreList.begin(), totalCoreList.end(), hiFreqCoreList.begin(),
357             hiFreqCoreList.end(),
358             std::inserter(loFreqCoreList, loFreqCoreList.begin()));
359 
360         baseSpeeds = {{sst.bfHighPriorityFreq(level), hiFreqCoreList},
361                       {sst.bfLowPriorityFreq(level), loFreqCoreList}};
362     }
363     config.baseSpeedPrioritySettings(baseSpeeds);
364 
365     config.turboProfile(sst.sseTurboProfile(level));
366 }
367 
368 /**
369  * Retrieve all SST configuration info for all discoverable CPUs, and publish
370  * the info on new D-Bus objects on the given bus connection.
371  *
372  * @param[out]  cpuList     List to append info about discovered CPUs,
373  *                          including pointers to D-Bus objects to keep them
374  *                          alive. No items may be added to list in case host
375  *                          system is powered off and no CPUs are accessible.
376  * @param[in,out]   ioc     ASIO context.
377  * @param[in,out]   conn    D-Bus ASIO connection.
378  *
379  * @return  Whether discovery was successfully finished.
380  *
381  * @throw PECIError     A PECI command failed on a CPU which had previously
382  *                      responded to a command.
383  */
384 static bool
385     discoverCPUsAndConfigs(std::vector<std::unique_ptr<CPUConfig>>& cpuList,
386                            boost::asio::io_context& ioc,
387                            sdbusplus::asio::connection& conn)
388 {
389     for (uint8_t i = MIN_CLIENT_ADDR; i <= MAX_CLIENT_ADDR; ++i)
390     {
391         // Let the event handler run any waiting tasks. If there is a lot of
392         // PECI contention, SST discovery could take a long time. This lets us
393         // get updates to hostState and handle any D-Bus requests.
394         ioc.poll();
395 
396         if (hostState == HostState::off)
397         {
398             return false;
399         }
400 
401         unsigned int cpuIndex = i - MIN_CLIENT_ADDR;
402         DEBUG_PRINT << "Discovering CPU " << cpuIndex << '\n';
403 
404         // We could possibly check D-Bus for CPU presence and model, but PECI is
405         // 10x faster and so much simpler.
406         uint8_t cc, stepping;
407         CPUModel cpuModel;
408         EPECIStatus status = peci_GetCPUID(i, &cpuModel, &stepping, &cc);
409         if (status == PECI_CC_TIMEOUT)
410         {
411             // Timing out indicates the CPU is present but PCS services not
412             // working yet. Try again later.
413             throw PECIError("Get CPUID timed out");
414         }
415         if (status == PECI_CC_CPU_NOT_PRESENT)
416         {
417             continue;
418         }
419         if (status != PECI_CC_SUCCESS || cc != PECI_DEV_CC_SUCCESS)
420         {
421             std::cerr << "GetCPUID returned status " << status
422                       << ", cc = " << cc << '\n';
423             continue;
424         }
425 
426         std::unique_ptr<SSTInterface> sst = getInstance(i, cpuModel);
427 
428         if (!sst)
429         {
430             // No supported backend for this CPU.
431             continue;
432         }
433 
434         if (!sst->ready())
435         {
436             // Supported CPU but it can't be queried yet. Try again later.
437             std::cerr << "sst not ready yet\n";
438             return false;
439         }
440 
441         if (!sst->ppEnabled())
442         {
443             // Supported CPU but the specific SKU doesn't support SST-PP.
444             std::cerr << "CPU doesn't support SST-PP\n";
445             continue;
446         }
447 
448         // Create the per-CPU configuration object
449         cpuList.emplace_back(
450             std::make_unique<CPUConfig>(conn, cpuIndex, cpuModel));
451         CPUConfig& cpu = *cpuList.back();
452 
453         bool foundCurrentLevel = false;
454 
455         for (unsigned int level = 0; level <= sst->numLevels(); ++level)
456         {
457             // levels 1 and 2 were legacy/deprecated, originally used for AVX
458             // license pre-granting. They may be reused for more levels in
459             // future generations. So we need to check for discontinuities.
460             if (!sst->levelSupported(level))
461             {
462                 continue;
463             }
464 
465             getSingleConfig(*sst, level, cpu.newConfig(level));
466 
467             if (level == sst->currentLevel())
468             {
469                 foundCurrentLevel = true;
470             }
471         }
472 
473         if (!foundCurrentLevel)
474         {
475             // In case we didn't encounter a PECI error, but also didn't find
476             // the config which is supposedly applied, we won't be able to
477             // populate the CurrentOperatingConfig so we have to remove this CPU
478             // from consideration.
479             std::cerr << "CPU " << cpuIndex
480                       << " claimed SST support but invalid configs\n";
481             cpuList.pop_back();
482             continue;
483         }
484 
485         cpu.finalize();
486     }
487 
488     return true;
489 }
490 
491 void init(boost::asio::io_context& ioc,
492           const std::shared_ptr<sdbusplus::asio::connection>& conn)
493 {
494     static boost::asio::steady_timer peciRetryTimer(ioc);
495     static std::vector<std::unique_ptr<CPUConfig>> cpus;
496     static int peciErrorCount = 0;
497 
498     bool finished = false;
499     try
500     {
501         DEBUG_PRINT << "Starting discovery\n";
502         finished = discoverCPUsAndConfigs(cpus, ioc, *conn);
503     }
504     catch (const PECIError& err)
505     {
506         std::cerr << "PECI Error: " << err.what() << '\n';
507 
508         // In case of repeated failure to finish discovery, turn off this
509         // feature altogether. Possible cause is that the CPU model does not
510         // actually support the necessary commands.
511         if (++peciErrorCount >= 50)
512         {
513             std::cerr << "Aborting SST discovery\n";
514             return;
515         }
516 
517         std::cerr << "Retrying SST discovery later\n";
518     }
519 
520     DEBUG_PRINT << "Finished discovery attempt: " << finished << '\n';
521 
522     // Retry later if no CPUs were available, or there was a PECI error.
523     if (!finished)
524     {
525         // Drop any created interfaces to avoid presenting incomplete info
526         cpus.clear();
527         peciRetryTimer.expires_after(std::chrono::seconds(10));
528         peciRetryTimer.async_wait([&ioc, conn](boost::system::error_code ec) {
529             if (ec)
530             {
531                 std::cerr << "SST PECI Retry Timer failed: " << ec << '\n';
532                 return;
533             }
534             init(ioc, conn);
535         });
536     }
537 }
538 
539 } // namespace sst
540 } // namespace cpu_info
541