xref: /openbmc/smbios-mdr/src/speed_select.cpp (revision defbc2ac)
1 // Copyright (c) 2020 Intel Corporation
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "speed_select.hpp"
16 
17 #include "cpuinfo.hpp"
18 #include "cpuinfo_utils.hpp"
19 
20 #include <peci.h>
21 
22 #include <boost/asio/error.hpp>
23 #include <boost/asio/steady_timer.hpp>
24 #include <xyz/openbmc_project/Common/Device/error.hpp>
25 #include <xyz/openbmc_project/Common/error.hpp>
26 #include <xyz/openbmc_project/Control/Processor/CurrentOperatingConfig/server.hpp>
27 #include <xyz/openbmc_project/Inventory/Item/Cpu/OperatingConfig/server.hpp>
28 
29 #include <algorithm>
30 #include <iostream>
31 #include <memory>
32 #include <stdexcept>
33 #include <string>
34 
35 namespace cpu_info
36 {
37 namespace sst
38 {
39 
// Stream a uint8_t as its numeric value rather than as an ascii character.
// A single raw char is basically never what we want to see in the output.
std::ostream& operator<<(std::ostream& os, uint8_t value)
{
    const int numeric = static_cast<int>(value);
    os << numeric;
    return os;
}
46 
47 bool checkPECIStatus(EPECIStatus libStatus, uint8_t completionCode)
48 {
49     if (libStatus != PECI_CC_SUCCESS || completionCode != PECI_DEV_CC_SUCCESS)
50     {
51         std::cerr << "PECI command failed."
52                   << " Driver Status = " << libStatus << ","
53                   << " Completion Code = " << completionCode << '\n';
54         return false;
55     }
56     return true;
57 }
58 
/**
 * Expand a 64-bit mask into the list of bit positions which are set.
 *
 * @param[in]   mask    Bit mask to expand.
 *
 * @return  Positions of all set bits, in ascending order. Empty if no bit
 *          is set.
 */
std::vector<uint32_t> convertMaskToList(std::bitset<64> mask)
{
    std::vector<uint32_t> bitList;
    // The final size is known up front, so allocate once instead of growing
    // the vector while appending.
    bitList.reserve(mask.count());
    for (size_t i = 0; i < mask.size(); ++i)
    {
        if (mask.test(i))
        {
            // i is always below 64, so the narrowing is lossless; spell it
            // out rather than relying on an implicit conversion.
            bitList.push_back(static_cast<uint32_t>(i));
        }
    }
    return bitList;
}
71 
72 static std::vector<BackendProvider>& getProviders()
73 {
74     static auto* providers = new std::vector<BackendProvider>;
75     return *providers;
76 }
77 
78 void registerBackend(BackendProvider providerFn)
79 {
80     getProviders().push_back(providerFn);
81 }
82 
83 std::unique_ptr<SSTInterface> getInstance(uint8_t address, CPUModel model)
84 {
85     DEBUG_PRINT << "Searching for provider for " << address << ", model "
86                 << std::hex << model << std::dec << '\n';
87     for (const auto& provider : getProviders())
88     {
89         try
90         {
91             auto interface = provider(address, model);
92             DEBUG_PRINT << "returned " << interface << '\n';
93             if (interface)
94             {
95                 return interface;
96             }
97         }
98         catch (...)
99         {}
100     }
101     DEBUG_PRINT << "No supported backends found\n";
102     return nullptr;
103 }
104 
// sdbusplus server object type for the
// xyz.openbmc_project.Control.Processor.CurrentOperatingConfig interface.
// Used as the base class of CPUConfig.
using BaseCurrentOperatingConfig =
    sdbusplus::server::object_t<sdbusplus::server::xyz::openbmc_project::
                                    control::processor::CurrentOperatingConfig>;

// sdbusplus server object type for the
// xyz.openbmc_project.Inventory.Item.Cpu.OperatingConfig interface.
// Used as the base class of OperatingConfig.
using BaseOperatingConfig =
    sdbusplus::server::object_t<sdbusplus::server::xyz::openbmc_project::
                                    inventory::item::cpu::OperatingConfig>;
112 
113 class OperatingConfig : public BaseOperatingConfig
114 {
115   public:
116     std::string path;
117     unsigned int level;
118 
119   public:
120     using BaseOperatingConfig::BaseOperatingConfig;
121     OperatingConfig(sdbusplus::bus_t& bus, unsigned int level_,
122                     std::string path_) :
123         BaseOperatingConfig(bus, path_.c_str(), action::defer_emit),
124         path(std::move(path_)), level(level_)
125     {}
126 };
127 
128 class CPUConfig : public BaseCurrentOperatingConfig
129 {
130   private:
131     /** Objects describing all available SST configs - not modifiable. */
132     std::vector<std::unique_ptr<OperatingConfig>> availConfigs;
133     sdbusplus::bus_t& bus;
134     const uint8_t peciAddress;
135     const std::string path; ///< D-Bus path of CPU object
136     const CPUModel cpuModel;
137 
138     // Keep mutable copies of the properties so we can cache values that we
139     // retrieve in the getters. We don't want to throw an error on a D-Bus
140     // get-property call (extra error handling in clients), so by caching we can
141     // hide any temporary hiccup in PECI communication.
142     // These values can be changed by in-band software so we have to do a full
143     // PECI read on every get-property, and can't assume that values will change
144     // only when set-property is done.
145     mutable unsigned int currentLevel;
146     mutable bool bfEnabled;
147 
148     /**
149      * Enforce common pre-conditions for D-Bus set property handlers.
150      */
151     void setPropertyCheckOrThrow(SSTInterface& sst)
152     {
153         if (!sst.supportsControl())
154         {
155             throw sdbusplus::xyz::openbmc_project::Common::Error::NotAllowed();
156         }
157         if (hostState != HostState::postComplete || !sst.ready())
158         {
159             throw sdbusplus::xyz::openbmc_project::Common::Error::Unavailable();
160         }
161     }
162 
163   public:
164     CPUConfig(sdbusplus::bus_t& bus_, uint8_t index, CPUModel model) :
165         BaseCurrentOperatingConfig(bus_, generatePath(index).c_str(),
166                                    action::defer_emit),
167         bus(bus_), peciAddress(index + MIN_CLIENT_ADDR),
168         path(generatePath(index)), cpuModel(model), currentLevel(0),
169         bfEnabled(false)
170     {}
171 
172     //
173     // D-Bus Property Overrides
174     //
175 
176     sdbusplus::message::object_path appliedConfig() const override
177     {
178         DEBUG_PRINT << "Reading AppliedConfig\n";
179         if (hostState != HostState::off)
180         {
181             // Otherwise, try to read current state
182             auto sst = getInstance(peciAddress, cpuModel);
183             if (!sst || !sst->ready())
184             {
185                 std::cerr << __func__
186                           << ": Failed to get SST provider instance\n";
187             }
188             else
189             {
190                 try
191                 {
192                     currentLevel = sst->currentLevel();
193                 }
194                 catch (const PECIError& error)
195                 {
196                     std::cerr << "Failed to get SST-PP level: " << error.what()
197                               << "\n";
198                 }
199             }
200         }
201         return generateConfigPath(currentLevel);
202     }
203 
204     bool baseSpeedPriorityEnabled() const override
205     {
206         DEBUG_PRINT << "Reading BaseSpeedPriorityEnabled\n";
207         if (hostState != HostState::off)
208         {
209             auto sst = getInstance(peciAddress, cpuModel);
210             if (!sst || !sst->ready())
211             {
212                 std::cerr << __func__
213                           << ": Failed to get SST provider instance\n";
214             }
215             else
216             {
217                 try
218                 {
219                     bfEnabled = sst->bfEnabled(currentLevel);
220                 }
221                 catch (const PECIError& error)
222                 {
223                     std::cerr << "Failed to get SST-BF status: " << error.what()
224                               << "\n";
225                 }
226             }
227         }
228         return bfEnabled;
229     }
230 
231     sdbusplus::message::object_path
232         appliedConfig(sdbusplus::message::object_path value) override
233     {
234         DEBUG_PRINT << "Writing AppliedConfig\n";
235         const OperatingConfig* newConfig = nullptr;
236         for (const auto& config : availConfigs)
237         {
238             if (config->path == value.str)
239             {
240                 newConfig = config.get();
241             }
242         }
243 
244         if (newConfig == nullptr)
245         {
246             throw sdbusplus::xyz::openbmc_project::Common::Error::
247                 InvalidArgument();
248         }
249 
250         auto sst = getInstance(peciAddress, cpuModel);
251         if (!sst)
252         {
253             std::cerr << __func__ << ": Failed to get SST provider instance\n";
254             return sdbusplus::message::object_path();
255         }
256         setPropertyCheckOrThrow(*sst);
257         try
258         {
259             sst->setCurrentLevel(newConfig->level);
260             currentLevel = newConfig->level;
261         }
262         catch (const PECIError& error)
263         {
264             std::cerr << "Failed to set new SST-PP level: " << error.what()
265                       << "\n";
266             throw sdbusplus::xyz::openbmc_project::Common::Device::Error::
267                 WriteFailure();
268         }
269 
270         // return value not used
271         return sdbusplus::message::object_path();
272     }
273 
274     bool baseSpeedPriorityEnabled(bool /* value */) override
275     {
276         DEBUG_PRINT << "Writing BaseSpeedPriorityEnabled not allowed\n";
277         throw sdbusplus::xyz::openbmc_project::Common::Error::NotAllowed();
278         // return value not used
279         return false;
280     }
281 
282     //
283     // Additions
284     //
285 
286     OperatingConfig& newConfig(unsigned int level)
287     {
288         availConfigs.emplace_back(std::make_unique<OperatingConfig>(
289             bus, level, generateConfigPath(level)));
290         return *availConfigs.back();
291     }
292 
293     std::string generateConfigPath(unsigned int level) const
294     {
295         return path + "/config" + std::to_string(level);
296     }
297 
298     /**
299      * Emit the interface added signals which were deferred. This is required
300      * for ObjectMapper to pick up the objects, if we initially defered the
301      * signal emitting.
302      */
303     void finalize()
304     {
305         emit_added();
306         for (auto& config : availConfigs)
307         {
308             config->emit_added();
309         }
310     }
311 
312     static std::string generatePath(int index)
313     {
314         return cpuPath + std::to_string(index);
315     }
316 };
317 
318 /**
319  * Retrieve the SST parameters for a single config and fill the values into the
320  * properties on the D-Bus interface.
321  *
322  * @param[in,out]   sst         Interface to SST backend.
323  * @param[in]       level       Config TDP level to retrieve.
324  * @param[out]      config      D-Bus interface to update.
325  */
326 static void getSingleConfig(SSTInterface& sst, unsigned int level,
327                             OperatingConfig& config)
328 {
329     config.powerLimit(sst.tdp(level));
330     DEBUG_PRINT << " TDP = " << config.powerLimit() << '\n';
331 
332     config.availableCoreCount(sst.coreCount(level));
333     DEBUG_PRINT << " coreCount = " << config.availableCoreCount() << '\n';
334 
335     config.baseSpeed(sst.p1Freq(level));
336     DEBUG_PRINT << " baseSpeed = " << config.baseSpeed() << '\n';
337 
338     config.maxSpeed(sst.p0Freq(level));
339     DEBUG_PRINT << " maxSpeed = " << config.maxSpeed() << '\n';
340 
341     config.maxJunctionTemperature(sst.prochotTemp(level));
342     DEBUG_PRINT << " procHot = " << config.maxJunctionTemperature() << '\n';
343 
344     // Construct BaseSpeedPrioritySettings
345     std::vector<std::tuple<uint32_t, std::vector<uint32_t>>> baseSpeeds;
346     if (sst.bfSupported(level))
347     {
348         std::vector<uint32_t> totalCoreList, loFreqCoreList, hiFreqCoreList;
349         totalCoreList = sst.enabledCoreList(level);
350         hiFreqCoreList = sst.bfHighPriorityCoreList(level);
351         std::set_difference(
352             totalCoreList.begin(), totalCoreList.end(), hiFreqCoreList.begin(),
353             hiFreqCoreList.end(),
354             std::inserter(loFreqCoreList, loFreqCoreList.begin()));
355 
356         baseSpeeds = {{sst.bfHighPriorityFreq(level), hiFreqCoreList},
357                       {sst.bfLowPriorityFreq(level), loFreqCoreList}};
358     }
359     config.baseSpeedPrioritySettings(baseSpeeds);
360 
361     config.turboProfile(sst.sseTurboProfile(level));
362 }
363 
364 /**
365  * Retrieve all SST configuration info for all discoverable CPUs, and publish
366  * the info on new D-Bus objects on the given bus connection.
367  *
368  * @param[in,out]   ioc     ASIO context.
369  * @param[in,out]   conn    D-Bus ASIO connection.
370  *
371  * @return  Whether discovery was successfully finished.
372  *
373  * @throw PECIError     A PECI command failed on a CPU which had previously
374  *                      responded to a command.
375  */
376 static bool discoverCPUsAndConfigs(boost::asio::io_context& ioc,
377                                    sdbusplus::asio::connection& conn)
378 {
379     // Persistent list - only populated after complete/successful discovery
380     static std::vector<std::unique_ptr<CPUConfig>> cpus;
381     cpus.clear();
382 
383     // Temporary staging list. In case there is any failure, these temporary
384     // objects will get dropped to avoid presenting incomplete info until the
385     // next discovery attempt.
386     std::vector<std::unique_ptr<CPUConfig>> cpuList;
387 
388     for (uint8_t i = MIN_CLIENT_ADDR; i <= MAX_CLIENT_ADDR; ++i)
389     {
390         // Let the event handler run any waiting tasks. If there is a lot of
391         // PECI contention, SST discovery could take a long time. This lets us
392         // get updates to hostState and handle any D-Bus requests.
393         ioc.poll();
394 
395         if (hostState == HostState::off)
396         {
397             return false;
398         }
399 
400         unsigned int cpuIndex = i - MIN_CLIENT_ADDR;
401         DEBUG_PRINT << "Discovering CPU " << cpuIndex << '\n';
402 
403         // We could possibly check D-Bus for CPU presence and model, but PECI is
404         // 10x faster and so much simpler.
405         uint8_t cc, stepping;
406         CPUModel cpuModel;
407         EPECIStatus status = peci_GetCPUID(i, &cpuModel, &stepping, &cc);
408         if (status == PECI_CC_TIMEOUT)
409         {
410             // Timing out indicates the CPU is present but PCS services not
411             // working yet. Try again later.
412             throw PECIError("Get CPUID timed out");
413         }
414         if (status == PECI_CC_CPU_NOT_PRESENT)
415         {
416             continue;
417         }
418         if (status != PECI_CC_SUCCESS || cc != PECI_DEV_CC_SUCCESS)
419         {
420             std::cerr << "GetCPUID returned status " << status
421                       << ", cc = " << cc << '\n';
422             continue;
423         }
424 
425         std::unique_ptr<SSTInterface> sst = getInstance(i, cpuModel);
426 
427         if (!sst)
428         {
429             // No supported backend for this CPU.
430             continue;
431         }
432 
433         if (!sst->ready())
434         {
435             // Supported CPU but it can't be queried yet. Try again later.
436             std::cerr << "sst not ready yet\n";
437             return false;
438         }
439 
440         if (!sst->ppEnabled())
441         {
442             // Supported CPU but the specific SKU doesn't support SST-PP.
443             std::cerr << "CPU doesn't support SST-PP\n";
444             continue;
445         }
446 
447         // Create the per-CPU configuration object
448         cpuList.emplace_back(
449             std::make_unique<CPUConfig>(conn, cpuIndex, cpuModel));
450         CPUConfig& cpu = *cpuList.back();
451 
452         bool foundCurrentLevel = false;
453 
454         for (unsigned int level = 0; level <= sst->maxLevel(); ++level)
455         {
456             DEBUG_PRINT << "checking level " << level << ": ";
457             // levels 1 and 2 were legacy/deprecated, originally used for AVX
458             // license pre-granting. They may be reused for more levels in
459             // future generations. So we need to check for discontinuities.
460             if (!sst->levelSupported(level))
461             {
462                 DEBUG_PRINT << "not supported\n";
463                 continue;
464             }
465 
466             DEBUG_PRINT << "supported\n";
467 
468             getSingleConfig(*sst, level, cpu.newConfig(level));
469 
470             if (level == sst->currentLevel())
471             {
472                 foundCurrentLevel = true;
473             }
474         }
475 
476         DEBUG_PRINT << "current level is " << sst->currentLevel() << '\n';
477 
478         if (!foundCurrentLevel)
479         {
480             // In case we didn't encounter a PECI error, but also didn't find
481             // the config which is supposedly applied, we won't be able to
482             // populate the CurrentOperatingConfig so we have to remove this CPU
483             // from consideration.
484             std::cerr << "CPU " << cpuIndex
485                       << " claimed SST support but invalid configs\n";
486             cpuList.pop_back();
487             continue;
488         }
489     }
490 
491     cpuList.swap(cpus);
492     std::for_each(cpus.begin(), cpus.end(), [](auto& cpu) { cpu->finalize(); });
493     return true;
494 }
495 
496 /**
497  * Attempt discovery process, and if it fails, wait for 10 seconds to try again.
498  */
499 static void discoverOrWait()
500 {
501     static boost::asio::steady_timer peciRetryTimer(dbus::getIOContext());
502     static int peciErrorCount = 0;
503     bool finished = false;
504 
505     // This function may be called from hostStateHandler or by retrying itself.
506     // In case those overlap, cancel any outstanding retry timer.
507     peciRetryTimer.cancel();
508 
509     try
510     {
511         DEBUG_PRINT << "Starting discovery\n";
512         finished = discoverCPUsAndConfigs(dbus::getIOContext(),
513                                           *dbus::getConnection());
514     }
515     catch (const PECIError& err)
516     {
517         std::cerr << "PECI Error: " << err.what() << '\n';
518 
519         // In case of repeated failure to finish discovery, turn off this
520         // feature altogether. Possible cause is that the CPU model does not
521         // actually support the necessary commands.
522         if (++peciErrorCount >= 50)
523         {
524             std::cerr << "Aborting SST discovery\n";
525             return;
526         }
527 
528         std::cerr << "Retrying SST discovery later\n";
529     }
530 
531     DEBUG_PRINT << "Finished discovery attempt: " << finished << '\n';
532 
533     // Retry later if no CPUs were available, or there was a PECI error.
534     if (!finished)
535     {
536         peciRetryTimer.expires_after(std::chrono::seconds(10));
537         peciRetryTimer.async_wait([](boost::system::error_code ec) {
538             if (ec)
539             {
540                 if (ec != boost::asio::error::operation_aborted)
541                 {
542                     std::cerr << "SST PECI Retry Timer failed: " << ec << '\n';
543                 }
544                 return;
545             }
546             discoverOrWait();
547         });
548     }
549 }
550 
551 static void hostStateHandler(HostState prevState, HostState)
552 {
553     if (prevState == HostState::off)
554     {
555         // Start or re-start discovery any time the host moves out of the
556         // powered off state.
557         discoverOrWait();
558     }
559 }
560 
561 void init()
562 {
563     addHostStateCallback(hostStateHandler);
564 }
565 
566 } // namespace sst
567 } // namespace cpu_info
568