xref: /openbmc/smbios-mdr/src/speed_select.cpp (revision 1d73dccc)
1 // Copyright (c) 2020 Intel Corporation
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "speed_select.hpp"
16 
17 #include "cpuinfo.hpp"
18 #include "cpuinfo_utils.hpp"
19 
20 #include <peci.h>
21 
22 #include <boost/asio/error.hpp>
23 #include <boost/asio/steady_timer.hpp>
24 #include <xyz/openbmc_project/Common/Device/error.hpp>
25 #include <xyz/openbmc_project/Common/error.hpp>
26 #include <xyz/openbmc_project/Control/Processor/CurrentOperatingConfig/server.hpp>
27 #include <xyz/openbmc_project/Inventory/Item/Cpu/OperatingConfig/server.hpp>
28 
29 #include <algorithm>
30 #include <iostream>
31 #include <memory>
32 #include <stdexcept>
33 #include <string>
34 
35 namespace cpu_info
36 {
37 namespace sst
38 {
39 
// Stream a uint8_t as its integer value rather than as a character. Log
// output in this file is never meant to contain a single raw ASCII char.
std::ostream& operator<<(std::ostream& os, uint8_t value)
{
    const int numeric = value;
    os << numeric;
    return os;
}
46 
checkPECIStatus(EPECIStatus libStatus,uint8_t completionCode)47 bool checkPECIStatus(EPECIStatus libStatus, uint8_t completionCode)
48 {
49     if (libStatus != PECI_CC_SUCCESS || completionCode != PECI_DEV_CC_SUCCESS)
50     {
51         std::cerr << "PECI command failed."
52                   << " Driver Status = " << libStatus << ","
53                   << " Completion Code = " << completionCode << '\n';
54         return false;
55     }
56     return true;
57 }
58 
/**
 * Expand a 64-bit mask into the list of bit positions that are set.
 *
 * @param[in]   mask    Bit mask to expand.
 *
 * @return  Indices of the set bits, in ascending order.
 */
std::vector<uint32_t> convertMaskToList(std::bitset<64> mask)
{
    std::vector<uint32_t> bitList;
    // The final size is known up front, so allocate exactly once.
    bitList.reserve(mask.count());
    for (size_t i = 0; i < mask.size(); ++i)
    {
        if (mask.test(i))
        {
            // i < 64, so the narrowing conversion is lossless.
            bitList.push_back(static_cast<uint32_t>(i));
        }
    }
    return bitList;
}
71 
getProviders()72 static std::vector<BackendProvider>& getProviders()
73 {
74     static auto* providers = new std::vector<BackendProvider>;
75     return *providers;
76 }
77 
registerBackend(BackendProvider providerFn)78 void registerBackend(BackendProvider providerFn)
79 {
80     getProviders().push_back(providerFn);
81 }
82 
getInstance(uint8_t address,CPUModel model,WakePolicy wakePolicy)83 std::unique_ptr<SSTInterface> getInstance(uint8_t address, CPUModel model,
84                                           WakePolicy wakePolicy)
85 {
86     DEBUG_PRINT << "Searching for provider for " << address << ", model "
87                 << std::hex << model << std::dec << '\n';
88     for (const auto& provider : getProviders())
89     {
90         try
91         {
92             auto interface = provider(address, model, wakePolicy);
93             DEBUG_PRINT << "returned " << interface << '\n';
94             if (interface)
95             {
96                 return interface;
97             }
98         }
99         catch (...)
100         {}
101     }
102     DEBUG_PRINT << "No supported backends found\n";
103     return nullptr;
104 }
105 
// sdbusplus server object for the
// xyz.openbmc_project.Control.Processor.CurrentOperatingConfig interface.
using BaseCurrentOperatingConfig =
    sdbusplus::server::object_t<sdbusplus::server::xyz::openbmc_project::
                                    control::processor::CurrentOperatingConfig>;

// sdbusplus server object for the
// xyz.openbmc_project.Inventory.Item.Cpu.OperatingConfig interface.
using BaseOperatingConfig =
    sdbusplus::server::object_t<sdbusplus::server::xyz::openbmc_project::
                                    inventory::item::cpu::OperatingConfig>;
113 
114 class OperatingConfig : public BaseOperatingConfig
115 {
116   public:
117     std::string path;
118     unsigned int level;
119 
120   public:
121     using BaseOperatingConfig::BaseOperatingConfig;
OperatingConfig(sdbusplus::bus_t & bus,unsigned int level_,std::string path_)122     OperatingConfig(sdbusplus::bus_t& bus, unsigned int level_,
123                     std::string path_) :
124         BaseOperatingConfig(bus, path_.c_str(), action::defer_emit),
125         path(std::move(path_)), level(level_)
126     {}
127 };
128 
129 class CPUConfig : public BaseCurrentOperatingConfig
130 {
131   private:
132     /** Objects describing all available SST configs - not modifiable. */
133     std::vector<std::unique_ptr<OperatingConfig>> availConfigs;
134     sdbusplus::bus_t& bus;
135     const uint8_t peciAddress;
136     const std::string path; ///< D-Bus path of CPU object
137     const CPUModel cpuModel;
138 
139     // Keep mutable copies of the properties so we can cache values that we
140     // retrieve in the getters. We don't want to throw an error on a D-Bus
141     // get-property call (extra error handling in clients), so by caching we can
142     // hide any temporary hiccup in PECI communication.
143     // These values can be changed by in-band software so we have to do a full
144     // PECI read on every get-property, and can't assume that values will change
145     // only when set-property is done.
146     mutable unsigned int currentLevel;
147     mutable bool bfEnabled;
148 
149     /**
150      * Enforce common pre-conditions for D-Bus set property handlers.
151      */
setPropertyCheckOrThrow(SSTInterface & sst)152     void setPropertyCheckOrThrow(SSTInterface& sst)
153     {
154         if (!sst.supportsControl())
155         {
156             throw sdbusplus::xyz::openbmc_project::Common::Error::NotAllowed();
157         }
158         if (hostState != HostState::postComplete || !sst.ready())
159         {
160             throw sdbusplus::xyz::openbmc_project::Common::Error::Unavailable();
161         }
162     }
163 
164   public:
CPUConfig(sdbusplus::bus_t & bus_,uint8_t index,CPUModel model,unsigned int currentLevel_,bool bfEnabled_)165     CPUConfig(sdbusplus::bus_t& bus_, uint8_t index, CPUModel model,
166               unsigned int currentLevel_, bool bfEnabled_) :
167         BaseCurrentOperatingConfig(bus_, generatePath(index).c_str(),
168                                    action::defer_emit),
169         bus(bus_), peciAddress(index + MIN_CLIENT_ADDR),
170         path(generatePath(index)), cpuModel(model), currentLevel(currentLevel_),
171         bfEnabled(bfEnabled_)
172     {}
173 
174     //
175     // D-Bus Property Overrides
176     //
177 
appliedConfig() const178     sdbusplus::message::object_path appliedConfig() const override
179     {
180         DEBUG_PRINT << "Reading AppliedConfig\n";
181         if (hostState != HostState::off)
182         {
183             // Otherwise, try to read current state
184             auto sst = getInstance(peciAddress, cpuModel, dontWake);
185             if (!sst || !sst->ready())
186             {
187                 std::cerr << __func__
188                           << ": Failed to get SST provider instance\n";
189             }
190             else
191             {
192                 try
193                 {
194                     currentLevel = sst->currentLevel();
195                 }
196                 catch (const PECIError& error)
197                 {
198                     std::cerr << "Failed to get SST-PP level: " << error.what()
199                               << "\n";
200                 }
201             }
202         }
203         return generateConfigPath(currentLevel);
204     }
205 
baseSpeedPriorityEnabled() const206     bool baseSpeedPriorityEnabled() const override
207     {
208         DEBUG_PRINT << "Reading BaseSpeedPriorityEnabled\n";
209         if (hostState != HostState::off)
210         {
211             auto sst = getInstance(peciAddress, cpuModel, dontWake);
212             if (!sst || !sst->ready())
213             {
214                 std::cerr << __func__
215                           << ": Failed to get SST provider instance\n";
216             }
217             else
218             {
219                 try
220                 {
221                     bfEnabled = sst->bfEnabled(currentLevel);
222                 }
223                 catch (const PECIError& error)
224                 {
225                     std::cerr << "Failed to get SST-BF status: " << error.what()
226                               << "\n";
227                 }
228             }
229         }
230         return bfEnabled;
231     }
232 
233     sdbusplus::message::object_path
appliedConfig(sdbusplus::message::object_path value)234         appliedConfig(sdbusplus::message::object_path value) override
235     {
236         DEBUG_PRINT << "Writing AppliedConfig\n";
237         const OperatingConfig* newConfig = nullptr;
238         for (const auto& config : availConfigs)
239         {
240             if (config->path == value.str)
241             {
242                 newConfig = config.get();
243             }
244         }
245 
246         if (newConfig == nullptr)
247         {
248             throw sdbusplus::xyz::openbmc_project::Common::Error::
249                 InvalidArgument();
250         }
251 
252         auto sst = getInstance(peciAddress, cpuModel, wakeAllowed);
253         if (!sst)
254         {
255             std::cerr << __func__ << ": Failed to get SST provider instance\n";
256             return sdbusplus::message::object_path();
257         }
258         try
259         {
260             setPropertyCheckOrThrow(*sst);
261             sst->setCurrentLevel(newConfig->level);
262             currentLevel = newConfig->level;
263         }
264         catch (const PECIError& error)
265         {
266             std::cerr << "Failed to set new SST-PP level: " << error.what()
267                       << "\n";
268             throw sdbusplus::xyz::openbmc_project::Common::Device::Error::
269                 WriteFailure();
270         }
271 
272         // return value not used
273         return sdbusplus::message::object_path();
274     }
275 
baseSpeedPriorityEnabled(bool)276     bool baseSpeedPriorityEnabled(bool /* value */) override
277     {
278         DEBUG_PRINT << "Writing BaseSpeedPriorityEnabled not allowed\n";
279         throw sdbusplus::xyz::openbmc_project::Common::Error::NotAllowed();
280         // return value not used
281         return false;
282     }
283 
284     //
285     // Additions
286     //
287 
newConfig(unsigned int level)288     OperatingConfig& newConfig(unsigned int level)
289     {
290         availConfigs.emplace_back(std::make_unique<OperatingConfig>(
291             bus, level, generateConfigPath(level)));
292         return *availConfigs.back();
293     }
294 
generateConfigPath(unsigned int level) const295     std::string generateConfigPath(unsigned int level) const
296     {
297         return path + "/config" + std::to_string(level);
298     }
299 
300     /**
301      * Emit the interface added signals which were deferred. This is required
302      * for ObjectMapper to pick up the objects, if we initially deferred the
303      * signal emitting.
304      */
finalize()305     void finalize()
306     {
307         emit_added();
308         for (auto& config : availConfigs)
309         {
310             config->emit_added();
311         }
312     }
313 
generatePath(int index)314     static std::string generatePath(int index)
315     {
316         return cpuPath + std::to_string(index);
317     }
318 };
319 
320 /**
321  * Retrieve the SST parameters for a single config and fill the values into the
322  * properties on the D-Bus interface.
323  *
324  * @param[in,out]   sst         Interface to SST backend.
325  * @param[in]       level       Config TDP level to retrieve.
326  * @param[out]      config      D-Bus interface to update.
327  */
getSingleConfig(SSTInterface & sst,unsigned int level,OperatingConfig & config)328 static void getSingleConfig(SSTInterface& sst, unsigned int level,
329                             OperatingConfig& config)
330 {
331     config.powerLimit(sst.tdp(level));
332     DEBUG_PRINT << " TDP = " << config.powerLimit() << '\n';
333 
334     config.availableCoreCount(sst.coreCount(level));
335     DEBUG_PRINT << " coreCount = " << config.availableCoreCount() << '\n';
336 
337     config.baseSpeed(sst.p1Freq(level));
338     DEBUG_PRINT << " baseSpeed = " << config.baseSpeed() << '\n';
339 
340     config.maxSpeed(sst.p0Freq(level));
341     DEBUG_PRINT << " maxSpeed = " << config.maxSpeed() << '\n';
342 
343     config.maxJunctionTemperature(sst.prochotTemp(level));
344     DEBUG_PRINT << " procHot = " << config.maxJunctionTemperature() << '\n';
345 
346     // Construct BaseSpeedPrioritySettings
347     std::vector<std::tuple<uint32_t, std::vector<uint32_t>>> baseSpeeds;
348     if (sst.bfSupported(level))
349     {
350         std::vector<uint32_t> totalCoreList, loFreqCoreList, hiFreqCoreList;
351         totalCoreList = sst.enabledCoreList(level);
352         hiFreqCoreList = sst.bfHighPriorityCoreList(level);
353         std::set_difference(
354             totalCoreList.begin(), totalCoreList.end(), hiFreqCoreList.begin(),
355             hiFreqCoreList.end(),
356             std::inserter(loFreqCoreList, loFreqCoreList.begin()));
357 
358         baseSpeeds = {{sst.bfHighPriorityFreq(level), hiFreqCoreList},
359                       {sst.bfLowPriorityFreq(level), loFreqCoreList}};
360     }
361     config.baseSpeedPrioritySettings(baseSpeeds);
362 
363     config.turboProfile(sst.sseTurboProfile(level));
364 }
365 
366 /**
367  * Retrieve all SST configuration info for all discoverable CPUs, and publish
368  * the info on new D-Bus objects on the given bus connection.
369  *
370  * @param[in,out]   ioc     ASIO context.
371  * @param[in,out]   conn    D-Bus ASIO connection.
372  *
373  * @return  Whether discovery was successfully finished.
374  *
375  * @throw PECIError     A PECI command failed on a CPU which had previously
376  *                      responded to a command.
377  */
discoverCPUsAndConfigs(boost::asio::io_context & ioc,sdbusplus::asio::connection & conn)378 static bool discoverCPUsAndConfigs(boost::asio::io_context& ioc,
379                                    sdbusplus::asio::connection& conn)
380 {
381     // Persistent list - only populated after complete/successful discovery
382     static std::vector<std::unique_ptr<CPUConfig>> cpus;
383     cpus.clear();
384 
385     // Temporary staging list. In case there is any failure, these temporary
386     // objects will get dropped to avoid presenting incomplete info until the
387     // next discovery attempt.
388     std::vector<std::unique_ptr<CPUConfig>> cpuList;
389 
390     for (uint8_t i = MIN_CLIENT_ADDR; i <= MAX_CLIENT_ADDR; ++i)
391     {
392         // Let the event handler run any waiting tasks. If there is a lot of
393         // PECI contention, SST discovery could take a long time. This lets us
394         // get updates to hostState and handle any D-Bus requests.
395         ioc.poll();
396 
397         if (hostState == HostState::off)
398         {
399             return false;
400         }
401 
402         unsigned int cpuIndex = i - MIN_CLIENT_ADDR;
403         DEBUG_PRINT << "Discovering CPU " << cpuIndex << '\n';
404 
405         // We could possibly check D-Bus for CPU presence and model, but PECI is
406         // 10x faster and so much simpler.
407         uint8_t cc, stepping;
408         CPUModel cpuModel;
409         EPECIStatus status = peci_GetCPUID(i, &cpuModel, &stepping, &cc);
410         if (status == PECI_CC_TIMEOUT)
411         {
412             // Timing out indicates the CPU is present but PCS services not
413             // working yet. Try again later.
414             throw PECIError("Get CPUID timed out");
415         }
416         if (status == PECI_CC_CPU_NOT_PRESENT)
417         {
418             continue;
419         }
420         if (status != PECI_CC_SUCCESS || cc != PECI_DEV_CC_SUCCESS)
421         {
422             std::cerr << "GetCPUID returned status " << status
423                       << ", cc = " << cc << '\n';
424             continue;
425         }
426 
427         std::unique_ptr<SSTInterface> sst =
428             getInstance(i, cpuModel, wakeAllowed);
429 
430         if (!sst)
431         {
432             // No supported backend for this CPU.
433             continue;
434         }
435 
436         if (!sst->ready())
437         {
438             // Supported CPU but it can't be queried yet. Try again later.
439             std::cerr << "sst not ready yet\n";
440             return false;
441         }
442 
443         if (!sst->ppEnabled())
444         {
445             // Supported CPU but the specific SKU doesn't support SST-PP.
446             std::cerr << "CPU doesn't support SST-PP\n";
447             continue;
448         }
449 
450         // Create the per-CPU configuration object
451         unsigned int currentLevel = sst->currentLevel();
452         cpuList.emplace_back(
453             std::make_unique<CPUConfig>(conn, cpuIndex, cpuModel, currentLevel,
454                                         sst->bfEnabled(currentLevel)));
455         CPUConfig& cpu = *cpuList.back();
456 
457         bool foundCurrentLevel = false;
458 
459         for (unsigned int level = 0; level <= sst->maxLevel(); ++level)
460         {
461             DEBUG_PRINT << "checking level " << level << ": ";
462             // levels 1 and 2 were legacy/deprecated, originally used for AVX
463             // license pre-granting. They may be reused for more levels in
464             // future generations. So we need to check for discontinuities.
465             if (!sst->levelSupported(level))
466             {
467                 DEBUG_PRINT << "not supported\n";
468                 continue;
469             }
470 
471             DEBUG_PRINT << "supported\n";
472 
473             getSingleConfig(*sst, level, cpu.newConfig(level));
474 
475             if (level == currentLevel)
476             {
477                 foundCurrentLevel = true;
478             }
479         }
480 
481         DEBUG_PRINT << "current level is " << currentLevel << '\n';
482 
483         if (!foundCurrentLevel)
484         {
485             // In case we didn't encounter a PECI error, but also didn't find
486             // the config which is supposedly applied, we won't be able to
487             // populate the CurrentOperatingConfig so we have to remove this CPU
488             // from consideration.
489             std::cerr << "CPU " << cpuIndex
490                       << " claimed SST support but invalid configs\n";
491             cpuList.pop_back();
492             continue;
493         }
494     }
495 
496     cpuList.swap(cpus);
497     std::for_each(cpus.begin(), cpus.end(), [](auto& cpu) { cpu->finalize(); });
498     return true;
499 }
500 
501 /**
502  * Attempt discovery process, and if it fails, wait for 10 seconds to try again.
503  */
discoverOrWait()504 static void discoverOrWait()
505 {
506     static boost::asio::steady_timer peciRetryTimer(dbus::getIOContext());
507     static int peciErrorCount = 0;
508     bool finished = false;
509 
510     // This function may be called from hostStateHandler or by retrying itself.
511     // In case those overlap, cancel any outstanding retry timer.
512     peciRetryTimer.cancel();
513 
514     try
515     {
516         DEBUG_PRINT << "Starting discovery\n";
517         finished = discoverCPUsAndConfigs(dbus::getIOContext(),
518                                           *dbus::getConnection());
519     }
520     catch (const PECIError& err)
521     {
522         std::cerr << "PECI Error: " << err.what() << '\n';
523 
524         // In case of repeated failure to finish discovery, turn off this
525         // feature altogether. Possible cause is that the CPU model does not
526         // actually support the necessary commands.
527         if (++peciErrorCount >= 50)
528         {
529             std::cerr << "Aborting SST discovery\n";
530             return;
531         }
532 
533         std::cerr << "Retrying SST discovery later\n";
534     }
535 
536     DEBUG_PRINT << "Finished discovery attempt: " << finished << '\n';
537 
538     // Retry later if no CPUs were available, or there was a PECI error.
539     if (!finished)
540     {
541         peciRetryTimer.expires_after(std::chrono::seconds(10));
542         peciRetryTimer.async_wait([](boost::system::error_code ec) {
543             if (ec)
544             {
545                 if (ec != boost::asio::error::operation_aborted)
546                 {
547                     std::cerr << "SST PECI Retry Timer failed: " << ec << '\n';
548                 }
549                 return;
550             }
551             discoverOrWait();
552         });
553     }
554 }
555 
hostStateHandler(HostState prevState,HostState)556 static void hostStateHandler(HostState prevState, HostState)
557 {
558     if (prevState == HostState::off)
559     {
560         // Start or re-start discovery any time the host moves out of the
561         // powered off state.
562         discoverOrWait();
563     }
564 }
565 
init()566 void init()
567 {
568     addHostStateCallback(hostStateHandler);
569 }
570 
571 } // namespace sst
572 } // namespace cpu_info
573