xref: /openbmc/smbios-mdr/src/speed_select.cpp (revision 06639639)
1 // Copyright (c) 2020 Intel Corporation
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "speed_select.hpp"
16 
17 #include "cpuinfo.hpp"
18 #include "cpuinfo_utils.hpp"
19 
20 #include <peci.h>
21 
22 #include <boost/asio/error.hpp>
23 #include <boost/asio/steady_timer.hpp>
24 #include <xyz/openbmc_project/Common/Device/error.hpp>
25 #include <xyz/openbmc_project/Common/error.hpp>
26 #include <xyz/openbmc_project/Control/Processor/CurrentOperatingConfig/server.hpp>
27 #include <xyz/openbmc_project/Inventory/Item/Cpu/OperatingConfig/server.hpp>
28 
29 #include <algorithm>
30 #include <iostream>
31 #include <memory>
32 #include <stdexcept>
33 #include <string>
34 
35 namespace cpu_info
36 {
37 namespace sst
38 {
39 
// Stream a uint8_t as an integer instead of as a character. uint8_t is a
// character type as far as the standard inserters are concerned, and we
// basically never want a single raw ascii char in our log output.
std::ostream& operator<<(std::ostream& os, uint8_t value)
{
    // Widen first so the integer inserter is selected.
    const int numeric = value;
    os << numeric;
    return os;
}
46 
47 bool checkPECIStatus(EPECIStatus libStatus, uint8_t completionCode)
48 {
49     if (libStatus != PECI_CC_SUCCESS || completionCode != PECI_DEV_CC_SUCCESS)
50     {
51         std::cerr << "PECI command failed."
52                   << " Driver Status = " << libStatus << ","
53                   << " Completion Code = " << completionCode << '\n';
54         return false;
55     }
56     return true;
57 }
58 
/**
 * Convert a 64-bit mask into the list of bit positions that are set.
 *
 * @param[in]   mask    Bit mask to expand.
 *
 * @return  Indices (0-63) of all set bits, in ascending order. Empty if no
 *          bits are set.
 */
std::vector<uint32_t> convertMaskToList(std::bitset<64> mask)
{
    std::vector<uint32_t> bitList;
    // The final size is known up front, so allocate exactly once.
    bitList.reserve(mask.count());
    for (size_t i = 0; i < mask.size(); ++i)
    {
        if (mask.test(i))
        {
            // i < 64, so the narrowing is always lossless; make it explicit.
            bitList.push_back(static_cast<uint32_t>(i));
        }
    }
    return bitList;
}
71 
72 static std::vector<BackendProvider>& getProviders()
73 {
74     static auto* providers = new std::vector<BackendProvider>;
75     return *providers;
76 }
77 
78 void registerBackend(BackendProvider providerFn)
79 {
80     getProviders().push_back(providerFn);
81 }
82 
83 std::unique_ptr<SSTInterface> getInstance(uint8_t address, CPUModel model)
84 {
85     DEBUG_PRINT << "Searching for provider for " << address << ", model "
86                 << std::hex << model << '\n';
87     for (const auto& provider : getProviders())
88     {
89         try
90         {
91             auto interface = provider(address, model);
92             DEBUG_PRINT << "returned " << interface << '\n';
93             if (interface)
94             {
95                 return interface;
96             }
97         }
98         catch (...)
99         {}
100     }
101     DEBUG_PRINT << "No supported backends found\n";
102     return nullptr;
103 }
104 
// sdbusplus server object for the
// xyz.openbmc_project.Control.Processor.CurrentOperatingConfig interface.
// Base class of CPUConfig.
using BaseCurrentOperatingConfig =
    sdbusplus::server::object_t<sdbusplus::xyz::openbmc_project::Control::
                                    Processor::server::CurrentOperatingConfig>;

// sdbusplus server object for the
// xyz.openbmc_project.Inventory.Item.Cpu.OperatingConfig interface.
// Base class of OperatingConfig.
using BaseOperatingConfig =
    sdbusplus::server::object_t<sdbusplus::xyz::openbmc_project::Inventory::
                                    Item::Cpu::server::OperatingConfig>;
112 
113 class OperatingConfig : public BaseOperatingConfig
114 {
115   public:
116     std::string path;
117     unsigned int level;
118 
119   public:
120     using BaseOperatingConfig::BaseOperatingConfig;
121     OperatingConfig(sdbusplus::bus::bus& bus, unsigned int level_,
122                     std::string path_) :
123         BaseOperatingConfig(bus, path_.c_str(), action::defer_emit),
124         path(std::move(path_)), level(level_)
125     {}
126 };
127 
128 class CPUConfig : public BaseCurrentOperatingConfig
129 {
130   private:
131     /** Objects describing all available SST configs - not modifiable. */
132     std::vector<std::unique_ptr<OperatingConfig>> availConfigs;
133     sdbusplus::bus::bus& bus;
134     const uint8_t peciAddress;
135     const std::string path; ///< D-Bus path of CPU object
136     const CPUModel cpuModel;
137 
138     // Keep mutable copies of the properties so we can cache values that we
139     // retrieve in the getters. We don't want to throw an error on a D-Bus
140     // get-property call (extra error handling in clients), so by caching we can
141     // hide any temporary hiccup in PECI communication.
142     // These values can be changed by in-band software so we have to do a full
143     // PECI read on every get-property, and can't assume that values will change
144     // only when set-property is done.
145     mutable unsigned int currentLevel;
146     mutable bool bfEnabled;
147 
148     /**
149      * Enforce common pre-conditions for D-Bus set property handlers.
150      */
151     void setPropertyCheckOrThrow(SSTInterface& sst)
152     {
153         if (!sst.supportsControl())
154         {
155             throw sdbusplus::xyz::openbmc_project::Common::Error::NotAllowed();
156         }
157         if (hostState != HostState::postComplete || !sst.ready())
158         {
159             throw sdbusplus::xyz::openbmc_project::Common::Error::Unavailable();
160         }
161     }
162 
163   public:
164     CPUConfig(sdbusplus::bus::bus& bus_, uint8_t index, CPUModel model) :
165         BaseCurrentOperatingConfig(bus_, generatePath(index).c_str(),
166                                    action::defer_emit),
167         bus(bus_), peciAddress(index + MIN_CLIENT_ADDR),
168         path(generatePath(index)), cpuModel(model), currentLevel(0),
169         bfEnabled(false)
170     {}
171 
172     //
173     // D-Bus Property Overrides
174     //
175 
176     sdbusplus::message::object_path appliedConfig() const override
177     {
178         DEBUG_PRINT << "Reading AppliedConfig\n";
179         // If CPU is powered off, return power-up default value of Level 0.
180         unsigned int level = 0;
181         if (hostState != HostState::off)
182         {
183             // Otherwise, try to read current state
184             auto sst = getInstance(peciAddress, cpuModel);
185             if (!sst)
186             {
187                 std::cerr << __func__
188                           << ": Failed to get SST provider instance\n";
189             }
190             else
191             {
192                 try
193                 {
194                     currentLevel = sst->currentLevel();
195                 }
196                 catch (const PECIError& error)
197                 {
198                     std::cerr << "Failed to get SST-PP level: " << error.what()
199                               << "\n";
200                 }
201             }
202             level = currentLevel;
203         }
204         return generateConfigPath(level);
205     }
206 
207     bool baseSpeedPriorityEnabled() const override
208     {
209         DEBUG_PRINT << "Reading BaseSpeedPriorityEnabled\n";
210         bool enabled = false;
211         if (hostState != HostState::off)
212         {
213             auto sst = getInstance(peciAddress, cpuModel);
214             if (!sst)
215             {
216                 std::cerr << __func__
217                           << ": Failed to get SST provider instance\n";
218             }
219             else
220             {
221                 try
222                 {
223                     bfEnabled = sst->bfEnabled(currentLevel);
224                 }
225                 catch (const PECIError& error)
226                 {
227                     std::cerr << "Failed to get SST-BF status: " << error.what()
228                               << "\n";
229                 }
230             }
231             enabled = bfEnabled;
232         }
233         return enabled;
234     }
235 
236     sdbusplus::message::object_path
237         appliedConfig(sdbusplus::message::object_path value) override
238     {
239         DEBUG_PRINT << "Writing AppliedConfig\n";
240         const OperatingConfig* newConfig = nullptr;
241         for (const auto& config : availConfigs)
242         {
243             if (config->path == value.str)
244             {
245                 newConfig = config.get();
246             }
247         }
248 
249         if (newConfig == nullptr)
250         {
251             throw sdbusplus::xyz::openbmc_project::Common::Error::
252                 InvalidArgument();
253         }
254 
255         auto sst = getInstance(peciAddress, cpuModel);
256         if (!sst)
257         {
258             std::cerr << __func__ << ": Failed to get SST provider instance\n";
259             return sdbusplus::message::object_path();
260         }
261         setPropertyCheckOrThrow(*sst);
262         try
263         {
264             sst->setCurrentLevel(newConfig->level);
265             currentLevel = newConfig->level;
266         }
267         catch (const PECIError& error)
268         {
269             std::cerr << "Failed to set new SST-PP level: " << error.what()
270                       << "\n";
271             throw sdbusplus::xyz::openbmc_project::Common::Device::Error::
272                 WriteFailure();
273         }
274 
275         // return value not used
276         return sdbusplus::message::object_path();
277     }
278 
279     bool baseSpeedPriorityEnabled(bool /* value */) override
280     {
281         DEBUG_PRINT << "Writing BaseSpeedPriorityEnabled not allowed\n";
282         throw sdbusplus::xyz::openbmc_project::Common::Error::NotAllowed();
283         // return value not used
284         return false;
285     }
286 
287     //
288     // Additions
289     //
290 
291     OperatingConfig& newConfig(unsigned int level)
292     {
293         availConfigs.emplace_back(std::make_unique<OperatingConfig>(
294             bus, level, generateConfigPath(level)));
295         return *availConfigs.back();
296     }
297 
298     std::string generateConfigPath(unsigned int level) const
299     {
300         return path + "/config" + std::to_string(level);
301     }
302 
303     /**
304      * Emit the interface added signals which were deferred. This is required
305      * for ObjectMapper to pick up the objects, if we initially defered the
306      * signal emitting.
307      */
308     void finalize()
309     {
310         emit_added();
311         for (auto& config : availConfigs)
312         {
313             config->emit_added();
314         }
315     }
316 
317     static std::string generatePath(int index)
318     {
319         return cpuPath + std::to_string(index);
320     }
321 };
322 
323 /**
324  * Retrieve the SST parameters for a single config and fill the values into the
325  * properties on the D-Bus interface.
326  *
327  * @param[in,out]   sst         Interface to SST backend.
328  * @param[in]       level       Config TDP level to retrieve.
329  * @param[out]      config      D-Bus interface to update.
330  */
331 static void getSingleConfig(SSTInterface& sst, unsigned int level,
332                             OperatingConfig& config)
333 {
334     config.powerLimit(sst.tdp(level));
335 
336     config.availableCoreCount(sst.coreCount(level));
337 
338     config.baseSpeed(sst.p1Freq(level));
339 
340     config.maxSpeed(sst.p0Freq(level));
341 
342     config.maxJunctionTemperature(sst.prochotTemp(level));
343 
344     // Construct BaseSpeedPrioritySettings
345     std::vector<std::tuple<uint32_t, std::vector<uint32_t>>> baseSpeeds;
346     if (sst.bfSupported(level))
347     {
348         std::vector<uint32_t> totalCoreList, loFreqCoreList, hiFreqCoreList;
349         totalCoreList = sst.enabledCoreList(level);
350         hiFreqCoreList = sst.bfHighPriorityCoreList(level);
351         std::set_difference(
352             totalCoreList.begin(), totalCoreList.end(), hiFreqCoreList.begin(),
353             hiFreqCoreList.end(),
354             std::inserter(loFreqCoreList, loFreqCoreList.begin()));
355 
356         baseSpeeds = {{sst.bfHighPriorityFreq(level), hiFreqCoreList},
357                       {sst.bfLowPriorityFreq(level), loFreqCoreList}};
358     }
359     config.baseSpeedPrioritySettings(baseSpeeds);
360 
361     config.turboProfile(sst.sseTurboProfile(level));
362 }
363 
364 /**
365  * Retrieve all SST configuration info for all discoverable CPUs, and publish
366  * the info on new D-Bus objects on the given bus connection.
367  *
368  * @param[in,out]   ioc     ASIO context.
369  * @param[in,out]   conn    D-Bus ASIO connection.
370  *
371  * @return  Whether discovery was successfully finished.
372  *
373  * @throw PECIError     A PECI command failed on a CPU which had previously
374  *                      responded to a command.
375  */
376 static bool discoverCPUsAndConfigs(boost::asio::io_context& ioc,
377                                    sdbusplus::asio::connection& conn)
378 {
379     // Persistent list - only populated after complete/successful discovery
380     static std::vector<std::unique_ptr<CPUConfig>> cpus;
381     cpus.clear();
382 
383     // Temporary staging list. In case there is any failure, these temporary
384     // objects will get dropped to avoid presenting incomplete info until the
385     // next discovery attempt.
386     std::vector<std::unique_ptr<CPUConfig>> cpuList;
387 
388     for (uint8_t i = MIN_CLIENT_ADDR; i <= MAX_CLIENT_ADDR; ++i)
389     {
390         // Let the event handler run any waiting tasks. If there is a lot of
391         // PECI contention, SST discovery could take a long time. This lets us
392         // get updates to hostState and handle any D-Bus requests.
393         ioc.poll();
394 
395         if (hostState == HostState::off)
396         {
397             return false;
398         }
399 
400         unsigned int cpuIndex = i - MIN_CLIENT_ADDR;
401         DEBUG_PRINT << "Discovering CPU " << cpuIndex << '\n';
402 
403         // We could possibly check D-Bus for CPU presence and model, but PECI is
404         // 10x faster and so much simpler.
405         uint8_t cc, stepping;
406         CPUModel cpuModel;
407         EPECIStatus status = peci_GetCPUID(i, &cpuModel, &stepping, &cc);
408         if (status == PECI_CC_TIMEOUT)
409         {
410             // Timing out indicates the CPU is present but PCS services not
411             // working yet. Try again later.
412             throw PECIError("Get CPUID timed out");
413         }
414         if (status == PECI_CC_CPU_NOT_PRESENT)
415         {
416             continue;
417         }
418         if (status != PECI_CC_SUCCESS || cc != PECI_DEV_CC_SUCCESS)
419         {
420             std::cerr << "GetCPUID returned status " << status
421                       << ", cc = " << cc << '\n';
422             continue;
423         }
424 
425         std::unique_ptr<SSTInterface> sst = getInstance(i, cpuModel);
426 
427         if (!sst)
428         {
429             // No supported backend for this CPU.
430             continue;
431         }
432 
433         if (!sst->ready())
434         {
435             // Supported CPU but it can't be queried yet. Try again later.
436             std::cerr << "sst not ready yet\n";
437             return false;
438         }
439 
440         if (!sst->ppEnabled())
441         {
442             // Supported CPU but the specific SKU doesn't support SST-PP.
443             std::cerr << "CPU doesn't support SST-PP\n";
444             continue;
445         }
446 
447         // Create the per-CPU configuration object
448         cpuList.emplace_back(
449             std::make_unique<CPUConfig>(conn, cpuIndex, cpuModel));
450         CPUConfig& cpu = *cpuList.back();
451 
452         bool foundCurrentLevel = false;
453 
454         for (unsigned int level = 0; level <= sst->numLevels(); ++level)
455         {
456             // levels 1 and 2 were legacy/deprecated, originally used for AVX
457             // license pre-granting. They may be reused for more levels in
458             // future generations. So we need to check for discontinuities.
459             if (!sst->levelSupported(level))
460             {
461                 continue;
462             }
463 
464             getSingleConfig(*sst, level, cpu.newConfig(level));
465 
466             if (level == sst->currentLevel())
467             {
468                 foundCurrentLevel = true;
469             }
470         }
471 
472         if (!foundCurrentLevel)
473         {
474             // In case we didn't encounter a PECI error, but also didn't find
475             // the config which is supposedly applied, we won't be able to
476             // populate the CurrentOperatingConfig so we have to remove this CPU
477             // from consideration.
478             std::cerr << "CPU " << cpuIndex
479                       << " claimed SST support but invalid configs\n";
480             cpuList.pop_back();
481             continue;
482         }
483     }
484 
485     cpuList.swap(cpus);
486     std::for_each(cpus.begin(), cpus.end(), [](auto& cpu) { cpu->finalize(); });
487     return true;
488 }
489 
490 /**
491  * Attempt discovery process, and if it fails, wait for 10 seconds to try again.
492  */
493 static void discoverOrWait()
494 {
495     static boost::asio::steady_timer peciRetryTimer(dbus::getIOContext());
496     static int peciErrorCount = 0;
497     bool finished = false;
498 
499     // This function may be called from hostStateHandler or by retrying itself.
500     // In case those overlap, cancel any outstanding retry timer.
501     peciRetryTimer.cancel();
502 
503     try
504     {
505         DEBUG_PRINT << "Starting discovery\n";
506         finished = discoverCPUsAndConfigs(dbus::getIOContext(),
507                                           *dbus::getConnection());
508     }
509     catch (const PECIError& err)
510     {
511         std::cerr << "PECI Error: " << err.what() << '\n';
512 
513         // In case of repeated failure to finish discovery, turn off this
514         // feature altogether. Possible cause is that the CPU model does not
515         // actually support the necessary commands.
516         if (++peciErrorCount >= 50)
517         {
518             std::cerr << "Aborting SST discovery\n";
519             return;
520         }
521 
522         std::cerr << "Retrying SST discovery later\n";
523     }
524 
525     DEBUG_PRINT << "Finished discovery attempt: " << finished << '\n';
526 
527     // Retry later if no CPUs were available, or there was a PECI error.
528     if (!finished)
529     {
530         peciRetryTimer.expires_after(std::chrono::seconds(10));
531         peciRetryTimer.async_wait([](boost::system::error_code ec) {
532             if (ec)
533             {
534                 if (ec != boost::asio::error::operation_aborted)
535                 {
536                     std::cerr << "SST PECI Retry Timer failed: " << ec << '\n';
537                 }
538                 return;
539             }
540             discoverOrWait();
541         });
542     }
543 }
544 
545 static void hostStateHandler(HostState prevState, HostState)
546 {
547     if (prevState == HostState::off)
548     {
549         // Start or re-start discovery any time the host moves out of the
550         // powered off state.
551         discoverOrWait();
552     }
553 }
554 
/**
 * Set up SST support by registering hostStateHandler as a host state
 * callback. Discovery itself is started from that handler, when the host
 * leaves the powered off state.
 */
void init()
{
    addHostStateCallback(hostStateHandler);
}
559 
560 } // namespace sst
561 } // namespace cpu_info
562