xref: /openbmc/smbios-mdr/src/speed_select.cpp (revision 1d73dccc89f0bb9d1dce3543e5af6b3e3087d5f4)
1  // Copyright (c) 2020 Intel Corporation
2  //
3  // Licensed under the Apache License, Version 2.0 (the "License");
4  // you may not use this file except in compliance with the License.
5  // You may obtain a copy of the License at
6  //
7  //      http://www.apache.org/licenses/LICENSE-2.0
8  //
9  // Unless required by applicable law or agreed to in writing, software
10  // distributed under the License is distributed on an "AS IS" BASIS,
11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  // See the License for the specific language governing permissions and
13  // limitations under the License.
14  
#include "speed_select.hpp"

#include "cpuinfo.hpp"
#include "cpuinfo_utils.hpp"

#include <peci.h>

#include <boost/asio/error.hpp>
#include <boost/asio/steady_timer.hpp>
#include <xyz/openbmc_project/Common/Device/error.hpp>
#include <xyz/openbmc_project/Common/error.hpp>
#include <xyz/openbmc_project/Control/Processor/CurrentOperatingConfig/server.hpp>
#include <xyz/openbmc_project/Inventory/Item/Cpu/OperatingConfig/server.hpp>

#include <algorithm>
#include <exception>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
34  
35  namespace cpu_info
36  {
37  namespace sst
38  {
39  
// Stream a uint8_t as its integer value instead of as a single character.
// Log messages in this file always want the number, never the ASCII glyph.
std::ostream& operator<<(std::ostream& os, uint8_t value)
{
    const int numericValue = value;
    os << numericValue;
    return os;
}
46  
checkPECIStatus(EPECIStatus libStatus,uint8_t completionCode)47  bool checkPECIStatus(EPECIStatus libStatus, uint8_t completionCode)
48  {
49      if (libStatus != PECI_CC_SUCCESS || completionCode != PECI_DEV_CC_SUCCESS)
50      {
51          std::cerr << "PECI command failed."
52                    << " Driver Status = " << libStatus << ","
53                    << " Completion Code = " << completionCode << '\n';
54          return false;
55      }
56      return true;
57  }
58  
/**
 * Expand a 64-bit mask into the list of bit positions that are set.
 *
 * @param[in]   mask    Bit mask to expand.
 *
 * @return Indices of the set bits, in ascending order.
 */
std::vector<uint32_t> convertMaskToList(std::bitset<64> mask)
{
    std::vector<uint32_t> bitList;
    // The number of set bits is known up front, so allocate exactly once.
    bitList.reserve(mask.count());
    for (size_t i = 0; i < mask.size(); ++i)
    {
        if (mask.test(i))
        {
            // i is always below 64, so narrowing from size_t loses nothing.
            bitList.push_back(static_cast<uint32_t>(i));
        }
    }
    return bitList;
}
71  
getProviders()72  static std::vector<BackendProvider>& getProviders()
73  {
74      static auto* providers = new std::vector<BackendProvider>;
75      return *providers;
76  }
77  
registerBackend(BackendProvider providerFn)78  void registerBackend(BackendProvider providerFn)
79  {
80      getProviders().push_back(providerFn);
81  }
82  
getInstance(uint8_t address,CPUModel model,WakePolicy wakePolicy)83  std::unique_ptr<SSTInterface> getInstance(uint8_t address, CPUModel model,
84                                            WakePolicy wakePolicy)
85  {
86      DEBUG_PRINT << "Searching for provider for " << address << ", model "
87                  << std::hex << model << std::dec << '\n';
88      for (const auto& provider : getProviders())
89      {
90          try
91          {
92              auto interface = provider(address, model, wakePolicy);
93              DEBUG_PRINT << "returned " << interface << '\n';
94              if (interface)
95              {
96                  return interface;
97              }
98          }
99          catch (...)
100          {}
101      }
102      DEBUG_PRINT << "No supported backends found\n";
103      return nullptr;
104  }
105  
106  using BaseCurrentOperatingConfig =
107      sdbusplus::server::object_t<sdbusplus::server::xyz::openbmc_project::
108                                      control::processor::CurrentOperatingConfig>;
109  
110  using BaseOperatingConfig =
111      sdbusplus::server::object_t<sdbusplus::server::xyz::openbmc_project::
112                                      inventory::item::cpu::OperatingConfig>;
113  
114  class OperatingConfig : public BaseOperatingConfig
115  {
116    public:
117      std::string path;
118      unsigned int level;
119  
120    public:
121      using BaseOperatingConfig::BaseOperatingConfig;
OperatingConfig(sdbusplus::bus_t & bus,unsigned int level_,std::string path_)122      OperatingConfig(sdbusplus::bus_t& bus, unsigned int level_,
123                      std::string path_) :
124          BaseOperatingConfig(bus, path_.c_str(), action::defer_emit),
125          path(std::move(path_)), level(level_)
126      {}
127  };
128  
129  class CPUConfig : public BaseCurrentOperatingConfig
130  {
131    private:
132      /** Objects describing all available SST configs - not modifiable. */
133      std::vector<std::unique_ptr<OperatingConfig>> availConfigs;
134      sdbusplus::bus_t& bus;
135      const uint8_t peciAddress;
136      const std::string path; ///< D-Bus path of CPU object
137      const CPUModel cpuModel;
138  
139      // Keep mutable copies of the properties so we can cache values that we
140      // retrieve in the getters. We don't want to throw an error on a D-Bus
141      // get-property call (extra error handling in clients), so by caching we can
142      // hide any temporary hiccup in PECI communication.
143      // These values can be changed by in-band software so we have to do a full
144      // PECI read on every get-property, and can't assume that values will change
145      // only when set-property is done.
146      mutable unsigned int currentLevel;
147      mutable bool bfEnabled;
148  
149      /**
150       * Enforce common pre-conditions for D-Bus set property handlers.
151       */
setPropertyCheckOrThrow(SSTInterface & sst)152      void setPropertyCheckOrThrow(SSTInterface& sst)
153      {
154          if (!sst.supportsControl())
155          {
156              throw sdbusplus::xyz::openbmc_project::Common::Error::NotAllowed();
157          }
158          if (hostState != HostState::postComplete || !sst.ready())
159          {
160              throw sdbusplus::xyz::openbmc_project::Common::Error::Unavailable();
161          }
162      }
163  
164    public:
CPUConfig(sdbusplus::bus_t & bus_,uint8_t index,CPUModel model,unsigned int currentLevel_,bool bfEnabled_)165      CPUConfig(sdbusplus::bus_t& bus_, uint8_t index, CPUModel model,
166                unsigned int currentLevel_, bool bfEnabled_) :
167          BaseCurrentOperatingConfig(bus_, generatePath(index).c_str(),
168                                     action::defer_emit),
169          bus(bus_), peciAddress(index + MIN_CLIENT_ADDR),
170          path(generatePath(index)), cpuModel(model), currentLevel(currentLevel_),
171          bfEnabled(bfEnabled_)
172      {}
173  
174      //
175      // D-Bus Property Overrides
176      //
177  
appliedConfig() const178      sdbusplus::message::object_path appliedConfig() const override
179      {
180          DEBUG_PRINT << "Reading AppliedConfig\n";
181          if (hostState != HostState::off)
182          {
183              // Otherwise, try to read current state
184              auto sst = getInstance(peciAddress, cpuModel, dontWake);
185              if (!sst || !sst->ready())
186              {
187                  std::cerr << __func__
188                            << ": Failed to get SST provider instance\n";
189              }
190              else
191              {
192                  try
193                  {
194                      currentLevel = sst->currentLevel();
195                  }
196                  catch (const PECIError& error)
197                  {
198                      std::cerr << "Failed to get SST-PP level: " << error.what()
199                                << "\n";
200                  }
201              }
202          }
203          return generateConfigPath(currentLevel);
204      }
205  
baseSpeedPriorityEnabled() const206      bool baseSpeedPriorityEnabled() const override
207      {
208          DEBUG_PRINT << "Reading BaseSpeedPriorityEnabled\n";
209          if (hostState != HostState::off)
210          {
211              auto sst = getInstance(peciAddress, cpuModel, dontWake);
212              if (!sst || !sst->ready())
213              {
214                  std::cerr << __func__
215                            << ": Failed to get SST provider instance\n";
216              }
217              else
218              {
219                  try
220                  {
221                      bfEnabled = sst->bfEnabled(currentLevel);
222                  }
223                  catch (const PECIError& error)
224                  {
225                      std::cerr << "Failed to get SST-BF status: " << error.what()
226                                << "\n";
227                  }
228              }
229          }
230          return bfEnabled;
231      }
232  
233      sdbusplus::message::object_path
appliedConfig(sdbusplus::message::object_path value)234          appliedConfig(sdbusplus::message::object_path value) override
235      {
236          DEBUG_PRINT << "Writing AppliedConfig\n";
237          const OperatingConfig* newConfig = nullptr;
238          for (const auto& config : availConfigs)
239          {
240              if (config->path == value.str)
241              {
242                  newConfig = config.get();
243              }
244          }
245  
246          if (newConfig == nullptr)
247          {
248              throw sdbusplus::xyz::openbmc_project::Common::Error::
249                  InvalidArgument();
250          }
251  
252          auto sst = getInstance(peciAddress, cpuModel, wakeAllowed);
253          if (!sst)
254          {
255              std::cerr << __func__ << ": Failed to get SST provider instance\n";
256              return sdbusplus::message::object_path();
257          }
258          try
259          {
260              setPropertyCheckOrThrow(*sst);
261              sst->setCurrentLevel(newConfig->level);
262              currentLevel = newConfig->level;
263          }
264          catch (const PECIError& error)
265          {
266              std::cerr << "Failed to set new SST-PP level: " << error.what()
267                        << "\n";
268              throw sdbusplus::xyz::openbmc_project::Common::Device::Error::
269                  WriteFailure();
270          }
271  
272          // return value not used
273          return sdbusplus::message::object_path();
274      }
275  
baseSpeedPriorityEnabled(bool)276      bool baseSpeedPriorityEnabled(bool /* value */) override
277      {
278          DEBUG_PRINT << "Writing BaseSpeedPriorityEnabled not allowed\n";
279          throw sdbusplus::xyz::openbmc_project::Common::Error::NotAllowed();
280          // return value not used
281          return false;
282      }
283  
284      //
285      // Additions
286      //
287  
newConfig(unsigned int level)288      OperatingConfig& newConfig(unsigned int level)
289      {
290          availConfigs.emplace_back(std::make_unique<OperatingConfig>(
291              bus, level, generateConfigPath(level)));
292          return *availConfigs.back();
293      }
294  
generateConfigPath(unsigned int level) const295      std::string generateConfigPath(unsigned int level) const
296      {
297          return path + "/config" + std::to_string(level);
298      }
299  
300      /**
301       * Emit the interface added signals which were deferred. This is required
302       * for ObjectMapper to pick up the objects, if we initially deferred the
303       * signal emitting.
304       */
finalize()305      void finalize()
306      {
307          emit_added();
308          for (auto& config : availConfigs)
309          {
310              config->emit_added();
311          }
312      }
313  
generatePath(int index)314      static std::string generatePath(int index)
315      {
316          return cpuPath + std::to_string(index);
317      }
318  };
319  
320  /**
321   * Retrieve the SST parameters for a single config and fill the values into the
322   * properties on the D-Bus interface.
323   *
324   * @param[in,out]   sst         Interface to SST backend.
325   * @param[in]       level       Config TDP level to retrieve.
326   * @param[out]      config      D-Bus interface to update.
327   */
getSingleConfig(SSTInterface & sst,unsigned int level,OperatingConfig & config)328  static void getSingleConfig(SSTInterface& sst, unsigned int level,
329                              OperatingConfig& config)
330  {
331      config.powerLimit(sst.tdp(level));
332      DEBUG_PRINT << " TDP = " << config.powerLimit() << '\n';
333  
334      config.availableCoreCount(sst.coreCount(level));
335      DEBUG_PRINT << " coreCount = " << config.availableCoreCount() << '\n';
336  
337      config.baseSpeed(sst.p1Freq(level));
338      DEBUG_PRINT << " baseSpeed = " << config.baseSpeed() << '\n';
339  
340      config.maxSpeed(sst.p0Freq(level));
341      DEBUG_PRINT << " maxSpeed = " << config.maxSpeed() << '\n';
342  
343      config.maxJunctionTemperature(sst.prochotTemp(level));
344      DEBUG_PRINT << " procHot = " << config.maxJunctionTemperature() << '\n';
345  
346      // Construct BaseSpeedPrioritySettings
347      std::vector<std::tuple<uint32_t, std::vector<uint32_t>>> baseSpeeds;
348      if (sst.bfSupported(level))
349      {
350          std::vector<uint32_t> totalCoreList, loFreqCoreList, hiFreqCoreList;
351          totalCoreList = sst.enabledCoreList(level);
352          hiFreqCoreList = sst.bfHighPriorityCoreList(level);
353          std::set_difference(
354              totalCoreList.begin(), totalCoreList.end(), hiFreqCoreList.begin(),
355              hiFreqCoreList.end(),
356              std::inserter(loFreqCoreList, loFreqCoreList.begin()));
357  
358          baseSpeeds = {{sst.bfHighPriorityFreq(level), hiFreqCoreList},
359                        {sst.bfLowPriorityFreq(level), loFreqCoreList}};
360      }
361      config.baseSpeedPrioritySettings(baseSpeeds);
362  
363      config.turboProfile(sst.sseTurboProfile(level));
364  }
365  
366  /**
367   * Retrieve all SST configuration info for all discoverable CPUs, and publish
368   * the info on new D-Bus objects on the given bus connection.
369   *
370   * @param[in,out]   ioc     ASIO context.
371   * @param[in,out]   conn    D-Bus ASIO connection.
372   *
373   * @return  Whether discovery was successfully finished.
374   *
375   * @throw PECIError     A PECI command failed on a CPU which had previously
376   *                      responded to a command.
377   */
discoverCPUsAndConfigs(boost::asio::io_context & ioc,sdbusplus::asio::connection & conn)378  static bool discoverCPUsAndConfigs(boost::asio::io_context& ioc,
379                                     sdbusplus::asio::connection& conn)
380  {
381      // Persistent list - only populated after complete/successful discovery
382      static std::vector<std::unique_ptr<CPUConfig>> cpus;
383      cpus.clear();
384  
385      // Temporary staging list. In case there is any failure, these temporary
386      // objects will get dropped to avoid presenting incomplete info until the
387      // next discovery attempt.
388      std::vector<std::unique_ptr<CPUConfig>> cpuList;
389  
390      for (uint8_t i = MIN_CLIENT_ADDR; i <= MAX_CLIENT_ADDR; ++i)
391      {
392          // Let the event handler run any waiting tasks. If there is a lot of
393          // PECI contention, SST discovery could take a long time. This lets us
394          // get updates to hostState and handle any D-Bus requests.
395          ioc.poll();
396  
397          if (hostState == HostState::off)
398          {
399              return false;
400          }
401  
402          unsigned int cpuIndex = i - MIN_CLIENT_ADDR;
403          DEBUG_PRINT << "Discovering CPU " << cpuIndex << '\n';
404  
405          // We could possibly check D-Bus for CPU presence and model, but PECI is
406          // 10x faster and so much simpler.
407          uint8_t cc, stepping;
408          CPUModel cpuModel;
409          EPECIStatus status = peci_GetCPUID(i, &cpuModel, &stepping, &cc);
410          if (status == PECI_CC_TIMEOUT)
411          {
412              // Timing out indicates the CPU is present but PCS services not
413              // working yet. Try again later.
414              throw PECIError("Get CPUID timed out");
415          }
416          if (status == PECI_CC_CPU_NOT_PRESENT)
417          {
418              continue;
419          }
420          if (status != PECI_CC_SUCCESS || cc != PECI_DEV_CC_SUCCESS)
421          {
422              std::cerr << "GetCPUID returned status " << status
423                        << ", cc = " << cc << '\n';
424              continue;
425          }
426  
427          std::unique_ptr<SSTInterface> sst =
428              getInstance(i, cpuModel, wakeAllowed);
429  
430          if (!sst)
431          {
432              // No supported backend for this CPU.
433              continue;
434          }
435  
436          if (!sst->ready())
437          {
438              // Supported CPU but it can't be queried yet. Try again later.
439              std::cerr << "sst not ready yet\n";
440              return false;
441          }
442  
443          if (!sst->ppEnabled())
444          {
445              // Supported CPU but the specific SKU doesn't support SST-PP.
446              std::cerr << "CPU doesn't support SST-PP\n";
447              continue;
448          }
449  
450          // Create the per-CPU configuration object
451          unsigned int currentLevel = sst->currentLevel();
452          cpuList.emplace_back(
453              std::make_unique<CPUConfig>(conn, cpuIndex, cpuModel, currentLevel,
454                                          sst->bfEnabled(currentLevel)));
455          CPUConfig& cpu = *cpuList.back();
456  
457          bool foundCurrentLevel = false;
458  
459          for (unsigned int level = 0; level <= sst->maxLevel(); ++level)
460          {
461              DEBUG_PRINT << "checking level " << level << ": ";
462              // levels 1 and 2 were legacy/deprecated, originally used for AVX
463              // license pre-granting. They may be reused for more levels in
464              // future generations. So we need to check for discontinuities.
465              if (!sst->levelSupported(level))
466              {
467                  DEBUG_PRINT << "not supported\n";
468                  continue;
469              }
470  
471              DEBUG_PRINT << "supported\n";
472  
473              getSingleConfig(*sst, level, cpu.newConfig(level));
474  
475              if (level == currentLevel)
476              {
477                  foundCurrentLevel = true;
478              }
479          }
480  
481          DEBUG_PRINT << "current level is " << currentLevel << '\n';
482  
483          if (!foundCurrentLevel)
484          {
485              // In case we didn't encounter a PECI error, but also didn't find
486              // the config which is supposedly applied, we won't be able to
487              // populate the CurrentOperatingConfig so we have to remove this CPU
488              // from consideration.
489              std::cerr << "CPU " << cpuIndex
490                        << " claimed SST support but invalid configs\n";
491              cpuList.pop_back();
492              continue;
493          }
494      }
495  
496      cpuList.swap(cpus);
497      std::for_each(cpus.begin(), cpus.end(), [](auto& cpu) { cpu->finalize(); });
498      return true;
499  }
500  
501  /**
502   * Attempt discovery process, and if it fails, wait for 10 seconds to try again.
503   */
discoverOrWait()504  static void discoverOrWait()
505  {
506      static boost::asio::steady_timer peciRetryTimer(dbus::getIOContext());
507      static int peciErrorCount = 0;
508      bool finished = false;
509  
510      // This function may be called from hostStateHandler or by retrying itself.
511      // In case those overlap, cancel any outstanding retry timer.
512      peciRetryTimer.cancel();
513  
514      try
515      {
516          DEBUG_PRINT << "Starting discovery\n";
517          finished = discoverCPUsAndConfigs(dbus::getIOContext(),
518                                            *dbus::getConnection());
519      }
520      catch (const PECIError& err)
521      {
522          std::cerr << "PECI Error: " << err.what() << '\n';
523  
524          // In case of repeated failure to finish discovery, turn off this
525          // feature altogether. Possible cause is that the CPU model does not
526          // actually support the necessary commands.
527          if (++peciErrorCount >= 50)
528          {
529              std::cerr << "Aborting SST discovery\n";
530              return;
531          }
532  
533          std::cerr << "Retrying SST discovery later\n";
534      }
535  
536      DEBUG_PRINT << "Finished discovery attempt: " << finished << '\n';
537  
538      // Retry later if no CPUs were available, or there was a PECI error.
539      if (!finished)
540      {
541          peciRetryTimer.expires_after(std::chrono::seconds(10));
542          peciRetryTimer.async_wait([](boost::system::error_code ec) {
543              if (ec)
544              {
545                  if (ec != boost::asio::error::operation_aborted)
546                  {
547                      std::cerr << "SST PECI Retry Timer failed: " << ec << '\n';
548                  }
549                  return;
550              }
551              discoverOrWait();
552          });
553      }
554  }
555  
hostStateHandler(HostState prevState,HostState)556  static void hostStateHandler(HostState prevState, HostState)
557  {
558      if (prevState == HostState::off)
559      {
560          // Start or re-start discovery any time the host moves out of the
561          // powered off state.
562          discoverOrWait();
563      }
564  }
565  
init()566  void init()
567  {
568      addHostStateCallback(hostStateHandler);
569  }
570  
571  } // namespace sst
572  } // namespace cpu_info
573