1 // Copyright (c) 2020 Intel Corporation 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "speed_select.hpp" 16 17 #include "cpuinfo.hpp" 18 #include "cpuinfo_utils.hpp" 19 20 #include <peci.h> 21 22 #include <boost/asio/error.hpp> 23 #include <boost/asio/steady_timer.hpp> 24 #include <xyz/openbmc_project/Common/Device/error.hpp> 25 #include <xyz/openbmc_project/Common/error.hpp> 26 #include <xyz/openbmc_project/Control/Processor/CurrentOperatingConfig/server.hpp> 27 #include <xyz/openbmc_project/Inventory/Item/Cpu/OperatingConfig/server.hpp> 28 29 #include <algorithm> 30 #include <iostream> 31 #include <memory> 32 #include <stdexcept> 33 #include <string> 34 35 namespace cpu_info 36 { 37 namespace sst 38 { 39 40 // Specialize char to print the integer value instead of ascii. We basically 41 // never want to print a single ascii char. 42 std::ostream& operator<<(std::ostream& os, uint8_t value) 43 { 44 return os << static_cast<int>(value); 45 } 46 47 bool checkPECIStatus(EPECIStatus libStatus, uint8_t completionCode) 48 { 49 if (libStatus != PECI_CC_SUCCESS || completionCode != PECI_DEV_CC_SUCCESS) 50 { 51 std::cerr << "PECI command failed." 52 << " Driver Status = " << libStatus << "," 53 << " Completion Code = " << completionCode << '\n'; 54 return false; 55 } 56 return true; 57 } 58 59 std::vector<uint32_t> convertMaskToList(std::bitset<64> mask) 60 { 61 std::vector<uint32_t> bitList; 62 for (size_t i = 0; i < mask.size(); ++i) 63 { 64 if (mask.test(i)) 65 { 66 bitList.push_back(i); 67 } 68 } 69 return bitList; 70 } 71 72 static std::vector<BackendProvider>& getProviders() 73 { 74 static auto* providers = new std::vector<BackendProvider>; 75 return *providers; 76 } 77 78 void registerBackend(BackendProvider providerFn) 79 { 80 getProviders().push_back(providerFn); 81 } 82 83 std::unique_ptr<SSTInterface> getInstance(uint8_t address, CPUModel model) 84 { 85 DEBUG_PRINT << "Searching for provider for " << address << ", model " 86 << std::hex << model << '\n'; 87 for (const auto& provider : getProviders()) 88 { 89 try 90 { 91 auto interface = provider(address, model); 92 DEBUG_PRINT << "returned " << interface << '\n'; 93 if (interface) 94 { 95 return interface; 96 } 97 } 98 catch (...) 99 {} 100 } 101 DEBUG_PRINT << "No supported backends found\n"; 102 return nullptr; 103 } 104 105 using BaseCurrentOperatingConfig = 106 sdbusplus::server::object_t<sdbusplus::xyz::openbmc_project::Control:: 107 Processor::server::CurrentOperatingConfig>; 108 109 using BaseOperatingConfig = 110 sdbusplus::server::object_t<sdbusplus::xyz::openbmc_project::Inventory:: 111 Item::Cpu::server::OperatingConfig>; 112 113 class OperatingConfig : public BaseOperatingConfig 114 { 115 public: 116 std::string path; 117 unsigned int level; 118 119 public: 120 using BaseOperatingConfig::BaseOperatingConfig; 121 OperatingConfig(sdbusplus::bus::bus& bus, unsigned int level_, 122 std::string path_) : 123 BaseOperatingConfig(bus, path_.c_str(), action::defer_emit), 124 path(std::move(path_)), level(level_) 125 {} 126 }; 127 128 class CPUConfig : public BaseCurrentOperatingConfig 129 { 130 private: 131 /** Objects describing all available SST configs - not modifiable. */ 132 std::vector<std::unique_ptr<OperatingConfig>> availConfigs; 133 sdbusplus::bus::bus& bus; 134 const uint8_t peciAddress; 135 const std::string path; ///< D-Bus path of CPU object 136 const CPUModel cpuModel; 137 138 // Keep mutable copies of the properties so we can cache values that we 139 // retrieve in the getters. We don't want to throw an error on a D-Bus 140 // get-property call (extra error handling in clients), so by caching we can 141 // hide any temporary hiccup in PECI communication. 142 // These values can be changed by in-band software so we have to do a full 143 // PECI read on every get-property, and can't assume that values will change 144 // only when set-property is done. 145 mutable unsigned int currentLevel; 146 mutable bool bfEnabled; 147 148 /** 149 * Enforce common pre-conditions for D-Bus set property handlers. 150 */ 151 void setPropertyCheckOrThrow(SSTInterface& sst) 152 { 153 if (!sst.supportsControl()) 154 { 155 throw sdbusplus::xyz::openbmc_project::Common::Error::NotAllowed(); 156 } 157 if (hostState != HostState::postComplete || !sst.ready()) 158 { 159 throw sdbusplus::xyz::openbmc_project::Common::Error::Unavailable(); 160 } 161 } 162 163 public: 164 CPUConfig(sdbusplus::bus::bus& bus_, uint8_t index, CPUModel model) : 165 BaseCurrentOperatingConfig(bus_, generatePath(index).c_str(), 166 action::defer_emit), 167 bus(bus_), peciAddress(index + MIN_CLIENT_ADDR), 168 path(generatePath(index)), cpuModel(model), currentLevel(0), 169 bfEnabled(false) 170 {} 171 172 // 173 // D-Bus Property Overrides 174 // 175 176 sdbusplus::message::object_path appliedConfig() const override 177 { 178 DEBUG_PRINT << "Reading AppliedConfig\n"; 179 // If CPU is powered off, return power-up default value of Level 0. 180 unsigned int level = 0; 181 if (hostState != HostState::off) 182 { 183 // Otherwise, try to read current state 184 auto sst = getInstance(peciAddress, cpuModel); 185 if (!sst) 186 { 187 std::cerr << __func__ 188 << ": Failed to get SST provider instance\n"; 189 } 190 else 191 { 192 try 193 { 194 currentLevel = sst->currentLevel(); 195 } 196 catch (const PECIError& error) 197 { 198 std::cerr << "Failed to get SST-PP level: " << error.what() 199 << "\n"; 200 } 201 } 202 level = currentLevel; 203 } 204 return generateConfigPath(level); 205 } 206 207 bool baseSpeedPriorityEnabled() const override 208 { 209 DEBUG_PRINT << "Reading BaseSpeedPriorityEnabled\n"; 210 bool enabled = false; 211 if (hostState != HostState::off) 212 { 213 auto sst = getInstance(peciAddress, cpuModel); 214 if (!sst) 215 { 216 std::cerr << __func__ 217 << ": Failed to get SST provider instance\n"; 218 } 219 else 220 { 221 try 222 { 223 bfEnabled = sst->bfEnabled(currentLevel); 224 } 225 catch (const PECIError& error) 226 { 227 std::cerr << "Failed to get SST-BF status: " << error.what() 228 << "\n"; 229 } 230 } 231 enabled = bfEnabled; 232 } 233 return enabled; 234 } 235 236 sdbusplus::message::object_path 237 appliedConfig(sdbusplus::message::object_path value) override 238 { 239 DEBUG_PRINT << "Writing AppliedConfig\n"; 240 const OperatingConfig* newConfig = nullptr; 241 for (const auto& config : availConfigs) 242 { 243 if (config->path == value.str) 244 { 245 newConfig = config.get(); 246 } 247 } 248 249 if (newConfig == nullptr) 250 { 251 throw sdbusplus::xyz::openbmc_project::Common::Error:: 252 InvalidArgument(); 253 } 254 255 auto sst = getInstance(peciAddress, cpuModel); 256 if (!sst) 257 { 258 std::cerr << __func__ << ": Failed to get SST provider instance\n"; 259 return sdbusplus::message::object_path(); 260 } 261 setPropertyCheckOrThrow(*sst); 262 try 263 { 264 sst->setCurrentLevel(newConfig->level); 265 currentLevel = newConfig->level; 266 } 267 catch (const PECIError& error) 268 { 269 std::cerr << "Failed to set new SST-PP level: " << error.what() 270 << "\n"; 271 throw sdbusplus::xyz::openbmc_project::Common::Device::Error:: 272 WriteFailure(); 273 } 274 275 // return value not used 276 return sdbusplus::message::object_path(); 277 } 278 279 bool baseSpeedPriorityEnabled(bool /* value */) override 280 { 281 DEBUG_PRINT << "Writing BaseSpeedPriorityEnabled not allowed\n"; 282 throw sdbusplus::xyz::openbmc_project::Common::Error::NotAllowed(); 283 // return value not used 284 return false; 285 } 286 287 // 288 // Additions 289 // 290 291 OperatingConfig& newConfig(unsigned int level) 292 { 293 availConfigs.emplace_back(std::make_unique<OperatingConfig>( 294 bus, level, generateConfigPath(level))); 295 return *availConfigs.back(); 296 } 297 298 std::string generateConfigPath(unsigned int level) const 299 { 300 return path + "/config" + std::to_string(level); 301 } 302 303 /** 304 * Emit the interface added signals which were deferred. This is required 305 * for ObjectMapper to pick up the objects, if we initially defered the 306 * signal emitting. 307 */ 308 void finalize() 309 { 310 emit_added(); 311 for (auto& config : availConfigs) 312 { 313 config->emit_added(); 314 } 315 } 316 317 static std::string generatePath(int index) 318 { 319 return cpuPath + std::to_string(index); 320 } 321 }; 322 323 /** 324 * Retrieve the SST parameters for a single config and fill the values into the 325 * properties on the D-Bus interface. 326 * 327 * @param[in,out] sst Interface to SST backend. 328 * @param[in] level Config TDP level to retrieve. 329 * @param[out] config D-Bus interface to update. 330 */ 331 static void getSingleConfig(SSTInterface& sst, unsigned int level, 332 OperatingConfig& config) 333 { 334 config.powerLimit(sst.tdp(level)); 335 336 config.availableCoreCount(sst.coreCount(level)); 337 338 config.baseSpeed(sst.p1Freq(level)); 339 340 config.maxSpeed(sst.p0Freq(level)); 341 342 config.maxJunctionTemperature(sst.prochotTemp(level)); 343 344 // Construct BaseSpeedPrioritySettings 345 std::vector<std::tuple<uint32_t, std::vector<uint32_t>>> baseSpeeds; 346 if (sst.bfSupported(level)) 347 { 348 std::vector<uint32_t> totalCoreList, loFreqCoreList, hiFreqCoreList; 349 totalCoreList = sst.enabledCoreList(level); 350 hiFreqCoreList = sst.bfHighPriorityCoreList(level); 351 std::set_difference( 352 totalCoreList.begin(), totalCoreList.end(), hiFreqCoreList.begin(), 353 hiFreqCoreList.end(), 354 std::inserter(loFreqCoreList, loFreqCoreList.begin())); 355 356 baseSpeeds = {{sst.bfHighPriorityFreq(level), hiFreqCoreList}, 357 {sst.bfLowPriorityFreq(level), loFreqCoreList}}; 358 } 359 config.baseSpeedPrioritySettings(baseSpeeds); 360 361 config.turboProfile(sst.sseTurboProfile(level)); 362 } 363 364 /** 365 * Retrieve all SST configuration info for all discoverable CPUs, and publish 366 * the info on new D-Bus objects on the given bus connection. 367 * 368 * @param[in,out] ioc ASIO context. 369 * @param[in,out] conn D-Bus ASIO connection. 370 * 371 * @return Whether discovery was successfully finished. 372 * 373 * @throw PECIError A PECI command failed on a CPU which had previously 374 * responded to a command. 375 */ 376 static bool discoverCPUsAndConfigs(boost::asio::io_context& ioc, 377 sdbusplus::asio::connection& conn) 378 { 379 // Persistent list - only populated after complete/successful discovery 380 static std::vector<std::unique_ptr<CPUConfig>> cpus; 381 cpus.clear(); 382 383 // Temporary staging list. In case there is any failure, these temporary 384 // objects will get dropped to avoid presenting incomplete info until the 385 // next discovery attempt. 386 std::vector<std::unique_ptr<CPUConfig>> cpuList; 387 388 for (uint8_t i = MIN_CLIENT_ADDR; i <= MAX_CLIENT_ADDR; ++i) 389 { 390 // Let the event handler run any waiting tasks. If there is a lot of 391 // PECI contention, SST discovery could take a long time. This lets us 392 // get updates to hostState and handle any D-Bus requests. 393 ioc.poll(); 394 395 if (hostState == HostState::off) 396 { 397 return false; 398 } 399 400 unsigned int cpuIndex = i - MIN_CLIENT_ADDR; 401 DEBUG_PRINT << "Discovering CPU " << cpuIndex << '\n'; 402 403 // We could possibly check D-Bus for CPU presence and model, but PECI is 404 // 10x faster and so much simpler. 405 uint8_t cc, stepping; 406 CPUModel cpuModel; 407 EPECIStatus status = peci_GetCPUID(i, &cpuModel, &stepping, &cc); 408 if (status == PECI_CC_TIMEOUT) 409 { 410 // Timing out indicates the CPU is present but PCS services not 411 // working yet. Try again later. 412 throw PECIError("Get CPUID timed out"); 413 } 414 if (status == PECI_CC_CPU_NOT_PRESENT) 415 { 416 continue; 417 } 418 if (status != PECI_CC_SUCCESS || cc != PECI_DEV_CC_SUCCESS) 419 { 420 std::cerr << "GetCPUID returned status " << status 421 << ", cc = " << cc << '\n'; 422 continue; 423 } 424 425 std::unique_ptr<SSTInterface> sst = getInstance(i, cpuModel); 426 427 if (!sst) 428 { 429 // No supported backend for this CPU. 430 continue; 431 } 432 433 if (!sst->ready()) 434 { 435 // Supported CPU but it can't be queried yet. Try again later. 436 std::cerr << "sst not ready yet\n"; 437 return false; 438 } 439 440 if (!sst->ppEnabled()) 441 { 442 // Supported CPU but the specific SKU doesn't support SST-PP. 443 std::cerr << "CPU doesn't support SST-PP\n"; 444 continue; 445 } 446 447 // Create the per-CPU configuration object 448 cpuList.emplace_back( 449 std::make_unique<CPUConfig>(conn, cpuIndex, cpuModel)); 450 CPUConfig& cpu = *cpuList.back(); 451 452 bool foundCurrentLevel = false; 453 454 for (unsigned int level = 0; level <= sst->numLevels(); ++level) 455 { 456 // levels 1 and 2 were legacy/deprecated, originally used for AVX 457 // license pre-granting. They may be reused for more levels in 458 // future generations. So we need to check for discontinuities. 459 if (!sst->levelSupported(level)) 460 { 461 continue; 462 } 463 464 getSingleConfig(*sst, level, cpu.newConfig(level)); 465 466 if (level == sst->currentLevel()) 467 { 468 foundCurrentLevel = true; 469 } 470 } 471 472 if (!foundCurrentLevel) 473 { 474 // In case we didn't encounter a PECI error, but also didn't find 475 // the config which is supposedly applied, we won't be able to 476 // populate the CurrentOperatingConfig so we have to remove this CPU 477 // from consideration. 478 std::cerr << "CPU " << cpuIndex 479 << " claimed SST support but invalid configs\n"; 480 cpuList.pop_back(); 481 continue; 482 } 483 } 484 485 cpuList.swap(cpus); 486 std::for_each(cpus.begin(), cpus.end(), [](auto& cpu) { cpu->finalize(); }); 487 return true; 488 } 489 490 /** 491 * Attempt discovery process, and if it fails, wait for 10 seconds to try again. 492 */ 493 static void discoverOrWait() 494 { 495 static boost::asio::steady_timer peciRetryTimer(dbus::getIOContext()); 496 static int peciErrorCount = 0; 497 bool finished = false; 498 499 // This function may be called from hostStateHandler or by retrying itself. 500 // In case those overlap, cancel any outstanding retry timer. 501 peciRetryTimer.cancel(); 502 503 try 504 { 505 DEBUG_PRINT << "Starting discovery\n"; 506 finished = discoverCPUsAndConfigs(dbus::getIOContext(), 507 *dbus::getConnection()); 508 } 509 catch (const PECIError& err) 510 { 511 std::cerr << "PECI Error: " << err.what() << '\n'; 512 513 // In case of repeated failure to finish discovery, turn off this 514 // feature altogether. Possible cause is that the CPU model does not 515 // actually support the necessary commands. 516 if (++peciErrorCount >= 50) 517 { 518 std::cerr << "Aborting SST discovery\n"; 519 return; 520 } 521 522 std::cerr << "Retrying SST discovery later\n"; 523 } 524 525 DEBUG_PRINT << "Finished discovery attempt: " << finished << '\n'; 526 527 // Retry later if no CPUs were available, or there was a PECI error. 528 if (!finished) 529 { 530 peciRetryTimer.expires_after(std::chrono::seconds(10)); 531 peciRetryTimer.async_wait([](boost::system::error_code ec) { 532 if (ec) 533 { 534 if (ec != boost::asio::error::operation_aborted) 535 { 536 std::cerr << "SST PECI Retry Timer failed: " << ec << '\n'; 537 } 538 return; 539 } 540 discoverOrWait(); 541 }); 542 } 543 } 544 545 static void hostStateHandler(HostState prevState, HostState) 546 { 547 if (prevState == HostState::off) 548 { 549 // Start or re-start discovery any time the host moves out of the 550 // powered off state. 551 discoverOrWait(); 552 } 553 } 554 555 void init() 556 { 557 addHostStateCallback(hostStateHandler); 558 } 559 560 } // namespace sst 561 } // namespace cpu_info 562