1 // Copyright (c) 2020 Intel Corporation 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "speed_select.hpp" 16 17 #include "cpuinfo.hpp" 18 #include "cpuinfo_utils.hpp" 19 20 #include <peci.h> 21 22 #include <boost/asio/error.hpp> 23 #include <boost/asio/steady_timer.hpp> 24 #include <xyz/openbmc_project/Common/Device/error.hpp> 25 #include <xyz/openbmc_project/Common/error.hpp> 26 #include <xyz/openbmc_project/Control/Processor/CurrentOperatingConfig/server.hpp> 27 #include <xyz/openbmc_project/Inventory/Item/Cpu/OperatingConfig/server.hpp> 28 29 #include <algorithm> 30 #include <iostream> 31 #include <memory> 32 #include <stdexcept> 33 #include <string> 34 35 namespace cpu_info 36 { 37 namespace sst 38 { 39 40 // Specialize char to print the integer value instead of ascii. We basically 41 // never want to print a single ascii char. 42 std::ostream& operator<<(std::ostream& os, uint8_t value) 43 { 44 return os << static_cast<int>(value); 45 } 46 47 bool checkPECIStatus(EPECIStatus libStatus, uint8_t completionCode) 48 { 49 if (libStatus != PECI_CC_SUCCESS || completionCode != PECI_DEV_CC_SUCCESS) 50 { 51 std::cerr << "PECI command failed." 52 << " Driver Status = " << libStatus << "," 53 << " Completion Code = " << completionCode << '\n'; 54 return false; 55 } 56 return true; 57 } 58 59 std::vector<uint32_t> convertMaskToList(std::bitset<64> mask) 60 { 61 std::vector<uint32_t> bitList; 62 for (size_t i = 0; i < mask.size(); ++i) 63 { 64 if (mask.test(i)) 65 { 66 bitList.push_back(i); 67 } 68 } 69 return bitList; 70 } 71 72 static std::vector<BackendProvider>& getProviders() 73 { 74 static auto* providers = new std::vector<BackendProvider>; 75 return *providers; 76 } 77 78 void registerBackend(BackendProvider providerFn) 79 { 80 getProviders().push_back(providerFn); 81 } 82 83 std::unique_ptr<SSTInterface> getInstance(uint8_t address, CPUModel model, 84 WakePolicy wakePolicy) 85 { 86 DEBUG_PRINT << "Searching for provider for " << address << ", model " 87 << std::hex << model << std::dec << '\n'; 88 for (const auto& provider : getProviders()) 89 { 90 try 91 { 92 auto interface = provider(address, model, wakePolicy); 93 DEBUG_PRINT << "returned " << interface << '\n'; 94 if (interface) 95 { 96 return interface; 97 } 98 } 99 catch (...) 100 {} 101 } 102 DEBUG_PRINT << "No supported backends found\n"; 103 return nullptr; 104 } 105 106 using BaseCurrentOperatingConfig = 107 sdbusplus::server::object_t<sdbusplus::server::xyz::openbmc_project:: 108 control::processor::CurrentOperatingConfig>; 109 110 using BaseOperatingConfig = 111 sdbusplus::server::object_t<sdbusplus::server::xyz::openbmc_project:: 112 inventory::item::cpu::OperatingConfig>; 113 114 class OperatingConfig : public BaseOperatingConfig 115 { 116 public: 117 std::string path; 118 unsigned int level; 119 120 public: 121 using BaseOperatingConfig::BaseOperatingConfig; 122 OperatingConfig(sdbusplus::bus_t& bus, unsigned int level_, 123 std::string path_) : 124 BaseOperatingConfig(bus, path_.c_str(), action::defer_emit), 125 path(std::move(path_)), level(level_) 126 {} 127 }; 128 129 class CPUConfig : public BaseCurrentOperatingConfig 130 { 131 private: 132 /** Objects describing all available SST configs - not modifiable. */ 133 std::vector<std::unique_ptr<OperatingConfig>> availConfigs; 134 sdbusplus::bus_t& bus; 135 const uint8_t peciAddress; 136 const std::string path; ///< D-Bus path of CPU object 137 const CPUModel cpuModel; 138 139 // Keep mutable copies of the properties so we can cache values that we 140 // retrieve in the getters. We don't want to throw an error on a D-Bus 141 // get-property call (extra error handling in clients), so by caching we can 142 // hide any temporary hiccup in PECI communication. 143 // These values can be changed by in-band software so we have to do a full 144 // PECI read on every get-property, and can't assume that values will change 145 // only when set-property is done. 146 mutable unsigned int currentLevel; 147 mutable bool bfEnabled; 148 149 /** 150 * Enforce common pre-conditions for D-Bus set property handlers. 151 */ 152 void setPropertyCheckOrThrow(SSTInterface& sst) 153 { 154 if (!sst.supportsControl()) 155 { 156 throw sdbusplus::xyz::openbmc_project::Common::Error::NotAllowed(); 157 } 158 if (hostState != HostState::postComplete || !sst.ready()) 159 { 160 throw sdbusplus::xyz::openbmc_project::Common::Error::Unavailable(); 161 } 162 } 163 164 public: 165 CPUConfig(sdbusplus::bus_t& bus_, uint8_t index, CPUModel model, 166 unsigned int currentLevel_, bool bfEnabled_) : 167 BaseCurrentOperatingConfig(bus_, generatePath(index).c_str(), 168 action::defer_emit), 169 bus(bus_), peciAddress(index + MIN_CLIENT_ADDR), 170 path(generatePath(index)), cpuModel(model), currentLevel(currentLevel_), 171 bfEnabled(bfEnabled_) 172 {} 173 174 // 175 // D-Bus Property Overrides 176 // 177 178 sdbusplus::message::object_path appliedConfig() const override 179 { 180 DEBUG_PRINT << "Reading AppliedConfig\n"; 181 if (hostState != HostState::off) 182 { 183 // Otherwise, try to read current state 184 auto sst = getInstance(peciAddress, cpuModel, dontWake); 185 if (!sst || !sst->ready()) 186 { 187 std::cerr << __func__ 188 << ": Failed to get SST provider instance\n"; 189 } 190 else 191 { 192 try 193 { 194 currentLevel = sst->currentLevel(); 195 } 196 catch (const PECIError& error) 197 { 198 std::cerr << "Failed to get SST-PP level: " << error.what() 199 << "\n"; 200 } 201 } 202 } 203 return generateConfigPath(currentLevel); 204 } 205 206 bool baseSpeedPriorityEnabled() const override 207 { 208 DEBUG_PRINT << "Reading BaseSpeedPriorityEnabled\n"; 209 if (hostState != HostState::off) 210 { 211 auto sst = getInstance(peciAddress, cpuModel, dontWake); 212 if (!sst || !sst->ready()) 213 { 214 std::cerr << __func__ 215 << ": Failed to get SST provider instance\n"; 216 } 217 else 218 { 219 try 220 { 221 bfEnabled = sst->bfEnabled(currentLevel); 222 } 223 catch (const PECIError& error) 224 { 225 std::cerr << "Failed to get SST-BF status: " << error.what() 226 << "\n"; 227 } 228 } 229 } 230 return bfEnabled; 231 } 232 233 sdbusplus::message::object_path 234 appliedConfig(sdbusplus::message::object_path value) override 235 { 236 DEBUG_PRINT << "Writing AppliedConfig\n"; 237 const OperatingConfig* newConfig = nullptr; 238 for (const auto& config : availConfigs) 239 { 240 if (config->path == value.str) 241 { 242 newConfig = config.get(); 243 } 244 } 245 246 if (newConfig == nullptr) 247 { 248 throw sdbusplus::xyz::openbmc_project::Common::Error:: 249 InvalidArgument(); 250 } 251 252 auto sst = getInstance(peciAddress, cpuModel, wakeAllowed); 253 if (!sst) 254 { 255 std::cerr << __func__ << ": Failed to get SST provider instance\n"; 256 return sdbusplus::message::object_path(); 257 } 258 try 259 { 260 setPropertyCheckOrThrow(*sst); 261 sst->setCurrentLevel(newConfig->level); 262 currentLevel = newConfig->level; 263 } 264 catch (const PECIError& error) 265 { 266 std::cerr << "Failed to set new SST-PP level: " << error.what() 267 << "\n"; 268 throw sdbusplus::xyz::openbmc_project::Common::Device::Error:: 269 WriteFailure(); 270 } 271 272 // return value not used 273 return sdbusplus::message::object_path(); 274 } 275 276 bool baseSpeedPriorityEnabled(bool /* value */) override 277 { 278 DEBUG_PRINT << "Writing BaseSpeedPriorityEnabled not allowed\n"; 279 throw sdbusplus::xyz::openbmc_project::Common::Error::NotAllowed(); 280 // return value not used 281 return false; 282 } 283 284 // 285 // Additions 286 // 287 288 OperatingConfig& newConfig(unsigned int level) 289 { 290 availConfigs.emplace_back(std::make_unique<OperatingConfig>( 291 bus, level, generateConfigPath(level))); 292 return *availConfigs.back(); 293 } 294 295 std::string generateConfigPath(unsigned int level) const 296 { 297 return path + "/config" + std::to_string(level); 298 } 299 300 /** 301 * Emit the interface added signals which were deferred. This is required 302 * for ObjectMapper to pick up the objects, if we initially deferred the 303 * signal emitting. 304 */ 305 void finalize() 306 { 307 emit_added(); 308 for (auto& config : availConfigs) 309 { 310 config->emit_added(); 311 } 312 } 313 314 static std::string generatePath(int index) 315 { 316 return cpuPath + std::to_string(index); 317 } 318 }; 319 320 /** 321 * Retrieve the SST parameters for a single config and fill the values into the 322 * properties on the D-Bus interface. 323 * 324 * @param[in,out] sst Interface to SST backend. 325 * @param[in] level Config TDP level to retrieve. 326 * @param[out] config D-Bus interface to update. 327 */ 328 static void getSingleConfig(SSTInterface& sst, unsigned int level, 329 OperatingConfig& config) 330 { 331 config.powerLimit(sst.tdp(level)); 332 DEBUG_PRINT << " TDP = " << config.powerLimit() << '\n'; 333 334 config.availableCoreCount(sst.coreCount(level)); 335 DEBUG_PRINT << " coreCount = " << config.availableCoreCount() << '\n'; 336 337 config.baseSpeed(sst.p1Freq(level)); 338 DEBUG_PRINT << " baseSpeed = " << config.baseSpeed() << '\n'; 339 340 config.maxSpeed(sst.p0Freq(level)); 341 DEBUG_PRINT << " maxSpeed = " << config.maxSpeed() << '\n'; 342 343 config.maxJunctionTemperature(sst.prochotTemp(level)); 344 DEBUG_PRINT << " procHot = " << config.maxJunctionTemperature() << '\n'; 345 346 // Construct BaseSpeedPrioritySettings 347 std::vector<std::tuple<uint32_t, std::vector<uint32_t>>> baseSpeeds; 348 if (sst.bfSupported(level)) 349 { 350 std::vector<uint32_t> totalCoreList, loFreqCoreList, hiFreqCoreList; 351 totalCoreList = sst.enabledCoreList(level); 352 hiFreqCoreList = sst.bfHighPriorityCoreList(level); 353 std::set_difference( 354 totalCoreList.begin(), totalCoreList.end(), hiFreqCoreList.begin(), 355 hiFreqCoreList.end(), 356 std::inserter(loFreqCoreList, loFreqCoreList.begin())); 357 358 baseSpeeds = {{sst.bfHighPriorityFreq(level), hiFreqCoreList}, 359 {sst.bfLowPriorityFreq(level), loFreqCoreList}}; 360 } 361 config.baseSpeedPrioritySettings(baseSpeeds); 362 363 config.turboProfile(sst.sseTurboProfile(level)); 364 } 365 366 /** 367 * Retrieve all SST configuration info for all discoverable CPUs, and publish 368 * the info on new D-Bus objects on the given bus connection. 369 * 370 * @param[in,out] ioc ASIO context. 371 * @param[in,out] conn D-Bus ASIO connection. 372 * 373 * @return Whether discovery was successfully finished. 374 * 375 * @throw PECIError A PECI command failed on a CPU which had previously 376 * responded to a command. 377 */ 378 static bool discoverCPUsAndConfigs(boost::asio::io_context& ioc, 379 sdbusplus::asio::connection& conn) 380 { 381 // Persistent list - only populated after complete/successful discovery 382 static std::vector<std::unique_ptr<CPUConfig>> cpus; 383 cpus.clear(); 384 385 // Temporary staging list. In case there is any failure, these temporary 386 // objects will get dropped to avoid presenting incomplete info until the 387 // next discovery attempt. 388 std::vector<std::unique_ptr<CPUConfig>> cpuList; 389 390 for (uint8_t i = MIN_CLIENT_ADDR; i <= MAX_CLIENT_ADDR; ++i) 391 { 392 // Let the event handler run any waiting tasks. If there is a lot of 393 // PECI contention, SST discovery could take a long time. This lets us 394 // get updates to hostState and handle any D-Bus requests. 395 ioc.poll(); 396 397 if (hostState == HostState::off) 398 { 399 return false; 400 } 401 402 unsigned int cpuIndex = i - MIN_CLIENT_ADDR; 403 DEBUG_PRINT << "Discovering CPU " << cpuIndex << '\n'; 404 405 // We could possibly check D-Bus for CPU presence and model, but PECI is 406 // 10x faster and so much simpler. 407 uint8_t cc, stepping; 408 CPUModel cpuModel; 409 EPECIStatus status = peci_GetCPUID(i, &cpuModel, &stepping, &cc); 410 if (status == PECI_CC_TIMEOUT) 411 { 412 // Timing out indicates the CPU is present but PCS services not 413 // working yet. Try again later. 414 throw PECIError("Get CPUID timed out"); 415 } 416 if (status == PECI_CC_CPU_NOT_PRESENT) 417 { 418 continue; 419 } 420 if (status != PECI_CC_SUCCESS || cc != PECI_DEV_CC_SUCCESS) 421 { 422 std::cerr << "GetCPUID returned status " << status 423 << ", cc = " << cc << '\n'; 424 continue; 425 } 426 427 std::unique_ptr<SSTInterface> sst = 428 getInstance(i, cpuModel, wakeAllowed); 429 430 if (!sst) 431 { 432 // No supported backend for this CPU. 433 continue; 434 } 435 436 if (!sst->ready()) 437 { 438 // Supported CPU but it can't be queried yet. Try again later. 439 std::cerr << "sst not ready yet\n"; 440 return false; 441 } 442 443 if (!sst->ppEnabled()) 444 { 445 // Supported CPU but the specific SKU doesn't support SST-PP. 446 std::cerr << "CPU doesn't support SST-PP\n"; 447 continue; 448 } 449 450 // Create the per-CPU configuration object 451 unsigned int currentLevel = sst->currentLevel(); 452 cpuList.emplace_back( 453 std::make_unique<CPUConfig>(conn, cpuIndex, cpuModel, currentLevel, 454 sst->bfEnabled(currentLevel))); 455 CPUConfig& cpu = *cpuList.back(); 456 457 bool foundCurrentLevel = false; 458 459 for (unsigned int level = 0; level <= sst->maxLevel(); ++level) 460 { 461 DEBUG_PRINT << "checking level " << level << ": "; 462 // levels 1 and 2 were legacy/deprecated, originally used for AVX 463 // license pre-granting. They may be reused for more levels in 464 // future generations. So we need to check for discontinuities. 465 if (!sst->levelSupported(level)) 466 { 467 DEBUG_PRINT << "not supported\n"; 468 continue; 469 } 470 471 DEBUG_PRINT << "supported\n"; 472 473 getSingleConfig(*sst, level, cpu.newConfig(level)); 474 475 if (level == currentLevel) 476 { 477 foundCurrentLevel = true; 478 } 479 } 480 481 DEBUG_PRINT << "current level is " << currentLevel << '\n'; 482 483 if (!foundCurrentLevel) 484 { 485 // In case we didn't encounter a PECI error, but also didn't find 486 // the config which is supposedly applied, we won't be able to 487 // populate the CurrentOperatingConfig so we have to remove this CPU 488 // from consideration. 489 std::cerr << "CPU " << cpuIndex 490 << " claimed SST support but invalid configs\n"; 491 cpuList.pop_back(); 492 continue; 493 } 494 } 495 496 cpuList.swap(cpus); 497 std::for_each(cpus.begin(), cpus.end(), [](auto& cpu) { cpu->finalize(); }); 498 return true; 499 } 500 501 /** 502 * Attempt discovery process, and if it fails, wait for 10 seconds to try again. 503 */ 504 static void discoverOrWait() 505 { 506 static boost::asio::steady_timer peciRetryTimer(dbus::getIOContext()); 507 static int peciErrorCount = 0; 508 bool finished = false; 509 510 // This function may be called from hostStateHandler or by retrying itself. 511 // In case those overlap, cancel any outstanding retry timer. 512 peciRetryTimer.cancel(); 513 514 try 515 { 516 DEBUG_PRINT << "Starting discovery\n"; 517 finished = discoverCPUsAndConfigs(dbus::getIOContext(), 518 *dbus::getConnection()); 519 } 520 catch (const PECIError& err) 521 { 522 std::cerr << "PECI Error: " << err.what() << '\n'; 523 524 // In case of repeated failure to finish discovery, turn off this 525 // feature altogether. Possible cause is that the CPU model does not 526 // actually support the necessary commands. 527 if (++peciErrorCount >= 50) 528 { 529 std::cerr << "Aborting SST discovery\n"; 530 return; 531 } 532 533 std::cerr << "Retrying SST discovery later\n"; 534 } 535 536 DEBUG_PRINT << "Finished discovery attempt: " << finished << '\n'; 537 538 // Retry later if no CPUs were available, or there was a PECI error. 539 if (!finished) 540 { 541 peciRetryTimer.expires_after(std::chrono::seconds(10)); 542 peciRetryTimer.async_wait([](boost::system::error_code ec) { 543 if (ec) 544 { 545 if (ec != boost::asio::error::operation_aborted) 546 { 547 std::cerr << "SST PECI Retry Timer failed: " << ec << '\n'; 548 } 549 return; 550 } 551 discoverOrWait(); 552 }); 553 } 554 } 555 556 static void hostStateHandler(HostState prevState, HostState) 557 { 558 if (prevState == HostState::off) 559 { 560 // Start or re-start discovery any time the host moves out of the 561 // powered off state. 562 discoverOrWait(); 563 } 564 } 565 566 void init() 567 { 568 addHostStateCallback(hostStateHandler); 569 } 570 571 } // namespace sst 572 } // namespace cpu_info 573