1 // Copyright (c) 2020 Intel Corporation 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "speed_select.hpp" 16 17 #include "cpuinfo.hpp" 18 #include "cpuinfo_utils.hpp" 19 20 #include <peci.h> 21 22 #include <boost/asio/error.hpp> 23 #include <boost/asio/steady_timer.hpp> 24 #include <xyz/openbmc_project/Common/Device/error.hpp> 25 #include <xyz/openbmc_project/Common/error.hpp> 26 #include <xyz/openbmc_project/Control/Processor/CurrentOperatingConfig/server.hpp> 27 #include <xyz/openbmc_project/Inventory/Item/Cpu/OperatingConfig/server.hpp> 28 29 #include <algorithm> 30 #include <iostream> 31 #include <memory> 32 #include <stdexcept> 33 #include <string> 34 35 namespace cpu_info 36 { 37 namespace sst 38 { 39 40 // Specialize char to print the integer value instead of ascii. We basically 41 // never want to print a single ascii char. 42 std::ostream& operator<<(std::ostream& os, uint8_t value) 43 { 44 return os << static_cast<int>(value); 45 } 46 47 bool checkPECIStatus(EPECIStatus libStatus, uint8_t completionCode) 48 { 49 if (libStatus != PECI_CC_SUCCESS || completionCode != PECI_DEV_CC_SUCCESS) 50 { 51 std::cerr << "PECI command failed." 52 << " Driver Status = " << libStatus << "," 53 << " Completion Code = " << completionCode << '\n'; 54 return false; 55 } 56 return true; 57 } 58 59 std::vector<uint32_t> convertMaskToList(std::bitset<64> mask) 60 { 61 std::vector<uint32_t> bitList; 62 for (size_t i = 0; i < mask.size(); ++i) 63 { 64 if (mask.test(i)) 65 { 66 bitList.push_back(i); 67 } 68 } 69 return bitList; 70 } 71 72 static std::vector<BackendProvider>& getProviders() 73 { 74 static auto* providers = new std::vector<BackendProvider>; 75 return *providers; 76 } 77 78 void registerBackend(BackendProvider providerFn) 79 { 80 getProviders().push_back(providerFn); 81 } 82 83 std::unique_ptr<SSTInterface> getInstance(uint8_t address, CPUModel model) 84 { 85 DEBUG_PRINT << "Searching for provider for " << address << ", model " 86 << std::hex << model << std::dec << '\n'; 87 for (const auto& provider : getProviders()) 88 { 89 try 90 { 91 auto interface = provider(address, model); 92 DEBUG_PRINT << "returned " << interface << '\n'; 93 if (interface) 94 { 95 return interface; 96 } 97 } 98 catch (...) 99 {} 100 } 101 DEBUG_PRINT << "No supported backends found\n"; 102 return nullptr; 103 } 104 105 using BaseCurrentOperatingConfig = 106 sdbusplus::server::object_t<sdbusplus::server::xyz::openbmc_project:: 107 control::processor::CurrentOperatingConfig>; 108 109 using BaseOperatingConfig = 110 sdbusplus::server::object_t<sdbusplus::server::xyz::openbmc_project:: 111 inventory::item::cpu::OperatingConfig>; 112 113 class OperatingConfig : public BaseOperatingConfig 114 { 115 public: 116 std::string path; 117 unsigned int level; 118 119 public: 120 using BaseOperatingConfig::BaseOperatingConfig; 121 OperatingConfig(sdbusplus::bus_t& bus, unsigned int level_, 122 std::string path_) : 123 BaseOperatingConfig(bus, path_.c_str(), action::defer_emit), 124 path(std::move(path_)), level(level_) 125 {} 126 }; 127 128 class CPUConfig : public BaseCurrentOperatingConfig 129 { 130 private: 131 /** Objects describing all available SST configs - not modifiable. */ 132 std::vector<std::unique_ptr<OperatingConfig>> availConfigs; 133 sdbusplus::bus_t& bus; 134 const uint8_t peciAddress; 135 const std::string path; ///< D-Bus path of CPU object 136 const CPUModel cpuModel; 137 138 // Keep mutable copies of the properties so we can cache values that we 139 // retrieve in the getters. We don't want to throw an error on a D-Bus 140 // get-property call (extra error handling in clients), so by caching we can 141 // hide any temporary hiccup in PECI communication. 142 // These values can be changed by in-band software so we have to do a full 143 // PECI read on every get-property, and can't assume that values will change 144 // only when set-property is done. 145 mutable unsigned int currentLevel; 146 mutable bool bfEnabled; 147 148 /** 149 * Enforce common pre-conditions for D-Bus set property handlers. 150 */ 151 void setPropertyCheckOrThrow(SSTInterface& sst) 152 { 153 if (!sst.supportsControl()) 154 { 155 throw sdbusplus::xyz::openbmc_project::Common::Error::NotAllowed(); 156 } 157 if (hostState != HostState::postComplete || !sst.ready()) 158 { 159 throw sdbusplus::xyz::openbmc_project::Common::Error::Unavailable(); 160 } 161 } 162 163 public: 164 CPUConfig(sdbusplus::bus_t& bus_, uint8_t index, CPUModel model) : 165 BaseCurrentOperatingConfig(bus_, generatePath(index).c_str(), 166 action::defer_emit), 167 bus(bus_), peciAddress(index + MIN_CLIENT_ADDR), 168 path(generatePath(index)), cpuModel(model), currentLevel(0), 169 bfEnabled(false) 170 {} 171 172 // 173 // D-Bus Property Overrides 174 // 175 176 sdbusplus::message::object_path appliedConfig() const override 177 { 178 DEBUG_PRINT << "Reading AppliedConfig\n"; 179 if (hostState != HostState::off) 180 { 181 // Otherwise, try to read current state 182 auto sst = getInstance(peciAddress, cpuModel); 183 if (!sst || !sst->ready()) 184 { 185 std::cerr << __func__ 186 << ": Failed to get SST provider instance\n"; 187 } 188 else 189 { 190 try 191 { 192 currentLevel = sst->currentLevel(); 193 } 194 catch (const PECIError& error) 195 { 196 std::cerr << "Failed to get SST-PP level: " << error.what() 197 << "\n"; 198 } 199 } 200 } 201 return generateConfigPath(currentLevel); 202 } 203 204 bool baseSpeedPriorityEnabled() const override 205 { 206 DEBUG_PRINT << "Reading BaseSpeedPriorityEnabled\n"; 207 if (hostState != HostState::off) 208 { 209 auto sst = getInstance(peciAddress, cpuModel); 210 if (!sst || !sst->ready()) 211 { 212 std::cerr << __func__ 213 << ": Failed to get SST provider instance\n"; 214 } 215 else 216 { 217 try 218 { 219 bfEnabled = sst->bfEnabled(currentLevel); 220 } 221 catch (const PECIError& error) 222 { 223 std::cerr << "Failed to get SST-BF status: " << error.what() 224 << "\n"; 225 } 226 } 227 } 228 return bfEnabled; 229 } 230 231 sdbusplus::message::object_path 232 appliedConfig(sdbusplus::message::object_path value) override 233 { 234 DEBUG_PRINT << "Writing AppliedConfig\n"; 235 const OperatingConfig* newConfig = nullptr; 236 for (const auto& config : availConfigs) 237 { 238 if (config->path == value.str) 239 { 240 newConfig = config.get(); 241 } 242 } 243 244 if (newConfig == nullptr) 245 { 246 throw sdbusplus::xyz::openbmc_project::Common::Error:: 247 InvalidArgument(); 248 } 249 250 auto sst = getInstance(peciAddress, cpuModel); 251 if (!sst) 252 { 253 std::cerr << __func__ << ": Failed to get SST provider instance\n"; 254 return sdbusplus::message::object_path(); 255 } 256 setPropertyCheckOrThrow(*sst); 257 try 258 { 259 sst->setCurrentLevel(newConfig->level); 260 currentLevel = newConfig->level; 261 } 262 catch (const PECIError& error) 263 { 264 std::cerr << "Failed to set new SST-PP level: " << error.what() 265 << "\n"; 266 throw sdbusplus::xyz::openbmc_project::Common::Device::Error:: 267 WriteFailure(); 268 } 269 270 // return value not used 271 return sdbusplus::message::object_path(); 272 } 273 274 bool baseSpeedPriorityEnabled(bool /* value */) override 275 { 276 DEBUG_PRINT << "Writing BaseSpeedPriorityEnabled not allowed\n"; 277 throw sdbusplus::xyz::openbmc_project::Common::Error::NotAllowed(); 278 // return value not used 279 return false; 280 } 281 282 // 283 // Additions 284 // 285 286 OperatingConfig& newConfig(unsigned int level) 287 { 288 availConfigs.emplace_back(std::make_unique<OperatingConfig>( 289 bus, level, generateConfigPath(level))); 290 return *availConfigs.back(); 291 } 292 293 std::string generateConfigPath(unsigned int level) const 294 { 295 return path + "/config" + std::to_string(level); 296 } 297 298 /** 299 * Emit the interface added signals which were deferred. This is required 300 * for ObjectMapper to pick up the objects, if we initially defered the 301 * signal emitting. 302 */ 303 void finalize() 304 { 305 emit_added(); 306 for (auto& config : availConfigs) 307 { 308 config->emit_added(); 309 } 310 } 311 312 static std::string generatePath(int index) 313 { 314 return cpuPath + std::to_string(index); 315 } 316 }; 317 318 /** 319 * Retrieve the SST parameters for a single config and fill the values into the 320 * properties on the D-Bus interface. 321 * 322 * @param[in,out] sst Interface to SST backend. 323 * @param[in] level Config TDP level to retrieve. 324 * @param[out] config D-Bus interface to update. 325 */ 326 static void getSingleConfig(SSTInterface& sst, unsigned int level, 327 OperatingConfig& config) 328 { 329 config.powerLimit(sst.tdp(level)); 330 DEBUG_PRINT << " TDP = " << config.powerLimit() << '\n'; 331 332 config.availableCoreCount(sst.coreCount(level)); 333 DEBUG_PRINT << " coreCount = " << config.availableCoreCount() << '\n'; 334 335 config.baseSpeed(sst.p1Freq(level)); 336 DEBUG_PRINT << " baseSpeed = " << config.baseSpeed() << '\n'; 337 338 config.maxSpeed(sst.p0Freq(level)); 339 DEBUG_PRINT << " maxSpeed = " << config.maxSpeed() << '\n'; 340 341 config.maxJunctionTemperature(sst.prochotTemp(level)); 342 DEBUG_PRINT << " procHot = " << config.maxJunctionTemperature() << '\n'; 343 344 // Construct BaseSpeedPrioritySettings 345 std::vector<std::tuple<uint32_t, std::vector<uint32_t>>> baseSpeeds; 346 if (sst.bfSupported(level)) 347 { 348 std::vector<uint32_t> totalCoreList, loFreqCoreList, hiFreqCoreList; 349 totalCoreList = sst.enabledCoreList(level); 350 hiFreqCoreList = sst.bfHighPriorityCoreList(level); 351 std::set_difference( 352 totalCoreList.begin(), totalCoreList.end(), hiFreqCoreList.begin(), 353 hiFreqCoreList.end(), 354 std::inserter(loFreqCoreList, loFreqCoreList.begin())); 355 356 baseSpeeds = {{sst.bfHighPriorityFreq(level), hiFreqCoreList}, 357 {sst.bfLowPriorityFreq(level), loFreqCoreList}}; 358 } 359 config.baseSpeedPrioritySettings(baseSpeeds); 360 361 config.turboProfile(sst.sseTurboProfile(level)); 362 } 363 364 /** 365 * Retrieve all SST configuration info for all discoverable CPUs, and publish 366 * the info on new D-Bus objects on the given bus connection. 367 * 368 * @param[in,out] ioc ASIO context. 369 * @param[in,out] conn D-Bus ASIO connection. 370 * 371 * @return Whether discovery was successfully finished. 372 * 373 * @throw PECIError A PECI command failed on a CPU which had previously 374 * responded to a command. 375 */ 376 static bool discoverCPUsAndConfigs(boost::asio::io_context& ioc, 377 sdbusplus::asio::connection& conn) 378 { 379 // Persistent list - only populated after complete/successful discovery 380 static std::vector<std::unique_ptr<CPUConfig>> cpus; 381 cpus.clear(); 382 383 // Temporary staging list. In case there is any failure, these temporary 384 // objects will get dropped to avoid presenting incomplete info until the 385 // next discovery attempt. 386 std::vector<std::unique_ptr<CPUConfig>> cpuList; 387 388 for (uint8_t i = MIN_CLIENT_ADDR; i <= MAX_CLIENT_ADDR; ++i) 389 { 390 // Let the event handler run any waiting tasks. If there is a lot of 391 // PECI contention, SST discovery could take a long time. This lets us 392 // get updates to hostState and handle any D-Bus requests. 393 ioc.poll(); 394 395 if (hostState == HostState::off) 396 { 397 return false; 398 } 399 400 unsigned int cpuIndex = i - MIN_CLIENT_ADDR; 401 DEBUG_PRINT << "Discovering CPU " << cpuIndex << '\n'; 402 403 // We could possibly check D-Bus for CPU presence and model, but PECI is 404 // 10x faster and so much simpler. 405 uint8_t cc, stepping; 406 CPUModel cpuModel; 407 EPECIStatus status = peci_GetCPUID(i, &cpuModel, &stepping, &cc); 408 if (status == PECI_CC_TIMEOUT) 409 { 410 // Timing out indicates the CPU is present but PCS services not 411 // working yet. Try again later. 412 throw PECIError("Get CPUID timed out"); 413 } 414 if (status == PECI_CC_CPU_NOT_PRESENT) 415 { 416 continue; 417 } 418 if (status != PECI_CC_SUCCESS || cc != PECI_DEV_CC_SUCCESS) 419 { 420 std::cerr << "GetCPUID returned status " << status 421 << ", cc = " << cc << '\n'; 422 continue; 423 } 424 425 std::unique_ptr<SSTInterface> sst = getInstance(i, cpuModel); 426 427 if (!sst) 428 { 429 // No supported backend for this CPU. 430 continue; 431 } 432 433 if (!sst->ready()) 434 { 435 // Supported CPU but it can't be queried yet. Try again later. 436 std::cerr << "sst not ready yet\n"; 437 return false; 438 } 439 440 if (!sst->ppEnabled()) 441 { 442 // Supported CPU but the specific SKU doesn't support SST-PP. 443 std::cerr << "CPU doesn't support SST-PP\n"; 444 continue; 445 } 446 447 // Create the per-CPU configuration object 448 cpuList.emplace_back( 449 std::make_unique<CPUConfig>(conn, cpuIndex, cpuModel)); 450 CPUConfig& cpu = *cpuList.back(); 451 452 bool foundCurrentLevel = false; 453 454 for (unsigned int level = 0; level <= sst->maxLevel(); ++level) 455 { 456 DEBUG_PRINT << "checking level " << level << ": "; 457 // levels 1 and 2 were legacy/deprecated, originally used for AVX 458 // license pre-granting. They may be reused for more levels in 459 // future generations. So we need to check for discontinuities. 460 if (!sst->levelSupported(level)) 461 { 462 DEBUG_PRINT << "not supported\n"; 463 continue; 464 } 465 466 DEBUG_PRINT << "supported\n"; 467 468 getSingleConfig(*sst, level, cpu.newConfig(level)); 469 470 if (level == sst->currentLevel()) 471 { 472 foundCurrentLevel = true; 473 } 474 } 475 476 DEBUG_PRINT << "current level is " << sst->currentLevel() << '\n'; 477 478 if (!foundCurrentLevel) 479 { 480 // In case we didn't encounter a PECI error, but also didn't find 481 // the config which is supposedly applied, we won't be able to 482 // populate the CurrentOperatingConfig so we have to remove this CPU 483 // from consideration. 484 std::cerr << "CPU " << cpuIndex 485 << " claimed SST support but invalid configs\n"; 486 cpuList.pop_back(); 487 continue; 488 } 489 } 490 491 cpuList.swap(cpus); 492 std::for_each(cpus.begin(), cpus.end(), [](auto& cpu) { cpu->finalize(); }); 493 return true; 494 } 495 496 /** 497 * Attempt discovery process, and if it fails, wait for 10 seconds to try again. 498 */ 499 static void discoverOrWait() 500 { 501 static boost::asio::steady_timer peciRetryTimer(dbus::getIOContext()); 502 static int peciErrorCount = 0; 503 bool finished = false; 504 505 // This function may be called from hostStateHandler or by retrying itself. 506 // In case those overlap, cancel any outstanding retry timer. 507 peciRetryTimer.cancel(); 508 509 try 510 { 511 DEBUG_PRINT << "Starting discovery\n"; 512 finished = discoverCPUsAndConfigs(dbus::getIOContext(), 513 *dbus::getConnection()); 514 } 515 catch (const PECIError& err) 516 { 517 std::cerr << "PECI Error: " << err.what() << '\n'; 518 519 // In case of repeated failure to finish discovery, turn off this 520 // feature altogether. Possible cause is that the CPU model does not 521 // actually support the necessary commands. 522 if (++peciErrorCount >= 50) 523 { 524 std::cerr << "Aborting SST discovery\n"; 525 return; 526 } 527 528 std::cerr << "Retrying SST discovery later\n"; 529 } 530 531 DEBUG_PRINT << "Finished discovery attempt: " << finished << '\n'; 532 533 // Retry later if no CPUs were available, or there was a PECI error. 534 if (!finished) 535 { 536 peciRetryTimer.expires_after(std::chrono::seconds(10)); 537 peciRetryTimer.async_wait([](boost::system::error_code ec) { 538 if (ec) 539 { 540 if (ec != boost::asio::error::operation_aborted) 541 { 542 std::cerr << "SST PECI Retry Timer failed: " << ec << '\n'; 543 } 544 return; 545 } 546 discoverOrWait(); 547 }); 548 } 549 } 550 551 static void hostStateHandler(HostState prevState, HostState) 552 { 553 if (prevState == HostState::off) 554 { 555 // Start or re-start discovery any time the host moves out of the 556 // powered off state. 557 discoverOrWait(); 558 } 559 } 560 561 void init() 562 { 563 addHostStateCallback(hostStateHandler); 564 } 565 566 } // namespace sst 567 } // namespace cpu_info 568