// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * amd-pstate.c - AMD Processor P-state Frequency Driver
 *
 * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Author: Huang Rui <ray.huang@amd.com>
 *
 * AMD P-State introduces a new CPU performance scaling design for AMD
 * processors using the ACPI Collaborative Performance and Power Control
 * (CPPC) feature, which works with the AMD SMU firmware to provide a finer
 * grained frequency control range. It replaces the legacy ACPI P-States
 * control and provides a flexible, low-latency interface for the Linux
 * kernel to communicate performance hints directly to hardware.
 *
 * AMD P-State is supported on recent AMD Zen-based CPU series, including
 * some Zen2 and Zen3 processors. _CPC needs to be present in the ACPI
 * tables of an AMD P-State supported system. There are two types of
 * hardware implementations for AMD P-State: 1) the full MSR solution and
 * 2) the shared memory solution. The X86_FEATURE_CPPC CPU feature flag is
 * used to distinguish between the two types.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
#include <linux/slab.h>
#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>
#include <linux/static_call.h>

#include <acpi/processor.h>
#include <acpi/cppc_acpi.h>

#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>
#include "amd-pstate-trace.h"

#define AMD_PSTATE_TRANSITION_LATENCY	0x20000
#define AMD_PSTATE_TRANSITION_DELAY	500

/*
 * TODO: We need more time to fine tune the shared memory solution for these
 * processors together with the community.
 *
 * SUSE has reported performance drops in some CPU benchmarks. We are working
 * with them to fine tune the shared memory solution, so it is disabled by
 * default (such processors fall back to acpi-cpufreq), and a module
 * parameter is provided to enable it manually for debugging.
 */
static bool shared_mem = false;
module_param(shared_mem, bool, 0444);
MODULE_PARM_DESC(shared_mem,
		 "enable amd-pstate on processors with shared memory solution (false = disabled (default), true = enabled)");
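/*
 * Usage sketch (added for illustration, not part of the original source):
 * on a shared memory system the driver can be enabled either at boot with
 * the kernel command line parameter "amd_pstate.shared_mem=1" (the hint
 * printed by amd_pstate_init() below) or, when built as a module, with
 * "modprobe amd-pstate shared_mem=1". The 0444 permissions make the
 * parameter read-only at runtime.
 */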

static struct cpufreq_driver amd_pstate_driver;

/**
 * struct amd_cpudata - private CPU data for AMD P-State
 * @cpu: CPU number
 * @req: constraint request to apply
 * @cppc_req_cached: cached performance request hints
 * @highest_perf: the maximum performance an individual processor may reach,
 *		  assuming ideal conditions
 * @nominal_perf: the maximum sustained performance level of the processor,
 *		  assuming ideal operating conditions
 * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
 *			   savings are achieved
 * @lowest_perf: the absolute lowest performance level of the processor
 * @max_freq: the frequency mapped to highest_perf
 * @min_freq: the frequency mapped to lowest_perf
 * @nominal_freq: the frequency mapped to nominal_perf
 * @lowest_nonlinear_freq: the frequency mapped to lowest_nonlinear_perf
 * @boost_supported: whether the processor or SBIOS supports boost mode
 *
 * The amd_cpudata structure holds the private data of each CPU thread in AMD
 * P-State and represents all the attributes and goals that AMD P-State
 * requests at runtime.
 */
struct amd_cpudata {
	int	cpu;

	struct	freq_qos_request req[2];
	u64	cppc_req_cached;

	u32	highest_perf;
	u32	nominal_perf;
	u32	lowest_nonlinear_perf;
	u32	lowest_perf;

	u32	max_freq;
	u32	min_freq;
	u32	nominal_freq;
	u32	lowest_nonlinear_freq;

	bool	boost_supported;
};

static inline int pstate_enable(bool enable)
{
	return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable);
}

static int cppc_enable(bool enable)
{
	int cpu, ret = 0;

	for_each_present_cpu(cpu) {
		ret = cppc_set_enable(cpu, enable);
		if (ret)
			return ret;
	}

	return ret;
}

DEFINE_STATIC_CALL(amd_pstate_enable, pstate_enable);

static inline int amd_pstate_enable(bool enable)
{
	return static_call(amd_pstate_enable)(enable);
}
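/*
 * Dispatch note (added for clarity): the amd_pstate_*() wrappers here and
 * below default to the MSR implementations via DEFINE_STATIC_CALL(). On
 * shared memory systems, amd_pstate_init() retargets them to the cppc_*()
 * variants with static_call_update(), so the solution-specific backend is
 * selected once at load time instead of through an indirect function
 * pointer on every call.
 */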

static int pstate_init_perf(struct amd_cpudata *cpudata)
{
	u64 cap1;

	int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
				     &cap1);
	if (ret)
		return ret;

	/*
	 * TODO: Introduce AMD specific power feature.
	 *
	 * The CPPC entry doesn't indicate the highest performance in some ASICs.
	 */
	WRITE_ONCE(cpudata->highest_perf, amd_get_highest_perf());

	WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
	WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
	WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));

	return 0;
}

static int cppc_init_perf(struct amd_cpudata *cpudata)
{
	struct cppc_perf_caps cppc_perf;

	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	WRITE_ONCE(cpudata->highest_perf, amd_get_highest_perf());

	WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
	WRITE_ONCE(cpudata->lowest_nonlinear_perf,
		   cppc_perf.lowest_nonlinear_perf);
	WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);

	return 0;
}

DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf);

static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata)
{
	return static_call(amd_pstate_init_perf)(cpudata);
}

static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
			       u32 des_perf, u32 max_perf, bool fast_switch)
{
	/*
	 * The min/des/max values were already packed into cppc_req_cached
	 * by the caller, amd_pstate_update(), so only the cached request
	 * needs to be written to the MSR here.
	 */
	if (fast_switch)
		wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached));
	else
		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
			      READ_ONCE(cpudata->cppc_req_cached));
}

static void cppc_update_perf(struct amd_cpudata *cpudata,
			     u32 min_perf, u32 des_perf,
			     u32 max_perf, bool fast_switch)
{
	struct cppc_perf_ctrls perf_ctrls;

	perf_ctrls.max_perf = max_perf;
	perf_ctrls.min_perf = min_perf;
	perf_ctrls.desired_perf = des_perf;

	cppc_set_perf(cpudata->cpu, &perf_ctrls);
}

DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf);

static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
					  u32 min_perf, u32 des_perf,
					  u32 max_perf, bool fast_switch)
{
	static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
					    max_perf, fast_switch);
}

static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
			      u32 des_perf, u32 max_perf, bool fast_switch)
{
	u64 prev = READ_ONCE(cpudata->cppc_req_cached);
	u64 value = prev;

	value &= ~AMD_CPPC_MIN_PERF(~0L);
	value |= AMD_CPPC_MIN_PERF(min_perf);

	value &= ~AMD_CPPC_DES_PERF(~0L);
	value |= AMD_CPPC_DES_PERF(des_perf);

	value &= ~AMD_CPPC_MAX_PERF(~0L);
	value |= AMD_CPPC_MAX_PERF(max_perf);

	trace_amd_pstate_perf(min_perf, des_perf, max_perf,
			      cpudata->cpu, (value != prev), fast_switch);

	if (value == prev)
		return;

	WRITE_ONCE(cpudata->cppc_req_cached, value);

	amd_pstate_update_perf(cpudata, min_perf, des_perf,
			       max_perf, fast_switch);
}

static int amd_pstate_verify(struct cpufreq_policy_data *policy)
{
	cpufreq_verify_within_cpu_limits(policy);

	return 0;
}

static int amd_pstate_target(struct cpufreq_policy *policy,
			     unsigned int target_freq,
			     unsigned int relation)
{
	struct cpufreq_freqs freqs;
	struct amd_cpudata *cpudata = policy->driver_data;
	unsigned long max_perf, min_perf, des_perf, cap_perf;

	if (!cpudata->max_freq)
		return -ENODEV;

	cap_perf = READ_ONCE(cpudata->highest_perf);
	min_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
	max_perf = cap_perf;

	freqs.old = policy->cur;
	freqs.new = target_freq;

	des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf,
				     cpudata->max_freq);

	cpufreq_freq_transition_begin(policy, &freqs);
	amd_pstate_update(cpudata, min_perf, des_perf,
			  max_perf, false);
	cpufreq_freq_transition_end(policy, &freqs, false);

	return 0;
}
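/*
 * Worked example for the des_perf calculation in amd_pstate_target() above
 * (hypothetical numbers, added for illustration): if max_freq is
 * 4000000 kHz and cap_perf (highest_perf) is 255, a target_freq of
 * 2000000 kHz yields
 *   des_perf = DIV_ROUND_CLOSEST(2000000 * 255, 4000000) = 128,
 * i.e. the requested frequency is mapped linearly onto the abstract CPPC
 * performance scale.
 */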

static void amd_pstate_adjust_perf(unsigned int cpu,
				   unsigned long _min_perf,
				   unsigned long target_perf,
				   unsigned long capacity)
{
	unsigned long max_perf, min_perf, des_perf,
		      cap_perf, lowest_nonlinear_perf;
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	struct amd_cpudata *cpudata;

	if (!policy)
		return;

	cpudata = policy->driver_data;

	cap_perf = READ_ONCE(cpudata->highest_perf);
	lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);

	des_perf = cap_perf;
	if (target_perf < capacity)
		des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity);

	min_perf = READ_ONCE(cpudata->highest_perf);
	if (_min_perf < capacity)
		min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity);

	if (min_perf < lowest_nonlinear_perf)
		min_perf = lowest_nonlinear_perf;

	max_perf = cap_perf;
	if (max_perf < min_perf)
		max_perf = min_perf;

	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);

	amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true);

	/* Drop the reference taken by cpufreq_cpu_get() above. */
	cpufreq_cpu_put(policy);
}

static int amd_get_min_freq(struct amd_cpudata *cpudata)
{
	struct cppc_perf_caps cppc_perf;

	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	/* Switch to kHz */
	return cppc_perf.lowest_freq * 1000;
}

static int amd_get_max_freq(struct amd_cpudata *cpudata)
{
	struct cppc_perf_caps cppc_perf;
	u32 max_perf, max_freq, nominal_freq, nominal_perf;
	u64 boost_ratio;

	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	nominal_freq = cppc_perf.nominal_freq;
	nominal_perf = READ_ONCE(cpudata->nominal_perf);
	max_perf = READ_ONCE(cpudata->highest_perf);

	boost_ratio = div_u64(max_perf << SCHED_CAPACITY_SHIFT,
			      nominal_perf);

	max_freq = nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT;

	/* Switch to kHz */
	return max_freq * 1000;
}
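/*
 * Worked example for the fixed-point boost math above (hypothetical
 * numbers, added for illustration): with nominal_freq = 3000 MHz,
 * nominal_perf = 180 and highest_perf = 240,
 *   boost_ratio = (240 << 10) / 180 = 1365,
 *   max_freq = (3000 * 1365) >> 10 = 3999 MHz (~4.0 GHz),
 * i.e. the boost headroom is the highest_perf/nominal_perf ratio carried
 * as a SCHED_CAPACITY_SHIFT (10-bit) fixed-point value.
 */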

static int amd_get_nominal_freq(struct amd_cpudata *cpudata)
{
	struct cppc_perf_caps cppc_perf;

	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	/* Switch to kHz */
	return cppc_perf.nominal_freq * 1000;
}

static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata)
{
	struct cppc_perf_caps cppc_perf;
	u32 lowest_nonlinear_freq, lowest_nonlinear_perf,
	    nominal_freq, nominal_perf;
	u64 lowest_nonlinear_ratio;

	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	nominal_freq = cppc_perf.nominal_freq;
	nominal_perf = READ_ONCE(cpudata->nominal_perf);

	lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf;

	lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT,
					 nominal_perf);

	lowest_nonlinear_freq = nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT;

	/* Switch to kHz */
	return lowest_nonlinear_freq * 1000;
}

static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int ret;

	if (!cpudata->boost_supported) {
		pr_err("Boost mode is not supported by this processor or SBIOS\n");
		return -EINVAL;
	}

	if (state)
		policy->cpuinfo.max_freq = cpudata->max_freq;
	else
		policy->cpuinfo.max_freq = cpudata->nominal_freq;

	policy->max = policy->cpuinfo.max_freq;

	ret = freq_qos_update_request(&cpudata->req[1],
				      policy->cpuinfo.max_freq);
	if (ret < 0)
		return ret;

	return 0;
}

static void amd_pstate_boost_init(struct amd_cpudata *cpudata)
{
	u32 highest_perf, nominal_perf;

	highest_perf = READ_ONCE(cpudata->highest_perf);
	nominal_perf = READ_ONCE(cpudata->nominal_perf);

	if (highest_perf <= nominal_perf)
		return;

	cpudata->boost_supported = true;
	amd_pstate_driver.boost_enabled = true;
}

static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
{
	int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
	struct device *dev;
	struct amd_cpudata *cpudata;

	dev = get_cpu_device(policy->cpu);
	if (!dev)
		return -ENODEV;

	cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
	if (!cpudata)
		return -ENOMEM;

	cpudata->cpu = policy->cpu;

	ret = amd_pstate_init_perf(cpudata);
	if (ret)
		goto free_cpudata1;

	min_freq = amd_get_min_freq(cpudata);
	max_freq = amd_get_max_freq(cpudata);
	nominal_freq = amd_get_nominal_freq(cpudata);
	lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);

	if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
		dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
			min_freq, max_freq);
		ret = -EINVAL;
		goto free_cpudata1;
	}

	policy->cpuinfo.transition_latency = AMD_PSTATE_TRANSITION_LATENCY;
	policy->transition_delay_us = AMD_PSTATE_TRANSITION_DELAY;

	policy->min = min_freq;
	policy->max = max_freq;

	policy->cpuinfo.min_freq = min_freq;
	policy->cpuinfo.max_freq = max_freq;

	/* It will be updated by governor */
	policy->cur = policy->cpuinfo.min_freq;

	if (boot_cpu_has(X86_FEATURE_CPPC))
		policy->fast_switch_possible = true;

	ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0],
				   FREQ_QOS_MIN, policy->cpuinfo.min_freq);
	if (ret < 0) {
		dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
		goto free_cpudata1;
	}

	ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1],
				   FREQ_QOS_MAX, policy->cpuinfo.max_freq);
	if (ret < 0) {
		dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
		goto free_cpudata2;
	}

	/* Initial processor data capability frequencies */
	cpudata->max_freq = max_freq;
	cpudata->min_freq = min_freq;
	cpudata->nominal_freq = nominal_freq;
	cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;

	policy->driver_data = cpudata;

	amd_pstate_boost_init(cpudata);

	return 0;

free_cpudata2:
	freq_qos_remove_request(&cpudata->req[0]);
free_cpudata1:
	kfree(cpudata);
	return ret;
}

static int amd_pstate_cpu_exit(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata;

	cpudata = policy->driver_data;

	freq_qos_remove_request(&cpudata->req[1]);
	freq_qos_remove_request(&cpudata->req[0]);
	kfree(cpudata);

	return 0;
}

/* Sysfs attributes */

/*
 * This frequency indicates the maximum hardware frequency. If boost is
 * supported but not active, it is larger than the one reported in cpuinfo.
 */
static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
					char *buf)
{
	int max_freq;
	struct amd_cpudata *cpudata;

	cpudata = policy->driver_data;

	max_freq = amd_get_max_freq(cpudata);
	if (max_freq < 0)
		return max_freq;

	return sprintf(&buf[0], "%u\n", max_freq);
}

static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy,
						     char *buf)
{
	int freq;
	struct amd_cpudata *cpudata;

	cpudata = policy->driver_data;

	freq = amd_get_lowest_nonlinear_freq(cpudata);
	if (freq < 0)
		return freq;

	return sprintf(&buf[0], "%u\n", freq);
}

/*
 * In some ASICs, the highest_perf is not the one in the _CPC table, so we
 * need to expose it to sysfs.
 */
static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
					    char *buf)
{
	u32 perf;
	struct amd_cpudata *cpudata = policy->driver_data;

	perf = READ_ONCE(cpudata->highest_perf);

	return sprintf(&buf[0], "%u\n", perf);
}

cpufreq_freq_attr_ro(amd_pstate_max_freq);
cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);

cpufreq_freq_attr_ro(amd_pstate_highest_perf);

static struct freq_attr *amd_pstate_attr[] = {
	&amd_pstate_max_freq,
	&amd_pstate_lowest_nonlinear_freq,
	&amd_pstate_highest_perf,
	NULL,
};
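/*
 * Usage sketch (added for illustration, not part of the original source):
 * following the standard per-policy cpufreq sysfs layout, these read-only
 * attributes are expected to appear as, e.g. for cpu0,
 *   /sys/devices/system/cpu/cpu0/cpufreq/amd_pstate_max_freq
 *   /sys/devices/system/cpu/cpu0/cpufreq/amd_pstate_lowest_nonlinear_freq
 *   /sys/devices/system/cpu/cpu0/cpufreq/amd_pstate_highest_perf
 * The frequencies are reported in kHz, matching the cpufreq convention.
 */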

static struct cpufreq_driver amd_pstate_driver = {
	.flags		= CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
	.verify		= amd_pstate_verify,
	.target		= amd_pstate_target,
	.init		= amd_pstate_cpu_init,
	.exit		= amd_pstate_cpu_exit,
	.set_boost	= amd_pstate_set_boost,
	.name		= "amd-pstate",
	.attr		= amd_pstate_attr,
};

static int __init amd_pstate_init(void)
{
	int ret;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
		return -ENODEV;

	if (!acpi_cpc_valid()) {
		pr_debug("the _CPC object is not present in SBIOS\n");
		return -ENODEV;
	}

	/* don't keep reloading if cpufreq_driver exists */
	if (cpufreq_get_current_driver())
		return -EEXIST;

	/* capability check */
	if (boot_cpu_has(X86_FEATURE_CPPC)) {
		pr_debug("AMD CPPC MSR based functionality is supported\n");
		amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf;
	} else if (shared_mem) {
		static_call_update(amd_pstate_enable, cppc_enable);
		static_call_update(amd_pstate_init_perf, cppc_init_perf);
		static_call_update(amd_pstate_update_perf, cppc_update_perf);
	} else {
		pr_info("This processor supports shared memory solution, you can enable it with amd_pstate.shared_mem=1\n");
		return -ENODEV;
	}

	/* enable amd pstate feature */
	ret = amd_pstate_enable(true);
	if (ret) {
		pr_err("failed to enable amd-pstate with return %d\n", ret);
		return ret;
	}

	ret = cpufreq_register_driver(&amd_pstate_driver);
	if (ret)
		pr_err("failed to register amd_pstate_driver with return %d\n",
		       ret);

	return ret;
}

static void __exit amd_pstate_exit(void)
{
	cpufreq_unregister_driver(&amd_pstate_driver);

	amd_pstate_enable(false);
}

module_init(amd_pstate_init);
module_exit(amd_pstate_exit);

MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");
MODULE_LICENSE("GPL");