/*
 * intel_pstate.c: Native P state management for Intel processors
 *
 * (C) Copyright 2012 Intel Corporation
 * Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/slab.h>
#include <linux/sched/cpufreq.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/acpi.h>
#include <linux/vmalloc.h>
#include <trace/events/power.h>

#include <asm/div64.h>
#include <asm/msr.h>
#include <asm/cpu_device_id.h>
#include <asm/cpufeature.h>
#include <asm/intel-family.h>

#define INTEL_PSTATE_SAMPLING_INTERVAL	(10 * NSEC_PER_MSEC)

#define INTEL_CPUFREQ_TRANSITION_LATENCY	20000
#define INTEL_CPUFREQ_TRANSITION_DELAY		500

#ifdef CONFIG_ACPI
#include <acpi/processor.h>
#include <acpi/cppc_acpi.h>
#endif

#define FRAC_BITS 8
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)

#define ONE_EIGHTH_FP ((int64_t)1 << (FRAC_BITS - 3))

#define EXT_BITS 6
#define EXT_FRAC_BITS (EXT_BITS + FRAC_BITS)
#define fp_ext_toint(X) ((X) >> EXT_FRAC_BITS)
#define int_ext_tofp(X) ((int64_t)(X) << EXT_FRAC_BITS)

static inline int32_t mul_fp(int32_t x, int32_t y)
{
	return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
}

static inline int32_t div_fp(s64 x, s64 y)
{
	return div64_s64((int64_t)x << FRAC_BITS, y);
}

static inline int ceiling_fp(int32_t x)
{
	int mask, ret;

	ret = fp_toint(x);
	mask = (1 << FRAC_BITS) - 1;
	if (x & mask)
		ret += 1;
	return ret;
}

static inline int32_t percent_fp(int percent)
{
	return div_fp(percent, 100);
}

static inline u64 mul_ext_fp(u64 x, u64 y)
{
	return (x * y) >> EXT_FRAC_BITS;
}

static inline u64 div_ext_fp(u64 x, u64 y)
{
	return div64_u64(x << EXT_FRAC_BITS, y);
}

static inline int32_t percent_ext_fp(int percent)
{
	return div_ext_fp(percent, 100);
}
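
/*
 * Illustrative note (not part of the upstream driver): with FRAC_BITS = 8 the
 * helpers above implement 8-bit fixed point, so int_tofp(1) == 256 and
 * percent_fp(25) == 64, i.e. 0.25.  For example:
 *
 *	mul_fp(int_tofp(3), percent_fp(50)) == int_tofp(1) + 128	// 1.5
 *	div_fp(1, 8) == ONE_EIGHTH_FP == 32				// 0.125
 *	ceiling_fp(percent_fp(50)) == 1
 *
 * The "ext" variants use EXT_FRAC_BITS = 14 fraction bits and are used for the
 * APERF/MPERF ratios, where the extra precision matters.
 */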

/**
 * struct sample -	Store performance sample
 * @core_avg_perf:	Ratio of APERF/MPERF which is the actual average
 *			performance during last sample period
 * @busy_scaled:	Scaled busy value which is used to calculate next
 *			P state. This can be different than core_avg_perf
 *			to account for cpu idle period
 * @aperf:		Difference of actual performance frequency clock count
 *			read from APERF MSR between last and current sample
 * @mperf:		Difference of maximum performance frequency clock count
 *			read from MPERF MSR between last and current sample
 * @tsc:		Difference of time stamp counter between last and
 *			current sample
 * @time:		Current time from scheduler
 *
 * This structure is used in the cpudata structure to store performance sample
 * data for choosing next P State.
 */
struct sample {
	int32_t core_avg_perf;
	int32_t busy_scaled;
	u64 aperf;
	u64 mperf;
	u64 tsc;
	u64 time;
};

/**
 * struct pstate_data - Store P state data
 * @current_pstate:	Current requested P state
 * @min_pstate:		Min P state possible for this platform
 * @max_pstate:		Max P state possible for this platform
 * @max_pstate_physical:This is physical Max P state for a processor
 *			This can be higher than the max_pstate which can
 *			be limited by platform thermal design power limits
 * @scaling:		Scaling factor to convert frequency to cpufreq
 *			frequency units
 * @turbo_pstate:	Max Turbo P state possible for this platform
 * @max_freq:		@max_pstate frequency in cpufreq units
 * @turbo_freq:		@turbo_pstate frequency in cpufreq units
 *
 * Stores the per cpu model P state limits and current P state.
 */
struct pstate_data {
	int	current_pstate;
	int	min_pstate;
	int	max_pstate;
	int	max_pstate_physical;
	int	scaling;
	int	turbo_pstate;
	unsigned int max_freq;
	unsigned int turbo_freq;
};

/**
 * struct vid_data -	Stores voltage information data
 * @min:		VID data for this platform corresponding to
 *			the lowest P state
 * @max:		VID data corresponding to the highest P State.
 * @turbo:		VID data for turbo P state
 * @ratio:		Ratio of (vid max - vid min) /
 *			(max P state - Min P State)
 *
 * Stores the voltage data for DVFS (Dynamic Voltage and Frequency Scaling)
 * This data is used in Atom platforms, where in addition to target P state,
 * the voltage data needs to be specified to select next P State.
 */
struct vid_data {
	int min;
	int max;
	int turbo;
	int32_t ratio;
};

/**
 * struct global_params - Global parameters, mostly tunable via sysfs.
 * @no_turbo:		Whether or not to use turbo P-states.
 * @turbo_disabled:	Whether or not turbo P-states are available at all,
 *			based on the MSR_IA32_MISC_ENABLE value and whether or
 *			not the maximum reported turbo P-state is different from
 *			the maximum reported non-turbo one.
 * @min_perf_pct:	Minimum capacity limit in percent of the maximum turbo
 *			P-state capacity.
 * @max_perf_pct:	Maximum capacity limit in percent of the maximum turbo
 *			P-state capacity.
 */
struct global_params {
	bool no_turbo;
	bool turbo_disabled;
	int max_perf_pct;
	int min_perf_pct;
};

/**
 * struct cpudata -	Per CPU instance data storage
 * @cpu:		CPU number for this instance data
 * @policy:		CPUFreq policy value
 * @update_util:	CPUFreq utility callback information
 * @update_util_set:	CPUFreq utility callback is set
 * @iowait_boost:	iowait-related boost fraction
 * @last_update:	Time of the last update.
 * @pstate:		Stores P state limits for this CPU
 * @vid:		Stores VID limits for this CPU
 * @last_sample_time:	Last Sample time
 * @aperf_mperf_shift:	Number of clock cycles after aperf, mperf is incremented
 *			This shift is a multiplier to mperf delta to
 *			calculate CPU busy.
 * @prev_aperf:		Last APERF value read from APERF MSR
 * @prev_mperf:		Last MPERF value read from MPERF MSR
 * @prev_tsc:		Last timestamp counter (TSC) value
 * @prev_cummulative_iowait: IO Wait time difference from last and
 *			current sample
 * @sample:		Storage for storing last Sample data
 * @min_perf_ratio:	Minimum capacity in terms of PERF or HWP ratios
 * @max_perf_ratio:	Maximum capacity in terms of PERF or HWP ratios
 * @acpi_perf_data:	Stores ACPI perf information read from _PSS
 * @valid_pss_table:	Set to true for valid ACPI _PSS entries found
 * @epp_powersave:	Last saved HWP energy performance preference
 *			(EPP) or energy performance bias (EPB),
 *			when policy switched to performance
 * @epp_policy:		Last saved policy used to set EPP/EPB
 * @epp_default:	Power on default HWP energy performance
 *			preference/bias
 * @epp_saved:		Saved EPP/EPB during system suspend or CPU offline
 *			operation
 * @hwp_req_cached:	Cached value of the last HWP Request MSR
 * @hwp_cap_cached:	Cached value of the last HWP Capabilities MSR
 * @last_io_update:	Last time when IO wake flag was set
 * @sched_flags:	Store scheduler flags for possible cross CPU update
 * @hwp_boost_min:	Last HWP boosted min performance
 *
 * This structure stores per CPU instance data for all CPUs.
 */
struct cpudata {
	int cpu;

	unsigned int policy;
	struct update_util_data update_util;
	bool   update_util_set;

	struct pstate_data pstate;
	struct vid_data vid;

	u64	last_update;
	u64	last_sample_time;
	u64	aperf_mperf_shift;
	u64	prev_aperf;
	u64	prev_mperf;
	u64	prev_tsc;
	u64	prev_cummulative_iowait;
	struct sample sample;
	int32_t	min_perf_ratio;
	int32_t	max_perf_ratio;
#ifdef CONFIG_ACPI
	struct acpi_processor_performance acpi_perf_data;
	bool valid_pss_table;
#endif
	unsigned int iowait_boost;
	s16 epp_powersave;
	s16 epp_policy;
	s16 epp_default;
	s16 epp_saved;
	u64 hwp_req_cached;
	u64 hwp_cap_cached;
	u64 last_io_update;
	unsigned int sched_flags;
	u32 hwp_boost_min;
};

static struct cpudata **all_cpu_data;

/**
 * struct pstate_funcs - Per CPU model specific callbacks
 * @get_max:		Callback to get maximum non turbo effective P state
 * @get_max_physical:	Callback to get maximum non turbo physical P state
 * @get_min:		Callback to get minimum P state
 * @get_turbo:		Callback to get turbo P state
 * @get_scaling:	Callback to get frequency scaling factor
 * @get_val:		Callback to convert P state to actual MSR write value
 * @get_vid:		Callback to get VID data for Atom platforms
 *
 * Core and Atom CPU models have different ways to get P State limits. This
 * structure is used to store those callbacks.
 */
struct pstate_funcs {
	int (*get_max)(void);
	int (*get_max_physical)(void);
	int (*get_min)(void);
	int (*get_turbo)(void);
	int (*get_scaling)(void);
	int (*get_aperf_mperf_shift)(void);
	u64 (*get_val)(struct cpudata*, int pstate);
	void (*get_vid)(struct cpudata *);
};

static struct pstate_funcs pstate_funcs __read_mostly;

static int hwp_active __read_mostly;
static int hwp_mode_bdw __read_mostly;
static bool per_cpu_limits __read_mostly;
static bool hwp_boost __read_mostly;

static struct cpufreq_driver *intel_pstate_driver __read_mostly;

#ifdef CONFIG_ACPI
static bool acpi_ppc;
#endif

static struct global_params global;

static DEFINE_MUTEX(intel_pstate_driver_lock);
static DEFINE_MUTEX(intel_pstate_limits_lock);

#ifdef CONFIG_ACPI

static bool intel_pstate_acpi_pm_profile_server(void)
{
	if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER ||
	    acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER)
		return true;

	return false;
}

static bool intel_pstate_get_ppc_enable_status(void)
{
	if (intel_pstate_acpi_pm_profile_server())
		return true;

	return acpi_ppc;
}

#ifdef CONFIG_ACPI_CPPC_LIB

/* The work item is needed to avoid CPU hotplug locking issues */
static void intel_pstste_sched_itmt_work_fn(struct work_struct *work)
{
	sched_set_itmt_support();
}

static DECLARE_WORK(sched_itmt_work, intel_pstste_sched_itmt_work_fn);

static void intel_pstate_set_itmt_prio(int cpu)
{
	struct cppc_perf_caps cppc_perf;
	static u32 max_highest_perf = 0, min_highest_perf = U32_MAX;
	int ret;

	ret = cppc_get_perf_caps(cpu, &cppc_perf);
	if (ret)
		return;

	/*
	 * The priorities can be set regardless of whether or not
	 * sched_set_itmt_support(true) has been called and it is valid to
	 * update them at any time after it has been called.
	 */
	sched_set_itmt_core_prio(cppc_perf.highest_perf, cpu);

	if (max_highest_perf <= min_highest_perf) {
		if (cppc_perf.highest_perf > max_highest_perf)
			max_highest_perf = cppc_perf.highest_perf;

		if (cppc_perf.highest_perf < min_highest_perf)
			min_highest_perf = cppc_perf.highest_perf;

		if (max_highest_perf > min_highest_perf) {
			/*
			 * This code can be run during CPU online under the
			 * CPU hotplug locks, so sched_set_itmt_support()
			 * cannot be called from here. Queue up a work item
			 * to invoke it.
			 */
			schedule_work(&sched_itmt_work);
		}
	}
}

static int intel_pstate_get_cppc_guranteed(int cpu)
{
	struct cppc_perf_caps cppc_perf;
	int ret;

	ret = cppc_get_perf_caps(cpu, &cppc_perf);
	if (ret)
		return ret;

	if (cppc_perf.guaranteed_perf)
		return cppc_perf.guaranteed_perf;

	return cppc_perf.nominal_perf;
}

#else /* CONFIG_ACPI_CPPC_LIB */
static void intel_pstate_set_itmt_prio(int cpu)
{
}
#endif /* CONFIG_ACPI_CPPC_LIB */

static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
{
	struct cpudata *cpu;
	int ret;
	int i;

	if (hwp_active) {
		intel_pstate_set_itmt_prio(policy->cpu);
		return;
	}

	if (!intel_pstate_get_ppc_enable_status())
		return;

	cpu = all_cpu_data[policy->cpu];

	ret = acpi_processor_register_performance(&cpu->acpi_perf_data,
						  policy->cpu);
	if (ret)
		return;

	/*
	 * Check if the control value in _PSS is for PERF_CTL MSR, which should
	 * guarantee that the states returned by it map to the states in our
	 * list directly.
	 */
	if (cpu->acpi_perf_data.control_register.space_id !=
						ACPI_ADR_SPACE_FIXED_HARDWARE)
		goto err;

	/*
	 * If there is only one entry _PSS, simply ignore _PSS and continue as
	 * usual without taking _PSS into account
	 */
	if (cpu->acpi_perf_data.state_count < 2)
		goto err;

	pr_debug("CPU%u - ACPI _PSS perf data\n", policy->cpu);
	for (i = 0; i < cpu->acpi_perf_data.state_count; i++) {
		pr_debug(" %cP%d: %u MHz, %u mW, 0x%x\n",
			 (i == cpu->acpi_perf_data.state ? '*' : ' '), i,
			 (u32) cpu->acpi_perf_data.states[i].core_frequency,
			 (u32) cpu->acpi_perf_data.states[i].power,
			 (u32) cpu->acpi_perf_data.states[i].control);
	}

	/*
	 * The _PSS table doesn't contain the whole turbo frequency range.
	 * It just contains +1 MHz above the max non turbo frequency,
	 * with control value corresponding to max turbo ratio. But
	 * when cpufreq set policy is called, it will call with this
	 * max frequency, which will cause a reduced performance as
	 * this driver uses real max turbo frequency as the max
	 * frequency. So correct this frequency in the _PSS table to the
	 * correct max turbo frequency based on the turbo state.
	 * Also need to convert to MHz as _PSS freq is in MHz.
	 */
	if (!global.turbo_disabled)
		cpu->acpi_perf_data.states[0].core_frequency =
					policy->cpuinfo.max_freq / 1000;
	cpu->valid_pss_table = true;
	pr_debug("_PPC limits will be enforced\n");

	return;

 err:
	cpu->valid_pss_table = false;
	acpi_processor_unregister_performance(policy->cpu);
}

static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
{
	struct cpudata *cpu;

	cpu = all_cpu_data[policy->cpu];
	if (!cpu->valid_pss_table)
		return;

	acpi_processor_unregister_performance(policy->cpu);
}
#else /* CONFIG_ACPI */
static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
{
}

static inline void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
{
}

static inline bool intel_pstate_acpi_pm_profile_server(void)
{
	return false;
}
#endif /* CONFIG_ACPI */

#ifndef CONFIG_ACPI_CPPC_LIB
static int intel_pstate_get_cppc_guranteed(int cpu)
{
	return -ENOTSUPP;
}
#endif /* CONFIG_ACPI_CPPC_LIB */

static inline void update_turbo_state(void)
{
	u64 misc_en;
	struct cpudata *cpu;

	cpu = all_cpu_data[0];
	rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
	global.turbo_disabled =
		(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
			cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
}

static int min_perf_pct_min(void)
{
	struct cpudata *cpu = all_cpu_data[0];
	int turbo_pstate = cpu->pstate.turbo_pstate;

	return turbo_pstate ?
		(cpu->pstate.min_pstate * 100 / turbo_pstate) : 0;
}

static s16 intel_pstate_get_epb(struct cpudata *cpu_data)
{
	u64 epb;
	int ret;

	if (!static_cpu_has(X86_FEATURE_EPB))
		return -ENXIO;

	ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
	if (ret)
		return (s16)ret;

	return (s16)(epb & 0x0f);
}

static s16 intel_pstate_get_epp(struct cpudata *cpu_data, u64 hwp_req_data)
{
	s16 epp;

	if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
		/*
		 * When hwp_req_data is 0, it means the caller didn't read
		 * MSR_HWP_REQUEST, so read it here to get the EPP.
		 */
		if (!hwp_req_data) {
			epp = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST,
					    &hwp_req_data);
			if (epp)
				return epp;
		}
		epp = (hwp_req_data >> 24) & 0xff;
	} else {
		/* When there is no EPP present, HWP uses EPB settings */
		epp = intel_pstate_get_epb(cpu_data);
	}

	return epp;
}

static int intel_pstate_set_epb(int cpu, s16 pref)
{
	u64 epb;
	int ret;

	if (!static_cpu_has(X86_FEATURE_EPB))
		return -ENXIO;

	ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
	if (ret)
		return ret;

	epb = (epb & ~0x0f) | pref;
	wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, epb);

	return 0;
}

/*
 * EPP/EPB display strings corresponding to EPP index in the
 * energy_perf_strings[]
 *	index		String
 *-------------------------------------
 *	0		default
 *	1		performance
 *	2		balance_performance
 *	3		balance_power
 *	4		power
 */
static const char * const energy_perf_strings[] = {
	"default",
	"performance",
	"balance_performance",
	"balance_power",
	"power",
	NULL
};
static const unsigned int epp_values[] = {
	HWP_EPP_PERFORMANCE,
	HWP_EPP_BALANCE_PERFORMANCE,
	HWP_EPP_BALANCE_POWERSAVE,
	HWP_EPP_POWERSAVE
};

static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data)
{
	s16 epp;
	int index = -EINVAL;

	epp = intel_pstate_get_epp(cpu_data, 0);
	if (epp < 0)
		return epp;

	if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
		if (epp == HWP_EPP_PERFORMANCE)
			return 1;
		if (epp <= HWP_EPP_BALANCE_PERFORMANCE)
			return 2;
		if (epp <= HWP_EPP_BALANCE_POWERSAVE)
			return 3;
		else
			return 4;
	} else if (static_cpu_has(X86_FEATURE_EPB)) {
		/*
		 * Range:
		 *	0x00-0x03	:	Performance
		 *	0x04-0x07	:	Balance performance
		 *	0x08-0x0B	:	Balance power
		 *	0x0C-0x0F	:	Power
		 * The EPB is a 4 bit value, but our ranges restrict the
		 * value which can be set. Here only using top two bits
		 * effectively.
		 */
		index = (epp >> 2) + 1;
	}

	return index;
}
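
/*
 * Worked example (illustration only, not in the upstream driver): on a
 * non-EPP system with EPB = 0x06 ("balance performance" range 0x04-0x07),
 * intel_pstate_get_energy_pref_index() computes (0x06 >> 2) + 1 == 2, which
 * indexes "balance_performance" in energy_perf_strings[].  The inverse path
 * in intel_pstate_set_energy_pref_index() below maps index 2 back to
 * (2 - 1) << 2 == 0x04 when writing EPB.
 */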

static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
					      int pref_index)
{
	int epp = -EINVAL;
	int ret;

	if (!pref_index)
		epp = cpu_data->epp_default;

	mutex_lock(&intel_pstate_limits_lock);

	if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
		u64 value;

		ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, &value);
		if (ret)
			goto return_pref;

		value &= ~GENMASK_ULL(31, 24);

		if (epp == -EINVAL)
			epp = epp_values[pref_index - 1];

		value |= (u64)epp << 24;
		ret = wrmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, value);
	} else {
		if (epp == -EINVAL)
			epp = (pref_index - 1) << 2;
		ret = intel_pstate_set_epb(cpu_data->cpu, epp);
	}
return_pref:
	mutex_unlock(&intel_pstate_limits_lock);

	return ret;
}

static ssize_t show_energy_performance_available_preferences(
				struct cpufreq_policy *policy, char *buf)
{
	int i = 0;
	int ret = 0;

	while (energy_perf_strings[i] != NULL)
		ret += sprintf(&buf[ret], "%s ", energy_perf_strings[i++]);

	ret += sprintf(&buf[ret], "\n");

	return ret;
}

cpufreq_freq_attr_ro(energy_performance_available_preferences);

static ssize_t store_energy_performance_preference(
		struct cpufreq_policy *policy, const char *buf, size_t count)
{
	struct cpudata *cpu_data = all_cpu_data[policy->cpu];
	char str_preference[21];
	int ret;

	ret = sscanf(buf, "%20s", str_preference);
	if (ret != 1)
		return -EINVAL;

	ret = match_string(energy_perf_strings, -1, str_preference);
	if (ret < 0)
		return ret;

	intel_pstate_set_energy_pref_index(cpu_data, ret);
	return count;
}

static ssize_t show_energy_performance_preference(
				struct cpufreq_policy *policy, char *buf)
{
	struct cpudata *cpu_data = all_cpu_data[policy->cpu];
	int preference;

	preference = intel_pstate_get_energy_pref_index(cpu_data);
	if (preference < 0)
		return preference;

	return sprintf(buf, "%s\n", energy_perf_strings[preference]);
}

cpufreq_freq_attr_rw(energy_performance_preference);

static ssize_t show_base_frequency(struct cpufreq_policy *policy, char *buf)
{
	struct cpudata *cpu;
	u64 cap;
	int ratio;

	ratio = intel_pstate_get_cppc_guranteed(policy->cpu);
	if (ratio <= 0) {
		rdmsrl_on_cpu(policy->cpu, MSR_HWP_CAPABILITIES, &cap);
		ratio = HWP_GUARANTEED_PERF(cap);
	}

	cpu = all_cpu_data[policy->cpu];

	return sprintf(buf, "%d\n", ratio * cpu->pstate.scaling);
}

cpufreq_freq_attr_ro(base_frequency);

static struct freq_attr *hwp_cpufreq_attrs[] = {
	&energy_performance_preference,
	&energy_performance_available_preferences,
	&base_frequency,
	NULL,
};

static void intel_pstate_get_hwp_max(unsigned int cpu, int *phy_max,
				     int *current_max)
{
	u64 cap;

	rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
	WRITE_ONCE(all_cpu_data[cpu]->hwp_cap_cached, cap);
	if (global.no_turbo)
		*current_max = HWP_GUARANTEED_PERF(cap);
	else
		*current_max = HWP_HIGHEST_PERF(cap);

	*phy_max = HWP_HIGHEST_PERF(cap);
}

static void intel_pstate_hwp_set(unsigned int cpu)
{
	struct cpudata *cpu_data = all_cpu_data[cpu];
	int max, min;
	u64 value;
	s16 epp;

	max = cpu_data->max_perf_ratio;
	min = cpu_data->min_perf_ratio;

	if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
		min = max;

	rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);

	value &= ~HWP_MIN_PERF(~0L);
	value |= HWP_MIN_PERF(min);

	value &= ~HWP_MAX_PERF(~0L);
	value |= HWP_MAX_PERF(max);

	if (cpu_data->epp_policy == cpu_data->policy)
		goto skip_epp;

	cpu_data->epp_policy = cpu_data->policy;

	if (cpu_data->epp_saved >= 0) {
		epp = cpu_data->epp_saved;
		cpu_data->epp_saved = -EINVAL;
		goto update_epp;
	}

	if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) {
		epp = intel_pstate_get_epp(cpu_data, value);
		cpu_data->epp_powersave = epp;
		/* If the EPP read failed, don't try to write */
		if (epp < 0)
			goto skip_epp;

		epp = 0;
	} else {
		/* skip setting EPP, when saved value is invalid */
		if (cpu_data->epp_powersave < 0)
			goto skip_epp;

		/*
		 * No need to restore EPP when it is not zero. This
		 * means:
		 *  - Policy is not changed
		 *  - user has manually changed it
		 *  - Error reading EPB
		 */
		epp = intel_pstate_get_epp(cpu_data, value);
		if (epp)
			goto skip_epp;

		epp = cpu_data->epp_powersave;
	}
update_epp:
	if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
		value &= ~GENMASK_ULL(31, 24);
		value |= (u64)epp << 24;
	} else {
		intel_pstate_set_epb(cpu, epp);
	}
skip_epp:
	WRITE_ONCE(cpu_data->hwp_req_cached, value);
	wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
}

static void intel_pstate_hwp_force_min_perf(int cpu)
{
	u64 value;
	int min_perf;

	value = all_cpu_data[cpu]->hwp_req_cached;
	value &= ~GENMASK_ULL(31, 0);
	min_perf = HWP_LOWEST_PERF(all_cpu_data[cpu]->hwp_cap_cached);

	/* Set hwp_max = hwp_min */
	value |= HWP_MAX_PERF(min_perf);
	value |= HWP_MIN_PERF(min_perf);

	/* Set EPP/EPB to min */
	if (static_cpu_has(X86_FEATURE_HWP_EPP))
		value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE);
	else
		intel_pstate_set_epb(cpu, HWP_EPP_BALANCE_POWERSAVE);

	wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
}

static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy)
{
	struct cpudata *cpu_data = all_cpu_data[policy->cpu];

	if (!hwp_active)
		return 0;

	cpu_data->epp_saved = intel_pstate_get_epp(cpu_data, 0);

	return 0;
}

static void intel_pstate_hwp_enable(struct cpudata *cpudata);

static int intel_pstate_resume(struct cpufreq_policy *policy)
{
	if (!hwp_active)
		return 0;

	mutex_lock(&intel_pstate_limits_lock);

	if (policy->cpu == 0)
		intel_pstate_hwp_enable(all_cpu_data[policy->cpu]);

	all_cpu_data[policy->cpu]->epp_policy = 0;
	intel_pstate_hwp_set(policy->cpu);

	mutex_unlock(&intel_pstate_limits_lock);

	return 0;
}

static void intel_pstate_update_policies(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		cpufreq_update_policy(cpu);
}

/************************** sysfs begin ************************/
#define show_one(file_name, object)					\
	static ssize_t show_##file_name					\
	(struct kobject *kobj, struct kobj_attribute *attr, char *buf)	\
	{								\
		return sprintf(buf, "%u\n", global.object);		\
	}

static ssize_t intel_pstate_show_status(char *buf);
static int intel_pstate_update_status(const char *buf, size_t size);

static ssize_t show_status(struct kobject *kobj,
			   struct kobj_attribute *attr, char *buf)
{
	ssize_t ret;

	mutex_lock(&intel_pstate_driver_lock);
	ret = intel_pstate_show_status(buf);
	mutex_unlock(&intel_pstate_driver_lock);

	return ret;
}

static ssize_t store_status(struct kobject *a, struct kobj_attribute *b,
			    const char *buf, size_t count)
{
	char *p = memchr(buf, '\n', count);
	int ret;

	mutex_lock(&intel_pstate_driver_lock);
	ret = intel_pstate_update_status(buf, p ? p - buf : count);
	mutex_unlock(&intel_pstate_driver_lock);

	return ret < 0 ? ret : count;
}

static ssize_t show_turbo_pct(struct kobject *kobj,
			      struct kobj_attribute *attr, char *buf)
{
	struct cpudata *cpu;
	int total, no_turbo, turbo_pct;
	uint32_t turbo_fp;

	mutex_lock(&intel_pstate_driver_lock);

	if (!intel_pstate_driver) {
		mutex_unlock(&intel_pstate_driver_lock);
		return -EAGAIN;
	}

	cpu = all_cpu_data[0];

	total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
	no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
	turbo_fp = div_fp(no_turbo, total);
	turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));

	mutex_unlock(&intel_pstate_driver_lock);

	return sprintf(buf, "%u\n", turbo_pct);
}

static ssize_t show_num_pstates(struct kobject *kobj,
				struct kobj_attribute *attr, char *buf)
{
	struct cpudata *cpu;
	int total;

	mutex_lock(&intel_pstate_driver_lock);

	if (!intel_pstate_driver) {
		mutex_unlock(&intel_pstate_driver_lock);
		return -EAGAIN;
	}

	cpu = all_cpu_data[0];
	total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;

	mutex_unlock(&intel_pstate_driver_lock);

	return sprintf(buf, "%u\n", total);
}

static ssize_t show_no_turbo(struct kobject *kobj,
			     struct kobj_attribute *attr, char *buf)
{
	ssize_t ret;

	mutex_lock(&intel_pstate_driver_lock);

	if (!intel_pstate_driver) {
		mutex_unlock(&intel_pstate_driver_lock);
		return -EAGAIN;
	}

	update_turbo_state();
	if (global.turbo_disabled)
		ret = sprintf(buf, "%u\n", global.turbo_disabled);
	else
		ret = sprintf(buf, "%u\n", global.no_turbo);

	mutex_unlock(&intel_pstate_driver_lock);

	return ret;
}

static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b,
			      const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	mutex_lock(&intel_pstate_driver_lock);

	if (!intel_pstate_driver) {
		mutex_unlock(&intel_pstate_driver_lock);
		return -EAGAIN;
	}

	mutex_lock(&intel_pstate_limits_lock);

	update_turbo_state();
	if (global.turbo_disabled) {
		pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
		mutex_unlock(&intel_pstate_limits_lock);
		mutex_unlock(&intel_pstate_driver_lock);
		return -EPERM;
	}

	global.no_turbo = clamp_t(int, input, 0, 1);

	if (global.no_turbo) {
		struct cpudata *cpu = all_cpu_data[0];
		int pct = cpu->pstate.max_pstate * 100 / cpu->pstate.turbo_pstate;

		/* Squash the global minimum into the permitted range. */
		if (global.min_perf_pct > pct)
			global.min_perf_pct = pct;
	}

	mutex_unlock(&intel_pstate_limits_lock);

	intel_pstate_update_policies();

	mutex_unlock(&intel_pstate_driver_lock);

	return count;
}

static ssize_t store_max_perf_pct(struct kobject *a, struct kobj_attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	mutex_lock(&intel_pstate_driver_lock);

	if (!intel_pstate_driver) {
		mutex_unlock(&intel_pstate_driver_lock);
		return -EAGAIN;
	}

	mutex_lock(&intel_pstate_limits_lock);

	global.max_perf_pct = clamp_t(int, input, global.min_perf_pct, 100);

	mutex_unlock(&intel_pstate_limits_lock);

	intel_pstate_update_policies();

	mutex_unlock(&intel_pstate_driver_lock);

	return count;
}

static ssize_t store_min_perf_pct(struct kobject *a, struct kobj_attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	mutex_lock(&intel_pstate_driver_lock);

	if (!intel_pstate_driver) {
		mutex_unlock(&intel_pstate_driver_lock);
		return -EAGAIN;
	}

	mutex_lock(&intel_pstate_limits_lock);

	global.min_perf_pct = clamp_t(int, input,
				      min_perf_pct_min(), global.max_perf_pct);

	mutex_unlock(&intel_pstate_limits_lock);

	intel_pstate_update_policies();

	mutex_unlock(&intel_pstate_driver_lock);

	return count;
}

static ssize_t show_hwp_dynamic_boost(struct kobject *kobj,
				      struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", hwp_boost);
}

static ssize_t store_hwp_dynamic_boost(struct kobject *a,
				       struct kobj_attribute *b,
				       const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = kstrtouint(buf, 10, &input);
	if (ret)
		return ret;

	mutex_lock(&intel_pstate_driver_lock);
	hwp_boost = !!input;
	intel_pstate_update_policies();
	mutex_unlock(&intel_pstate_driver_lock);

	return count;
}

show_one(max_perf_pct, max_perf_pct);
show_one(min_perf_pct, min_perf_pct);

define_one_global_rw(status);
define_one_global_rw(no_turbo);
define_one_global_rw(max_perf_pct);
define_one_global_rw(min_perf_pct);
define_one_global_ro(turbo_pct);
define_one_global_ro(num_pstates);
define_one_global_rw(hwp_dynamic_boost);

static struct attribute *intel_pstate_attributes[] = {
	&status.attr,
	&no_turbo.attr,
	&turbo_pct.attr,
	&num_pstates.attr,
	NULL
};

static const struct attribute_group intel_pstate_attr_group = {
	.attrs = intel_pstate_attributes,
};

static void __init intel_pstate_sysfs_expose_params(void)
{
	struct kobject *intel_pstate_kobject;
	int rc;

	intel_pstate_kobject = kobject_create_and_add("intel_pstate",
						&cpu_subsys.dev_root->kobj);
	if (WARN_ON(!intel_pstate_kobject))
		return;

	rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group);
	if (WARN_ON(rc))
		return;

	/*
	 * If per cpu limits are enforced there are no global limits, so
	 * return without creating max/min_perf_pct attributes
	 */
	if (per_cpu_limits)
		return;

	rc = sysfs_create_file(intel_pstate_kobject, &max_perf_pct.attr);
	WARN_ON(rc);

	rc = sysfs_create_file(intel_pstate_kobject, &min_perf_pct.attr);
	WARN_ON(rc);

	if (hwp_active) {
		rc = sysfs_create_file(intel_pstate_kobject,
				       &hwp_dynamic_boost.attr);
		WARN_ON(rc);
	}
}
/************************** sysfs end ************************/

static void intel_pstate_hwp_enable(struct cpudata *cpudata)
{
	/* First disable HWP notification interrupts as we don't process them */
	if (static_cpu_has(X86_FEATURE_HWP_NOTIFY))
		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);

	wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
	cpudata->epp_policy = 0;
	if (cpudata->epp_default == -EINVAL)
		cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
}

#define MSR_IA32_POWER_CTL_BIT_EE	19

/* Disable energy efficiency optimization */
static void intel_pstate_disable_ee(int cpu)
{
	u64 power_ctl;
	int ret;

	ret = rdmsrl_on_cpu(cpu, MSR_IA32_POWER_CTL, &power_ctl);
	if (ret)
		return;

	if (!(power_ctl & BIT(MSR_IA32_POWER_CTL_BIT_EE))) {
		pr_info("Disabling energy efficiency optimization\n");
		power_ctl |= BIT(MSR_IA32_POWER_CTL_BIT_EE);
		wrmsrl_on_cpu(cpu, MSR_IA32_POWER_CTL, power_ctl);
	}
}

static int atom_get_min_pstate(void)
{
	u64 value;

	rdmsrl(MSR_ATOM_CORE_RATIOS, value);
	return (value >> 8) & 0x7F;
}

static int atom_get_max_pstate(void)
{
	u64 value;

	rdmsrl(MSR_ATOM_CORE_RATIOS, value);
	return (value >> 16) & 0x7F;
}

static int atom_get_turbo_pstate(void)
{
	u64 value;

	rdmsrl(MSR_ATOM_CORE_TURBO_RATIOS, value);
	return value & 0x7F;
}

static u64 atom_get_val(struct cpudata *cpudata, int pstate)
{
	u64 val;
	int32_t vid_fp;
	u32 vid;

	val = (u64)pstate << 8;
	if (global.no_turbo && !global.turbo_disabled)
		val |= (u64)1 << 32;

	vid_fp = cpudata->vid.min + mul_fp(
		int_tofp(pstate - cpudata->pstate.min_pstate),
		cpudata->vid.ratio);

	vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
	vid = ceiling_fp(vid_fp);

	if (pstate > cpudata->pstate.max_pstate)
		vid = cpudata->vid.turbo;

	return val | vid;
}

static int silvermont_get_scaling(void)
{
	u64 value;
	int i;
	/* Defined in Table 35-6 from SDM (Sept 2015) */
	static int silvermont_freq_table[] = {
		83300, 100000, 133300, 116700, 80000};

	rdmsrl(MSR_FSB_FREQ, value);
	i = value & 0x7;
	WARN_ON(i > 4);

	return silvermont_freq_table[i];
}

static int airmont_get_scaling(void)
{
	u64 value;
	int i;
	/* Defined in Table 35-10 from SDM (Sept 2015) */
	static int airmont_freq_table[] = {
		83300, 100000, 133300, 116700, 80000,
		93300, 90000, 88900, 87500};

	rdmsrl(MSR_FSB_FREQ, value);
	i = value & 0xF;
	WARN_ON(i > 8);

	return airmont_freq_table[i];
}

static void atom_get_vid(struct cpudata *cpudata)
{
	u64 value;

	rdmsrl(MSR_ATOM_CORE_VIDS, value);
	cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
	cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
	cpudata->vid.ratio = div_fp(
		cpudata->vid.max - cpudata->vid.min,
		int_tofp(cpudata->pstate.max_pstate -
			 cpudata->pstate.min_pstate));

	rdmsrl(MSR_ATOM_CORE_TURBO_VIDS, value);
	cpudata->vid.turbo = value & 0x7f;
}

static int core_get_min_pstate(void)
{
	u64 value;

	rdmsrl(MSR_PLATFORM_INFO, value);
	return (value >> 40) & 0xFF;
}

static int core_get_max_pstate_physical(void)
{
	u64 value;

	rdmsrl(MSR_PLATFORM_INFO, value);
	return (value >> 8) & 0xFF;
}

static int core_get_tdp_ratio(u64 plat_info)
{
	/* Check how many TDP levels present */
	if (plat_info & 0x600000000) {
		u64 tdp_ctrl;
		u64 tdp_ratio;
		int tdp_msr;
		int err;

		/* Get the TDP level (0, 1, 2) to get ratios */
		err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
		if (err)
			return err;

		/* TDP MSR are continuous starting at 0x648 */
		tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x03);
		err = rdmsrl_safe(tdp_msr, &tdp_ratio);
		if (err)
			return err;

		/* For level 1 and 2, bits[23:16] contain the ratio */
		if (tdp_ctrl & 0x03)
			tdp_ratio >>= 16;

		tdp_ratio &= 0xff; /* ratios are only 8 bits long */
		pr_debug("tdp_ratio %x\n", (int)tdp_ratio);

		return (int)tdp_ratio;
	}

	return -ENXIO;
}

static int core_get_max_pstate(void)
{
	u64 tar;
	u64 plat_info;
	int max_pstate;
	int tdp_ratio;
	int err;

	rdmsrl(MSR_PLATFORM_INFO, plat_info);
	max_pstate = (plat_info >> 8) & 0xFF;

	tdp_ratio = core_get_tdp_ratio(plat_info);
	if (tdp_ratio <= 0)
		return max_pstate;

	if (hwp_active) {
		/* Turbo activation ratio is not used on HWP platforms */
		return tdp_ratio;
	}

	err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar);
	if (!err) {
		int tar_levels;

		/* Do some sanity checking for safety */
		tar_levels = tar & 0xff;
		if (tdp_ratio - 1 == tar_levels) {
			max_pstate = tar_levels;
			pr_debug("max_pstate=TAC %x\n", max_pstate);
		}
	}

	return max_pstate;
}

static int core_get_turbo_pstate(void)
{
	u64 value;
	int nont, ret;

	rdmsrl(MSR_TURBO_RATIO_LIMIT, value);
	nont = core_get_max_pstate();
	ret = (value) & 255;
	if (ret <= nont)
		ret = nont;
	return ret;
}

static inline int core_get_scaling(void)
{
	return 100000;
}

static u64 core_get_val(struct cpudata *cpudata, int pstate)
{
	u64 val;

	val = (u64)pstate << 8;
	if (global.no_turbo && !global.turbo_disabled)
		val |= (u64)1 << 32;

	return val;
}

static int knl_get_aperf_mperf_shift(void)
{
	return 10;
}

static int knl_get_turbo_pstate(void)
{
	u64 value;
	int nont, ret;

	rdmsrl(MSR_TURBO_RATIO_LIMIT, value);
	nont = core_get_max_pstate();
	ret = (((value) >> 8) & 0xFF);
	if (ret <= nont)
		ret = nont;
	return ret;
}

static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
{
	trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
	cpu->pstate.current_pstate = pstate;
	/*
	 * Generally, there is no guarantee that this code will always run on
	 * the CPU being updated, so force the register update to run on the
	 * right CPU.
	 */
	wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
		      pstate_funcs.get_val(cpu, pstate));
}

static void intel_pstate_set_min_pstate(struct cpudata *cpu)
{
	intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
}

static void intel_pstate_max_within_limits(struct cpudata *cpu)
{
	int pstate = max(cpu->pstate.min_pstate, cpu->max_perf_ratio);

	update_turbo_state();
	intel_pstate_set_pstate(cpu, pstate);
}

static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
{
	cpu->pstate.min_pstate = pstate_funcs.get_min();
	cpu->pstate.max_pstate = pstate_funcs.get_max();
	cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical();
	cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
	cpu->pstate.scaling = pstate_funcs.get_scaling();
	cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;

	if (hwp_active && !hwp_mode_bdw) {
		unsigned int phy_max, current_max;

		intel_pstate_get_hwp_max(cpu->cpu, &phy_max, &current_max);
		cpu->pstate.turbo_freq = phy_max * cpu->pstate.scaling;
	} else {
		cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
	}

	if (pstate_funcs.get_aperf_mperf_shift)
		cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift();

	if (pstate_funcs.get_vid)
		pstate_funcs.get_vid(cpu);

	intel_pstate_set_min_pstate(cpu);
}

/*
 * Long hold time will keep high perf limits for long time,
 * which negatively impacts perf/watt for some workloads,
 * like specpower. 3ms is based on experiments on some
 * workloads.
 */
static int hwp_boost_hold_time_ns = 3 * NSEC_PER_MSEC;

static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu)
{
	u64 hwp_req = READ_ONCE(cpu->hwp_req_cached);
	u32 max_limit = (hwp_req & 0xff00) >> 8;
	u32 min_limit = (hwp_req & 0xff);
	u32 boost_level1;

	/*
	 * Cases to consider (User changes via sysfs or boot time):
	 * If, P0 (Turbo max) = P1 (Guaranteed max) = min:
	 *	No boost, return.
	 * If, P0 (Turbo max) > P1 (Guaranteed max) = min:
	 *     Should result in one level boost only for P0.
	 * If, P0 (Turbo max) = P1 (Guaranteed max) > min:
	 *     Should result in two level boost:
	 *		(min + p1)/2 and P1.
	 * If, P0 (Turbo max) > P1 (Guaranteed max) > min:
	 *     Should result in three level boost:
	 *		(min + p1)/2, P1 and P0.
	 */

	/* If max and min are equal or already at max, nothing to boost */
	if (max_limit == min_limit || cpu->hwp_boost_min >= max_limit)
		return;

	if (!cpu->hwp_boost_min)
		cpu->hwp_boost_min = min_limit;

	/* level at half way mark between min and guaranteed */
	boost_level1 = (HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) + min_limit) >> 1;

	if (cpu->hwp_boost_min < boost_level1)
		cpu->hwp_boost_min = boost_level1;
	else if (cpu->hwp_boost_min < HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
		cpu->hwp_boost_min = HWP_GUARANTEED_PERF(cpu->hwp_cap_cached);
	else if (cpu->hwp_boost_min == HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) &&
		 max_limit != HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
		cpu->hwp_boost_min = max_limit;
	else
		return;

	hwp_req = (hwp_req & ~GENMASK_ULL(7, 0)) | cpu->hwp_boost_min;
	wrmsrl(MSR_HWP_REQUEST, hwp_req);
	cpu->last_update = cpu->sample.time;
}

static inline void intel_pstate_hwp_boost_down(struct cpudata *cpu)
{
	if (cpu->hwp_boost_min) {
		bool expired;

		/* Check if we are idle for hold time to boost down */
		expired = time_after64(cpu->sample.time, cpu->last_update +
				       hwp_boost_hold_time_ns);
		if (expired) {
			wrmsrl(MSR_HWP_REQUEST, cpu->hwp_req_cached);
			cpu->hwp_boost_min = 0;
		}
	}
	cpu->last_update = cpu->sample.time;
}

static inline void intel_pstate_update_util_hwp_local(struct cpudata *cpu,
						      u64 time)
{
	cpu->sample.time = time;

	if (cpu->sched_flags & SCHED_CPUFREQ_IOWAIT) {
		bool do_io = false;

		cpu->sched_flags = 0;
		/*
		 * Set iowait_boost flag and update time. Since IO WAIT flag
		 * is set all the time, we can't just conclude that there is
		 * some IO bound activity scheduled on this CPU with just
		 * one occurrence. If we receive at least two in two
		 * consecutive ticks, then we treat it as a boost candidate.
		 */
		if (time_before64(time, cpu->last_io_update + 2 * TICK_NSEC))
			do_io = true;

		cpu->last_io_update = time;

		if (do_io)
			intel_pstate_hwp_boost_up(cpu);

	} else {
		intel_pstate_hwp_boost_down(cpu);
	}
}

static inline void intel_pstate_update_util_hwp(struct update_util_data *data,
						u64 time, unsigned int flags)
{
	struct cpudata *cpu = container_of(data, struct cpudata, update_util);

	cpu->sched_flags |= flags;

	if (smp_processor_id() == cpu->cpu)
		intel_pstate_update_util_hwp_local(cpu, time);
}

static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu)
{
	struct sample *sample = &cpu->sample;

	sample->core_avg_perf = div_ext_fp(sample->aperf, sample->mperf);
}

static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
{
	u64 aperf, mperf;
	unsigned long flags;
	u64 tsc;

	local_irq_save(flags);
	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);
	tsc = rdtsc();
	if (cpu->prev_mperf == mperf || cpu->prev_tsc == tsc) {
		local_irq_restore(flags);
		return false;
	}
	local_irq_restore(flags);

	cpu->last_sample_time = cpu->sample.time;
	cpu->sample.time = time;
	cpu->sample.aperf = aperf;
	cpu->sample.mperf = mperf;
	cpu->sample.tsc = tsc;
	cpu->sample.aperf -= cpu->prev_aperf;
	cpu->sample.mperf -= cpu->prev_mperf;
	cpu->sample.tsc -= cpu->prev_tsc;

	cpu->prev_aperf = aperf;
	cpu->prev_mperf = mperf;
	cpu->prev_tsc = tsc;
	/*
	 * First time this function is invoked in a given cycle, all of the
	 * previous sample data fields are equal to zero or stale and they must
	 * be populated with meaningful numbers for things to work, so assume
	 * that sample.time will always be reset before setting the utilization
	 * update hook and make the caller skip the sample then.
	 */
	if (cpu->last_sample_time) {
		intel_pstate_calc_avg_perf(cpu);
		return true;
	}
	return false;
}

static inline int32_t get_avg_frequency(struct cpudata *cpu)
{
	return mul_ext_fp(cpu->sample.core_avg_perf, cpu_khz);
}

static inline int32_t get_avg_pstate(struct cpudata *cpu)
{
	return mul_ext_fp(cpu->pstate.max_pstate_physical,
			  cpu->sample.core_avg_perf);
}

static inline int32_t get_target_pstate(struct cpudata *cpu)
{
	struct sample *sample = &cpu->sample;
	int32_t busy_frac;
	int target, avg_pstate;

	busy_frac = div_fp(sample->mperf << cpu->aperf_mperf_shift,
			   sample->tsc);

	if (busy_frac < cpu->iowait_boost)
		busy_frac = cpu->iowait_boost;

	sample->busy_scaled = busy_frac * 100;

	target = global.no_turbo || global.turbo_disabled ?
		cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
	target += target >> 2;
	target = mul_fp(target, busy_frac);
	if (target < cpu->pstate.min_pstate)
		target = cpu->pstate.min_pstate;

	/*
	 * If the average P-state during the previous cycle was higher than the
	 * current target, add 50% of the difference to the target to reduce
	 * possible performance oscillations and offset possible performance
	 * loss related to moving the workload from one CPU to another within
	 * a package/module.
	 */
	avg_pstate = get_avg_pstate(cpu);
	if (avg_pstate > target)
		target += (avg_pstate - target) >> 1;

	return target;
}
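
/*
 * Worked example (illustration only, not in the upstream driver): with turbo
 * available, turbo_pstate = 32 (3.2 GHz at the core scaling of 100000) and a
 * CPU that was about 50% busy (busy_frac = int_tofp(1) / 2 = 128),
 * get_target_pstate() computes target = 32 + 32/4 = 40 and then
 * mul_fp(40, 128) = 20, i.e. it aims roughly 25% above the
 * utilization-proportional ratio so the CPU is not run right at the edge of
 * its current demand.
 */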

static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate)
{
	int min_pstate = max(cpu->pstate.min_pstate, cpu->min_perf_ratio);
	int max_pstate = max(min_pstate, cpu->max_perf_ratio);

	return clamp_t(int, pstate, min_pstate, max_pstate);
}

static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
{
	if (pstate == cpu->pstate.current_pstate)
		return;

	cpu->pstate.current_pstate = pstate;
	wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
}

static void intel_pstate_adjust_pstate(struct cpudata *cpu)
{
	int from = cpu->pstate.current_pstate;
	struct sample *sample;
	int target_pstate;

	update_turbo_state();

	target_pstate = get_target_pstate(cpu);
	target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
	trace_cpu_frequency(target_pstate * cpu->pstate.scaling, cpu->cpu);
	intel_pstate_update_pstate(cpu, target_pstate);

	sample = &cpu->sample;
	trace_pstate_sample(mul_ext_fp(100, sample->core_avg_perf),
		fp_toint(sample->busy_scaled),
		from,
		cpu->pstate.current_pstate,
		sample->mperf,
		sample->aperf,
		sample->tsc,
		get_avg_frequency(cpu),
		fp_toint(cpu->iowait_boost * 100));
}

static void intel_pstate_update_util(struct update_util_data *data, u64 time,
				     unsigned int flags)
{
	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
	u64 delta_ns;

	/* Don't allow remote callbacks */
	if (smp_processor_id() != cpu->cpu)
		return;

	delta_ns = time - cpu->last_update;
	if (flags & SCHED_CPUFREQ_IOWAIT) {
		/* Start over if the CPU may have been idle. */
		if (delta_ns > TICK_NSEC) {
			cpu->iowait_boost = ONE_EIGHTH_FP;
		} else if (cpu->iowait_boost >= ONE_EIGHTH_FP) {
			cpu->iowait_boost <<= 1;
			if (cpu->iowait_boost > int_tofp(1))
				cpu->iowait_boost = int_tofp(1);
		} else {
			cpu->iowait_boost = ONE_EIGHTH_FP;
		}
	} else if (cpu->iowait_boost) {
		/* Clear iowait_boost if the CPU may have been idle. */
		if (delta_ns > TICK_NSEC)
			cpu->iowait_boost = 0;
		else
			cpu->iowait_boost >>= 1;
	}
	cpu->last_update = time;
	delta_ns = time - cpu->sample.time;
	if ((s64)delta_ns < INTEL_PSTATE_SAMPLING_INTERVAL)
		return;

	if (intel_pstate_sample(cpu, time))
		intel_pstate_adjust_pstate(cpu);
}

static struct pstate_funcs core_funcs = {
	.get_max = core_get_max_pstate,
	.get_max_physical = core_get_max_pstate_physical,
	.get_min = core_get_min_pstate,
	.get_turbo = core_get_turbo_pstate,
	.get_scaling = core_get_scaling,
	.get_val = core_get_val,
};

static const struct pstate_funcs silvermont_funcs = {
	.get_max = atom_get_max_pstate,
	.get_max_physical = atom_get_max_pstate,
	.get_min = atom_get_min_pstate,
	.get_turbo = atom_get_turbo_pstate,
	.get_val = atom_get_val,
	.get_scaling = silvermont_get_scaling,
	.get_vid = atom_get_vid,
};

static const struct pstate_funcs airmont_funcs = {
	.get_max = atom_get_max_pstate,
	.get_max_physical = atom_get_max_pstate,
	.get_min = atom_get_min_pstate,
	.get_turbo = atom_get_turbo_pstate,
	.get_val = atom_get_val,
	.get_scaling = airmont_get_scaling,
	.get_vid = atom_get_vid,
};

static const struct pstate_funcs knl_funcs = {
	.get_max = core_get_max_pstate,
	.get_max_physical = core_get_max_pstate_physical,
	.get_min = core_get_min_pstate,
	.get_turbo = knl_get_turbo_pstate,
	.get_aperf_mperf_shift = knl_get_aperf_mperf_shift,
	.get_scaling = core_get_scaling,
	.get_val = core_get_val,
};

#define ICPU(model, policy) \
	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
			(unsigned long)&policy }

static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
	ICPU(INTEL_FAM6_SANDYBRIDGE,		core_funcs),
	ICPU(INTEL_FAM6_SANDYBRIDGE_X,		core_funcs),
	ICPU(INTEL_FAM6_ATOM_SILVERMONT,	silvermont_funcs),
	ICPU(INTEL_FAM6_IVYBRIDGE,		core_funcs),
	ICPU(INTEL_FAM6_HASWELL_CORE,		core_funcs),
	ICPU(INTEL_FAM6_BROADWELL_CORE,		core_funcs),
	ICPU(INTEL_FAM6_IVYBRIDGE_X,		core_funcs),
	ICPU(INTEL_FAM6_HASWELL_X,		core_funcs),
	ICPU(INTEL_FAM6_HASWELL_ULT,		core_funcs),
	ICPU(INTEL_FAM6_HASWELL_GT3E,		core_funcs),
	ICPU(INTEL_FAM6_BROADWELL_GT3E,		core_funcs),
	ICPU(INTEL_FAM6_ATOM_AIRMONT,		airmont_funcs),
	ICPU(INTEL_FAM6_SKYLAKE_MOBILE,		core_funcs),
	ICPU(INTEL_FAM6_BROADWELL_X,		core_funcs),
	ICPU(INTEL_FAM6_SKYLAKE_DESKTOP,	core_funcs),
	ICPU(INTEL_FAM6_BROADWELL_XEON_D,	core_funcs),
	ICPU(INTEL_FAM6_XEON_PHI_KNL,		knl_funcs),
	ICPU(INTEL_FAM6_XEON_PHI_KNM,		knl_funcs),
	ICPU(INTEL_FAM6_ATOM_GOLDMONT,		core_funcs),
	ICPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS,	core_funcs),
	ICPU(INTEL_FAM6_SKYLAKE_X,		core_funcs),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);

static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
	ICPU(INTEL_FAM6_BROADWELL_XEON_D,	core_funcs),
	ICPU(INTEL_FAM6_BROADWELL_X,		core_funcs),
	ICPU(INTEL_FAM6_SKYLAKE_X,		core_funcs),
	{}
};

static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = {
	ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, core_funcs),
	{}
};

static const struct x86_cpu_id intel_pstate_hwp_boost_ids[] = {
	ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs),
	ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_funcs),
	{}
};
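
/*
 * Illustrative note (not in the upstream source): each ICPU() entry above
 * stashes a pointer to one of the pstate_funcs tables in ->driver_data, so a
 * match on e.g. INTEL_FAM6_ATOM_AIRMONT selects airmont_funcs and therefore
 * airmont_get_scaling()/atom_get_vid(), while the Core and Xeon models share
 * core_funcs.  The smaller tables are consulted with x86_match_cpu() below to
 * key model-specific behavior such as disabling the energy-efficiency bit or
 * defaulting hwp_boost on server platforms.
 */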

static int intel_pstate_init_cpu(unsigned int cpunum)
{
	struct cpudata *cpu;

	cpu = all_cpu_data[cpunum];

	if (!cpu) {
		cpu = kzalloc(sizeof(*cpu), GFP_KERNEL);
		if (!cpu)
			return -ENOMEM;

		all_cpu_data[cpunum] = cpu;

		cpu->epp_default = -EINVAL;
		cpu->epp_powersave = -EINVAL;
		cpu->epp_saved = -EINVAL;
	}

	cpu = all_cpu_data[cpunum];

	cpu->cpu = cpunum;

	if (hwp_active) {
		const struct x86_cpu_id *id;

		id = x86_match_cpu(intel_pstate_cpu_ee_disable_ids);
		if (id)
			intel_pstate_disable_ee(cpunum);

		intel_pstate_hwp_enable(cpu);

		id = x86_match_cpu(intel_pstate_hwp_boost_ids);
		if (id && intel_pstate_acpi_pm_profile_server())
			hwp_boost = true;
	}

	intel_pstate_get_cpu_pstates(cpu);

	pr_debug("controlling: cpu %d\n", cpunum);

	return 0;
}

static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
{
	struct cpudata *cpu = all_cpu_data[cpu_num];

	if (hwp_active && !hwp_boost)
		return;

	if (cpu->update_util_set)
		return;

	/* Prevent intel_pstate_update_util() from using stale data. */
	cpu->sample.time = 0;
	cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
				     (hwp_active ?
				      intel_pstate_update_util_hwp :
				      intel_pstate_update_util));
	cpu->update_util_set = true;
}

static void intel_pstate_clear_update_util_hook(unsigned int cpu)
{
	struct cpudata *cpu_data = all_cpu_data[cpu];

	if (!cpu_data->update_util_set)
		return;

	cpufreq_remove_update_util_hook(cpu);
	cpu_data->update_util_set = false;
	synchronize_rcu();
}

static int intel_pstate_get_max_freq(struct cpudata *cpu)
{
	return global.turbo_disabled || global.no_turbo ?
			cpu->pstate.max_freq : cpu->pstate.turbo_freq;
}

static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
					    struct cpudata *cpu)
{
	int max_freq = intel_pstate_get_max_freq(cpu);
	int32_t max_policy_perf, min_policy_perf;
	int max_state, turbo_max;

	/*
	 * HWP needs some special consideration, because on BDX the
	 * HWP_REQUEST uses abstract value to represent performance
	 * rather than pure ratios.
	 */
	if (hwp_active) {
		intel_pstate_get_hwp_max(cpu->cpu, &turbo_max, &max_state);
	} else {
		max_state = global.no_turbo || global.turbo_disabled ?
			cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
		turbo_max = cpu->pstate.turbo_pstate;
	}

	max_policy_perf = max_state * policy->max / max_freq;
	if (policy->max == policy->min) {
		min_policy_perf = max_policy_perf;
	} else {
		min_policy_perf = max_state * policy->min / max_freq;
		min_policy_perf = clamp_t(int32_t, min_policy_perf,
					  0, max_policy_perf);
	}

	pr_debug("cpu:%d max_state %d min_policy_perf:%d max_policy_perf:%d\n",
		 policy->cpu, max_state,
		 min_policy_perf, max_policy_perf);

	/* Normalize user input to [min_perf, max_perf] */
	if (per_cpu_limits) {
		cpu->min_perf_ratio = min_policy_perf;
		cpu->max_perf_ratio = max_policy_perf;
	} else {
		int32_t global_min, global_max;

		/* Global limits are in percent of the maximum turbo P-state. */
*/ 1999 global_max = DIV_ROUND_UP(turbo_max * global.max_perf_pct, 100); 2000 global_min = DIV_ROUND_UP(turbo_max * global.min_perf_pct, 100); 2001 global_min = clamp_t(int32_t, global_min, 0, global_max); 2002 2003 pr_debug("cpu:%d global_min:%d global_max:%d\n", policy->cpu, 2004 global_min, global_max); 2005 2006 cpu->min_perf_ratio = max(min_policy_perf, global_min); 2007 cpu->min_perf_ratio = min(cpu->min_perf_ratio, max_policy_perf); 2008 cpu->max_perf_ratio = min(max_policy_perf, global_max); 2009 cpu->max_perf_ratio = max(min_policy_perf, cpu->max_perf_ratio); 2010 2011 /* Make sure min_perf <= max_perf */ 2012 cpu->min_perf_ratio = min(cpu->min_perf_ratio, 2013 cpu->max_perf_ratio); 2014 2015 } 2016 pr_debug("cpu:%d max_perf_ratio:%d min_perf_ratio:%d\n", policy->cpu, 2017 cpu->max_perf_ratio, 2018 cpu->min_perf_ratio); 2019 } 2020 2021 static int intel_pstate_set_policy(struct cpufreq_policy *policy) 2022 { 2023 struct cpudata *cpu; 2024 2025 if (!policy->cpuinfo.max_freq) 2026 return -ENODEV; 2027 2028 pr_debug("set_policy cpuinfo.max %u policy->max %u\n", 2029 policy->cpuinfo.max_freq, policy->max); 2030 2031 cpu = all_cpu_data[policy->cpu]; 2032 cpu->policy = policy->policy; 2033 2034 mutex_lock(&intel_pstate_limits_lock); 2035 2036 intel_pstate_update_perf_limits(policy, cpu); 2037 2038 if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { 2039 /* 2040 * NOHZ_FULL CPUs need this as the governor callback may not 2041 * be invoked on them. 2042 */ 2043 intel_pstate_clear_update_util_hook(policy->cpu); 2044 intel_pstate_max_within_limits(cpu); 2045 } else { 2046 intel_pstate_set_update_util_hook(policy->cpu); 2047 } 2048 2049 if (hwp_active) { 2050 /* 2051 * If hwp_boost was active before and has since been 2052 * disabled dynamically, the update util hook registered 2053 * for it needs to be cleared here.
2054 */ 2055 if (!hwp_boost) 2056 intel_pstate_clear_update_util_hook(policy->cpu); 2057 intel_pstate_hwp_set(policy->cpu); 2058 } 2059 2060 mutex_unlock(&intel_pstate_limits_lock); 2061 2062 return 0; 2063 } 2064 2065 static void intel_pstate_adjust_policy_max(struct cpufreq_policy *policy, 2066 struct cpudata *cpu) 2067 { 2068 if (!hwp_active && 2069 cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate && 2070 policy->max < policy->cpuinfo.max_freq && 2071 policy->max > cpu->pstate.max_freq) { 2072 pr_debug("policy->max > max non turbo frequency\n"); 2073 policy->max = policy->cpuinfo.max_freq; 2074 } 2075 } 2076 2077 static int intel_pstate_verify_policy(struct cpufreq_policy *policy) 2078 { 2079 struct cpudata *cpu = all_cpu_data[policy->cpu]; 2080 2081 update_turbo_state(); 2082 cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, 2083 intel_pstate_get_max_freq(cpu)); 2084 2085 if (policy->policy != CPUFREQ_POLICY_POWERSAVE && 2086 policy->policy != CPUFREQ_POLICY_PERFORMANCE) 2087 return -EINVAL; 2088 2089 intel_pstate_adjust_policy_max(policy, cpu); 2090 2091 return 0; 2092 } 2093 2094 static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy) 2095 { 2096 intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]); 2097 } 2098 2099 static void intel_pstate_stop_cpu(struct cpufreq_policy *policy) 2100 { 2101 pr_debug("CPU %d exiting\n", policy->cpu); 2102 2103 intel_pstate_clear_update_util_hook(policy->cpu); 2104 if (hwp_active) { 2105 intel_pstate_hwp_save_state(policy); 2106 intel_pstate_hwp_force_min_perf(policy->cpu); 2107 } else { 2108 intel_cpufreq_stop_cpu(policy); 2109 } 2110 } 2111 2112 static int intel_pstate_cpu_exit(struct cpufreq_policy *policy) 2113 { 2114 intel_pstate_exit_perf_limits(policy); 2115 2116 policy->fast_switch_possible = false; 2117 2118 return 0; 2119 } 2120 2121 static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) 2122 { 2123 struct cpudata *cpu; 2124 int rc; 2125 2126 rc = intel_pstate_init_cpu(policy->cpu); 2127 if (rc) 2128 return rc; 2129 2130 cpu = all_cpu_data[policy->cpu]; 2131 2132 cpu->max_perf_ratio = 0xFF; 2133 cpu->min_perf_ratio = 0; 2134 2135 policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling; 2136 policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling; 2137 2138 /* cpuinfo and default policy values */ 2139 policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; 2140 update_turbo_state(); 2141 policy->cpuinfo.max_freq = global.turbo_disabled ? 2142 cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; 2143 policy->cpuinfo.max_freq *= cpu->pstate.scaling; 2144 2145 if (hwp_active) { 2146 unsigned int max_freq; 2147 2148 max_freq = global.turbo_disabled ? 
2149 cpu->pstate.max_freq : cpu->pstate.turbo_freq; 2150 if (max_freq < policy->cpuinfo.max_freq) 2151 policy->cpuinfo.max_freq = max_freq; 2152 } 2153 2154 intel_pstate_init_acpi_perf_limits(policy); 2155 2156 policy->fast_switch_possible = true; 2157 2158 return 0; 2159 } 2160 2161 static int intel_pstate_cpu_init(struct cpufreq_policy *policy) 2162 { 2163 int ret = __intel_pstate_cpu_init(policy); 2164 2165 if (ret) 2166 return ret; 2167 2168 if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE)) 2169 policy->policy = CPUFREQ_POLICY_PERFORMANCE; 2170 else 2171 policy->policy = CPUFREQ_POLICY_POWERSAVE; 2172 2173 return 0; 2174 } 2175 2176 static struct cpufreq_driver intel_pstate = { 2177 .flags = CPUFREQ_CONST_LOOPS, 2178 .verify = intel_pstate_verify_policy, 2179 .setpolicy = intel_pstate_set_policy, 2180 .suspend = intel_pstate_hwp_save_state, 2181 .resume = intel_pstate_resume, 2182 .init = intel_pstate_cpu_init, 2183 .exit = intel_pstate_cpu_exit, 2184 .stop_cpu = intel_pstate_stop_cpu, 2185 .name = "intel_pstate", 2186 }; 2187 2188 static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) 2189 { 2190 struct cpudata *cpu = all_cpu_data[policy->cpu]; 2191 2192 update_turbo_state(); 2193 cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, 2194 intel_pstate_get_max_freq(cpu)); 2195 2196 intel_pstate_adjust_policy_max(policy, cpu); 2197 2198 intel_pstate_update_perf_limits(policy, cpu); 2199 2200 return 0; 2201 } 2202 2203 /* Use of trace in passive mode: 2204 * 2205 * In passive mode the trace core_busy field (also known as the 2206 * performance field, and labelled as such on the graphs; also known as 2207 * core_avg_perf) is not needed and so is re-assigned to indicate if the 2208 * driver call was via the normal or fast switch path. Various graphs 2209 * output from the intel_pstate_tracer.py utility that include core_busy 2210 * (or performance or core_avg_perf) have a fixed y-axis from 0 to 100%, 2211 * so we use 10 to indicate the normal path through the driver, and 2212 * 90 to indicate the fast switch path through the driver. 2213 * The scaled_busy field is not used, and is set to 0.
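 *
 * Concretely, a sample emitted from intel_cpufreq_target() below is traced
 * with core_busy == INTEL_PSTATE_TRACE_TARGET (10), while one emitted from
 * intel_cpufreq_fast_switch() is traced with INTEL_PSTATE_TRACE_FAST_SWITCH
 * (90), which keeps the two call paths visually distinct on that fixed
 * 0 to 100% axis.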
2214 */ 2215 2216 #define INTEL_PSTATE_TRACE_TARGET 10 2217 #define INTEL_PSTATE_TRACE_FAST_SWITCH 90 2218 2219 static void intel_cpufreq_trace(struct cpudata *cpu, unsigned int trace_type, int old_pstate) 2220 { 2221 struct sample *sample; 2222 2223 if (!trace_pstate_sample_enabled()) 2224 return; 2225 2226 if (!intel_pstate_sample(cpu, ktime_get())) 2227 return; 2228 2229 sample = &cpu->sample; 2230 trace_pstate_sample(trace_type, 2231 0, 2232 old_pstate, 2233 cpu->pstate.current_pstate, 2234 sample->mperf, 2235 sample->aperf, 2236 sample->tsc, 2237 get_avg_frequency(cpu), 2238 fp_toint(cpu->iowait_boost * 100)); 2239 } 2240 2241 static int intel_cpufreq_target(struct cpufreq_policy *policy, 2242 unsigned int target_freq, 2243 unsigned int relation) 2244 { 2245 struct cpudata *cpu = all_cpu_data[policy->cpu]; 2246 struct cpufreq_freqs freqs; 2247 int target_pstate, old_pstate; 2248 2249 update_turbo_state(); 2250 2251 freqs.old = policy->cur; 2252 freqs.new = target_freq; 2253 2254 cpufreq_freq_transition_begin(policy, &freqs); 2255 switch (relation) { 2256 case CPUFREQ_RELATION_L: 2257 target_pstate = DIV_ROUND_UP(freqs.new, cpu->pstate.scaling); 2258 break; 2259 case CPUFREQ_RELATION_H: 2260 target_pstate = freqs.new / cpu->pstate.scaling; 2261 break; 2262 default: 2263 target_pstate = DIV_ROUND_CLOSEST(freqs.new, cpu->pstate.scaling); 2264 break; 2265 } 2266 target_pstate = intel_pstate_prepare_request(cpu, target_pstate); 2267 old_pstate = cpu->pstate.current_pstate; 2268 if (target_pstate != cpu->pstate.current_pstate) { 2269 cpu->pstate.current_pstate = target_pstate; 2270 wrmsrl_on_cpu(policy->cpu, MSR_IA32_PERF_CTL, 2271 pstate_funcs.get_val(cpu, target_pstate)); 2272 } 2273 freqs.new = target_pstate * cpu->pstate.scaling; 2274 intel_cpufreq_trace(cpu, INTEL_PSTATE_TRACE_TARGET, old_pstate); 2275 cpufreq_freq_transition_end(policy, &freqs, false); 2276 2277 return 0; 2278 } 2279 2280 static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy, 2281 unsigned int target_freq) 2282 { 2283 struct cpudata *cpu = all_cpu_data[policy->cpu]; 2284 int target_pstate, old_pstate; 2285 2286 update_turbo_state(); 2287 2288 target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling); 2289 target_pstate = intel_pstate_prepare_request(cpu, target_pstate); 2290 old_pstate = cpu->pstate.current_pstate; 2291 intel_pstate_update_pstate(cpu, target_pstate); 2292 intel_cpufreq_trace(cpu, INTEL_PSTATE_TRACE_FAST_SWITCH, old_pstate); 2293 return target_pstate * cpu->pstate.scaling; 2294 } 2295 2296 static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy) 2297 { 2298 int ret = __intel_pstate_cpu_init(policy); 2299 2300 if (ret) 2301 return ret; 2302 2303 policy->cpuinfo.transition_latency = INTEL_CPUFREQ_TRANSITION_LATENCY; 2304 policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY; 2305 /* This reflects the intel_pstate_get_cpu_pstates() setting. 
*/ 2306 policy->cur = policy->cpuinfo.min_freq; 2307 2308 return 0; 2309 } 2310 2311 static struct cpufreq_driver intel_cpufreq = { 2312 .flags = CPUFREQ_CONST_LOOPS, 2313 .verify = intel_cpufreq_verify_policy, 2314 .target = intel_cpufreq_target, 2315 .fast_switch = intel_cpufreq_fast_switch, 2316 .init = intel_cpufreq_cpu_init, 2317 .exit = intel_pstate_cpu_exit, 2318 .stop_cpu = intel_cpufreq_stop_cpu, 2319 .name = "intel_cpufreq", 2320 }; 2321 2322 static struct cpufreq_driver *default_driver = &intel_pstate; 2323 2324 static void intel_pstate_driver_cleanup(void) 2325 { 2326 unsigned int cpu; 2327 2328 get_online_cpus(); 2329 for_each_online_cpu(cpu) { 2330 if (all_cpu_data[cpu]) { 2331 if (intel_pstate_driver == &intel_pstate) 2332 intel_pstate_clear_update_util_hook(cpu); 2333 2334 kfree(all_cpu_data[cpu]); 2335 all_cpu_data[cpu] = NULL; 2336 } 2337 } 2338 put_online_cpus(); 2339 intel_pstate_driver = NULL; 2340 } 2341 2342 static int intel_pstate_register_driver(struct cpufreq_driver *driver) 2343 { 2344 int ret; 2345 2346 memset(&global, 0, sizeof(global)); 2347 global.max_perf_pct = 100; 2348 2349 intel_pstate_driver = driver; 2350 ret = cpufreq_register_driver(intel_pstate_driver); 2351 if (ret) { 2352 intel_pstate_driver_cleanup(); 2353 return ret; 2354 } 2355 2356 global.min_perf_pct = min_perf_pct_min(); 2357 2358 return 0; 2359 } 2360 2361 static int intel_pstate_unregister_driver(void) 2362 { 2363 if (hwp_active) 2364 return -EBUSY; 2365 2366 cpufreq_unregister_driver(intel_pstate_driver); 2367 intel_pstate_driver_cleanup(); 2368 2369 return 0; 2370 } 2371 2372 static ssize_t intel_pstate_show_status(char *buf) 2373 { 2374 if (!intel_pstate_driver) 2375 return sprintf(buf, "off\n"); 2376 2377 return sprintf(buf, "%s\n", intel_pstate_driver == &intel_pstate ? 2378 "active" : "passive"); 2379 } 2380 2381 static int intel_pstate_update_status(const char *buf, size_t size) 2382 { 2383 int ret; 2384 2385 if (size == 3 && !strncmp(buf, "off", size)) 2386 return intel_pstate_driver ? 
2387 intel_pstate_unregister_driver() : -EINVAL; 2388 2389 if (size == 6 && !strncmp(buf, "active", size)) { 2390 if (intel_pstate_driver) { 2391 if (intel_pstate_driver == &intel_pstate) 2392 return 0; 2393 2394 ret = intel_pstate_unregister_driver(); 2395 if (ret) 2396 return ret; 2397 } 2398 2399 return intel_pstate_register_driver(&intel_pstate); 2400 } 2401 2402 if (size == 7 && !strncmp(buf, "passive", size)) { 2403 if (intel_pstate_driver) { 2404 if (intel_pstate_driver == &intel_cpufreq) 2405 return 0; 2406 2407 ret = intel_pstate_unregister_driver(); 2408 if (ret) 2409 return ret; 2410 } 2411 2412 return intel_pstate_register_driver(&intel_cpufreq); 2413 } 2414 2415 return -EINVAL; 2416 } 2417 2418 static int no_load __initdata; 2419 static int no_hwp __initdata; 2420 static int hwp_only __initdata; 2421 static unsigned int force_load __initdata; 2422 2423 static int __init intel_pstate_msrs_not_valid(void) 2424 { 2425 if (!pstate_funcs.get_max() || 2426 !pstate_funcs.get_min() || 2427 !pstate_funcs.get_turbo()) 2428 return -ENODEV; 2429 2430 return 0; 2431 } 2432 2433 static void __init copy_cpu_funcs(struct pstate_funcs *funcs) 2434 { 2435 pstate_funcs.get_max = funcs->get_max; 2436 pstate_funcs.get_max_physical = funcs->get_max_physical; 2437 pstate_funcs.get_min = funcs->get_min; 2438 pstate_funcs.get_turbo = funcs->get_turbo; 2439 pstate_funcs.get_scaling = funcs->get_scaling; 2440 pstate_funcs.get_val = funcs->get_val; 2441 pstate_funcs.get_vid = funcs->get_vid; 2442 pstate_funcs.get_aperf_mperf_shift = funcs->get_aperf_mperf_shift; 2443 } 2444 2445 #ifdef CONFIG_ACPI 2446 2447 static bool __init intel_pstate_no_acpi_pss(void) 2448 { 2449 int i; 2450 2451 for_each_possible_cpu(i) { 2452 acpi_status status; 2453 union acpi_object *pss; 2454 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; 2455 struct acpi_processor *pr = per_cpu(processors, i); 2456 2457 if (!pr) 2458 continue; 2459 2460 status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer); 2461 if (ACPI_FAILURE(status)) 2462 continue; 2463 2464 pss = buffer.pointer; 2465 if (pss && pss->type == ACPI_TYPE_PACKAGE) { 2466 kfree(pss); 2467 return false; 2468 } 2469 2470 kfree(pss); 2471 } 2472 2473 pr_debug("ACPI _PSS not found\n"); 2474 return true; 2475 } 2476 2477 static bool __init intel_pstate_no_acpi_pcch(void) 2478 { 2479 acpi_status status; 2480 acpi_handle handle; 2481 2482 status = acpi_get_handle(NULL, "\\_SB", &handle); 2483 if (ACPI_FAILURE(status)) 2484 goto not_found; 2485 2486 if (acpi_has_method(handle, "PCCH")) 2487 return false; 2488 2489 not_found: 2490 pr_debug("ACPI PCCH not found\n"); 2491 return true; 2492 } 2493 2494 static bool __init intel_pstate_has_acpi_ppc(void) 2495 { 2496 int i; 2497 2498 for_each_possible_cpu(i) { 2499 struct acpi_processor *pr = per_cpu(processors, i); 2500 2501 if (!pr) 2502 continue; 2503 if (acpi_has_method(pr->handle, "_PPC")) 2504 return true; 2505 } 2506 pr_debug("ACPI _PPC not found\n"); 2507 return false; 2508 } 2509 2510 enum { 2511 PSS, 2512 PPC, 2513 }; 2514 2515 /* Hardware vendor-specific info that has its own power management modes */ 2516 static struct acpi_platform_list plat_info[] __initdata = { 2517 {"HP ", "ProLiant", 0, ACPI_SIG_FADT, all_versions, 0, PSS}, 2518 {"ORACLE", "X4-2 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC}, 2519 {"ORACLE", "X4-2L ", 0, ACPI_SIG_FADT, all_versions, 0, PPC}, 2520 {"ORACLE", "X4-2B ", 0, ACPI_SIG_FADT, all_versions, 0, PPC}, 2521 {"ORACLE", "X3-2 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC}, 2522 {"ORACLE", 
"X3-2L ", 0, ACPI_SIG_FADT, all_versions, 0, PPC}, 2523 {"ORACLE", "X3-2B ", 0, ACPI_SIG_FADT, all_versions, 0, PPC}, 2524 {"ORACLE", "X4470M2 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC}, 2525 {"ORACLE", "X4270M3 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC}, 2526 {"ORACLE", "X4270M2 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC}, 2527 {"ORACLE", "X4170M2 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC}, 2528 {"ORACLE", "X4170 M3", 0, ACPI_SIG_FADT, all_versions, 0, PPC}, 2529 {"ORACLE", "X4275 M3", 0, ACPI_SIG_FADT, all_versions, 0, PPC}, 2530 {"ORACLE", "X6-2 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC}, 2531 {"ORACLE", "Sudbury ", 0, ACPI_SIG_FADT, all_versions, 0, PPC}, 2532 { } /* End */ 2533 }; 2534 2535 static bool __init intel_pstate_platform_pwr_mgmt_exists(void) 2536 { 2537 const struct x86_cpu_id *id; 2538 u64 misc_pwr; 2539 int idx; 2540 2541 id = x86_match_cpu(intel_pstate_cpu_oob_ids); 2542 if (id) { 2543 rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr); 2544 if (misc_pwr & (1 << 8)) { 2545 pr_debug("Bit 8 in the MISC_PWR_MGMT MSR set\n"); 2546 return true; 2547 } 2548 } 2549 2550 idx = acpi_match_platform_list(plat_info); 2551 if (idx < 0) 2552 return false; 2553 2554 switch (plat_info[idx].data) { 2555 case PSS: 2556 if (!intel_pstate_no_acpi_pss()) 2557 return false; 2558 2559 return intel_pstate_no_acpi_pcch(); 2560 case PPC: 2561 return intel_pstate_has_acpi_ppc() && !force_load; 2562 } 2563 2564 return false; 2565 } 2566 2567 static void intel_pstate_request_control_from_smm(void) 2568 { 2569 /* 2570 * It may be unsafe to request P-states control from SMM if _PPC support 2571 * has not been enabled. 2572 */ 2573 if (acpi_ppc) 2574 acpi_processor_pstate_control(); 2575 } 2576 #else /* CONFIG_ACPI not enabled */ 2577 static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; } 2578 static inline bool intel_pstate_has_acpi_ppc(void) { return false; } 2579 static inline void intel_pstate_request_control_from_smm(void) {} 2580 #endif /* CONFIG_ACPI */ 2581 2582 #define INTEL_PSTATE_HWP_BROADWELL 0x01 2583 2584 #define ICPU_HWP(model, hwp_mode) \ 2585 { X86_VENDOR_INTEL, 6, model, X86_FEATURE_HWP, hwp_mode } 2586 2587 static const struct x86_cpu_id hwp_support_ids[] __initconst = { 2588 ICPU_HWP(INTEL_FAM6_BROADWELL_X, INTEL_PSTATE_HWP_BROADWELL), 2589 ICPU_HWP(INTEL_FAM6_BROADWELL_XEON_D, INTEL_PSTATE_HWP_BROADWELL), 2590 ICPU_HWP(X86_MODEL_ANY, 0), 2591 {} 2592 }; 2593 2594 static int __init intel_pstate_init(void) 2595 { 2596 const struct x86_cpu_id *id; 2597 int rc; 2598 2599 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) 2600 return -ENODEV; 2601 2602 if (no_load) 2603 return -ENODEV; 2604 2605 id = x86_match_cpu(hwp_support_ids); 2606 if (id) { 2607 copy_cpu_funcs(&core_funcs); 2608 if (!no_hwp) { 2609 hwp_active++; 2610 hwp_mode_bdw = id->driver_data; 2611 intel_pstate.attr = hwp_cpufreq_attrs; 2612 goto hwp_cpu_matched; 2613 } 2614 } else { 2615 id = x86_match_cpu(intel_pstate_cpu_ids); 2616 if (!id) { 2617 pr_info("CPU model not supported\n"); 2618 return -ENODEV; 2619 } 2620 2621 copy_cpu_funcs((struct pstate_funcs *)id->driver_data); 2622 } 2623 2624 if (intel_pstate_msrs_not_valid()) { 2625 pr_info("Invalid MSRs\n"); 2626 return -ENODEV; 2627 } 2628 2629 hwp_cpu_matched: 2630 /* 2631 * The Intel pstate driver will be ignored if the platform 2632 * firmware has its own power management modes. 
2633 */ 2634 if (intel_pstate_platform_pwr_mgmt_exists()) { 2635 pr_info("P-states controlled by the platform\n"); 2636 return -ENODEV; 2637 } 2638 2639 if (!hwp_active && hwp_only) 2640 return -ENOTSUPP; 2641 2642 pr_info("Intel P-state driver initializing\n"); 2643 2644 all_cpu_data = vzalloc(array_size(sizeof(void *), num_possible_cpus())); 2645 if (!all_cpu_data) 2646 return -ENOMEM; 2647 2648 intel_pstate_request_control_from_smm(); 2649 2650 intel_pstate_sysfs_expose_params(); 2651 2652 mutex_lock(&intel_pstate_driver_lock); 2653 rc = intel_pstate_register_driver(default_driver); 2654 mutex_unlock(&intel_pstate_driver_lock); 2655 if (rc) 2656 return rc; 2657 2658 if (hwp_active) 2659 pr_info("HWP enabled\n"); 2660 2661 return 0; 2662 } 2663 device_initcall(intel_pstate_init); 2664 2665 static int __init intel_pstate_setup(char *str) 2666 { 2667 if (!str) 2668 return -EINVAL; 2669 2670 if (!strcmp(str, "disable")) { 2671 no_load = 1; 2672 } else if (!strcmp(str, "passive")) { 2673 pr_info("Passive mode enabled\n"); 2674 default_driver = &intel_cpufreq; 2675 no_hwp = 1; 2676 } 2677 if (!strcmp(str, "no_hwp")) { 2678 pr_info("HWP disabled\n"); 2679 no_hwp = 1; 2680 } 2681 if (!strcmp(str, "force")) 2682 force_load = 1; 2683 if (!strcmp(str, "hwp_only")) 2684 hwp_only = 1; 2685 if (!strcmp(str, "per_cpu_perf_limits")) 2686 per_cpu_limits = true; 2687 2688 #ifdef CONFIG_ACPI 2689 if (!strcmp(str, "support_acpi_ppc")) 2690 acpi_ppc = true; 2691 #endif 2692 2693 return 0; 2694 } 2695 early_param("intel_pstate", intel_pstate_setup); 2696 2697 MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>"); 2698 MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors"); 2699 MODULE_LICENSE("GPL"); 2700