1 /* 2 * intel_pstate.c: Native P state management for Intel processors 3 * 4 * (C) Copyright 2012 Intel Corporation 5 * Author: Dirk Brandewie <dirk.j.brandewie@intel.com> 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; version 2 10 * of the License. 11 */ 12 13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 14 15 #include <linux/kernel.h> 16 #include <linux/kernel_stat.h> 17 #include <linux/module.h> 18 #include <linux/ktime.h> 19 #include <linux/hrtimer.h> 20 #include <linux/tick.h> 21 #include <linux/slab.h> 22 #include <linux/sched.h> 23 #include <linux/list.h> 24 #include <linux/cpu.h> 25 #include <linux/cpufreq.h> 26 #include <linux/sysfs.h> 27 #include <linux/types.h> 28 #include <linux/fs.h> 29 #include <linux/debugfs.h> 30 #include <linux/acpi.h> 31 #include <linux/vmalloc.h> 32 #include <trace/events/power.h> 33 34 #include <asm/div64.h> 35 #include <asm/msr.h> 36 #include <asm/cpu_device_id.h> 37 #include <asm/cpufeature.h> 38 #include <asm/intel-family.h> 39 40 #define INTEL_CPUFREQ_TRANSITION_LATENCY 20000 41 42 #define ATOM_RATIOS 0x66a 43 #define ATOM_VIDS 0x66b 44 #define ATOM_TURBO_RATIOS 0x66c 45 #define ATOM_TURBO_VIDS 0x66d 46 47 #ifdef CONFIG_ACPI 48 #include <acpi/processor.h> 49 #include <acpi/cppc_acpi.h> 50 #endif 51 52 #define FRAC_BITS 8 53 #define int_tofp(X) ((int64_t)(X) << FRAC_BITS) 54 #define fp_toint(X) ((X) >> FRAC_BITS) 55 56 #define EXT_BITS 6 57 #define EXT_FRAC_BITS (EXT_BITS + FRAC_BITS) 58 #define fp_ext_toint(X) ((X) >> EXT_FRAC_BITS) 59 #define int_ext_tofp(X) ((int64_t)(X) << EXT_FRAC_BITS) 60 61 static inline int32_t mul_fp(int32_t x, int32_t y) 62 { 63 return ((int64_t)x * (int64_t)y) >> FRAC_BITS; 64 } 65 66 static inline int32_t div_fp(s64 x, s64 y) 67 { 68 return div64_s64((int64_t)x << FRAC_BITS, y); 69 } 70 71 static inline int ceiling_fp(int32_t x) 72 { 73 int mask, ret; 74 75 ret = fp_toint(x); 76 mask = (1 << FRAC_BITS) - 1; 77 if (x & mask) 78 ret += 1; 79 return ret; 80 } 81 82 static inline u64 mul_ext_fp(u64 x, u64 y) 83 { 84 return (x * y) >> EXT_FRAC_BITS; 85 } 86 87 static inline u64 div_ext_fp(u64 x, u64 y) 88 { 89 return div64_u64(x << EXT_FRAC_BITS, y); 90 } 91 92 /** 93 * struct sample - Store performance sample 94 * @core_avg_perf: Ratio of APERF/MPERF which is the actual average 95 * performance during last sample period 96 * @busy_scaled: Scaled busy value which is used to calculate next 97 * P state. This can be different than core_avg_perf 98 * to account for cpu idle period 99 * @aperf: Difference of actual performance frequency clock count 100 * read from APERF MSR between last and current sample 101 * @mperf: Difference of maximum performance frequency clock count 102 * read from MPERF MSR between last and current sample 103 * @tsc: Difference of time stamp counter between last and 104 * current sample 105 * @time: Current time from scheduler 106 * 107 * This structure is used in the cpudata structure to store performance sample 108 * data for choosing next P State. 
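 *
 * A minimal sketch (names abbreviated for illustration) of how these deltas
 * are combined elsewhere in this file, see intel_pstate_calc_avg_perf()
 * and get_avg_frequency():
 *
 *	core_avg_perf = div_ext_fp(aperf, mperf);
 *	avg_freq = mul_ext_fp(core_avg_perf,
 *			      max_pstate_physical * scaling);
 *
 * e.g. aperf being twice mperf over a sample period yields an average
 * frequency of twice max_pstate_physical * scaling.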
109 */ 110 struct sample { 111 int32_t core_avg_perf; 112 int32_t busy_scaled; 113 u64 aperf; 114 u64 mperf; 115 u64 tsc; 116 u64 time; 117 }; 118 119 /** 120 * struct pstate_data - Store P state data 121 * @current_pstate: Current requested P state 122 * @min_pstate: Min P state possible for this platform 123 * @max_pstate: Max P state possible for this platform 124 * @max_pstate_physical:This is physical Max P state for a processor 125 * This can be higher than the max_pstate which can 126 * be limited by platform thermal design power limits 127 * @scaling: Scaling factor to convert frequency to cpufreq 128 * frequency units 129 * @turbo_pstate: Max Turbo P state possible for this platform 130 * @max_freq: @max_pstate frequency in cpufreq units 131 * @turbo_freq: @turbo_pstate frequency in cpufreq units 132 * 133 * Stores the per cpu model P state limits and current P state. 134 */ 135 struct pstate_data { 136 int current_pstate; 137 int min_pstate; 138 int max_pstate; 139 int max_pstate_physical; 140 int scaling; 141 int turbo_pstate; 142 unsigned int max_freq; 143 unsigned int turbo_freq; 144 }; 145 146 /** 147 * struct vid_data - Stores voltage information data 148 * @min: VID data for this platform corresponding to 149 * the lowest P state 150 * @max: VID data corresponding to the highest P State. 151 * @turbo: VID data for turbo P state 152 * @ratio: Ratio of (vid max - vid min) / 153 * (max P state - Min P State) 154 * 155 * Stores the voltage data for DVFS (Dynamic Voltage and Frequency Scaling) 156 * This data is used in Atom platforms, where in addition to target P state, 157 * the voltage data needs to be specified to select next P State. 158 */ 159 struct vid_data { 160 int min; 161 int max; 162 int turbo; 163 int32_t ratio; 164 }; 165 166 /** 167 * struct _pid - Stores PID data 168 * @setpoint: Target set point for busyness or performance 169 * @integral: Storage for accumulated error values 170 * @p_gain: PID proportional gain 171 * @i_gain: PID integral gain 172 * @d_gain: PID derivative gain 173 * @deadband: PID deadband 174 * @last_err: Last error storage for integral part of PID calculation 175 * 176 * Stores PID coefficients and last error for PID controller. 
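 *
 * A minimal sketch of the control law applied in pid_calc(), using the
 * FRAC_BITS fixed-point helpers defined above (deadband test, integral
 * clamping and rounding omitted):
 *
 *	fp_error  = setpoint - busy;
 *	integral += fp_error;
 *	result    = mul_fp(p_gain, fp_error) +
 *		    mul_fp(i_gain, integral) +
 *		    mul_fp(d_gain, fp_error - last_err);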
 */
struct _pid {
	int setpoint;
	int32_t integral;
	int32_t p_gain;
	int32_t i_gain;
	int32_t d_gain;
	int deadband;
	int32_t last_err;
};

/**
 * struct perf_limits - Store user and policy limits
 * @no_turbo:		User requested turbo state from intel_pstate sysfs
 * @turbo_disabled:	Platform turbo status, either from the
 *			MSR_IA32_MISC_ENABLE MSR or set when the maximum
 *			available pstate matches the maximum turbo pstate
 * @max_perf_pct:	Effective maximum performance limit in percentage; this
 *			is the minimum of the limit enforced by the cpufreq
 *			policy and the limit set by the user via the
 *			intel_pstate sysfs interface
 * @min_perf_pct:	Effective minimum performance limit in percentage; this
 *			is the maximum of the limit enforced by the cpufreq
 *			policy and the limit set by the user via the
 *			intel_pstate sysfs interface
 * @max_perf:		Scaled fixed-point value corresponding to max_perf_pct,
 *			used to limit the max pstate
 * @min_perf:		Scaled fixed-point value corresponding to min_perf_pct,
 *			used to limit the min pstate
 * @max_policy_pct:	The maximum performance in percentage enforced by
 *			the cpufreq setpolicy interface
 * @max_sysfs_pct:	The maximum performance in percentage enforced by
 *			the intel_pstate sysfs interface, unused when per cpu
 *			controls are enforced
 * @min_policy_pct:	The minimum performance in percentage enforced by
 *			the cpufreq setpolicy interface
 * @min_sysfs_pct:	The minimum performance in percentage enforced by
 *			the intel_pstate sysfs interface, unused when per cpu
 *			controls are enforced
 *
 * Storage for user and policy defined limits.
 */
struct perf_limits {
	int no_turbo;
	int turbo_disabled;
	int max_perf_pct;
	int min_perf_pct;
	int32_t max_perf;
	int32_t min_perf;
	int max_policy_pct;
	int max_sysfs_pct;
	int min_policy_pct;
	int min_sysfs_pct;
};

/**
 * struct cpudata -	Per CPU instance data storage
 * @cpu:		CPU number for this instance data
 * @policy:		CPUFreq policy value
 * @update_util:	CPUFreq utility callback information
 * @update_util_set:	CPUFreq utility callback is set
 * @iowait_boost:	iowait-related boost fraction
 * @last_update:	Time of the last update.
 * @pstate:		Stores P state limits for this CPU
 * @vid:		Stores VID limits for this CPU
 * @pid:		Stores PID parameters for this CPU
 * @last_sample_time:	Last Sample time
 * @prev_aperf:		Last APERF value read from APERF MSR
 * @prev_mperf:		Last MPERF value read from MPERF MSR
 * @prev_tsc:		Last timestamp counter (TSC) value
 * @prev_cummulative_iowait: IO Wait time difference from last and
 *			current sample
 * @sample:		Storage for storing last Sample data
 * @perf_limits:	Pointer to a struct perf_limits unique to this CPU.
 *			Not all fields in the structure are applicable
 *			when per cpu controls are enforced
 * @acpi_perf_data:	Stores ACPI perf information read from _PSS
 * @valid_pss_table:	Set to true for valid ACPI _PSS entries found
 * @epp_powersave:	Last saved HWP energy performance preference
 *			(EPP) or energy performance bias (EPB),
 *			when policy switched to performance
 * @epp_policy:		Last saved policy used to set EPP/EPB
 * @epp_default:	Power on default HWP energy performance
 *			preference/bias
 * @epp_saved:		Saved EPP/EPB during system suspend or CPU offline
 *			operation
 *
 * This structure stores per CPU instance data for all CPUs.
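 *
 * Instances are reached through the all_cpu_data[] array, indexed by CPU
 * number, e.g.:
 *
 *	struct cpudata *cpu = all_cpu_data[policy->cpu];
 *
 * When per_cpu_limits is set, intel_pstate_init_cpu() allocates a
 * struct perf_limits directly behind the cpudata and points @perf_limits
 * at it; otherwise the global limits pointer is used instead (see
 * intel_pstate_set_policy()).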
263 */ 264 struct cpudata { 265 int cpu; 266 267 unsigned int policy; 268 struct update_util_data update_util; 269 bool update_util_set; 270 271 struct pstate_data pstate; 272 struct vid_data vid; 273 struct _pid pid; 274 275 u64 last_update; 276 u64 last_sample_time; 277 u64 prev_aperf; 278 u64 prev_mperf; 279 u64 prev_tsc; 280 u64 prev_cummulative_iowait; 281 struct sample sample; 282 struct perf_limits *perf_limits; 283 #ifdef CONFIG_ACPI 284 struct acpi_processor_performance acpi_perf_data; 285 bool valid_pss_table; 286 #endif 287 unsigned int iowait_boost; 288 s16 epp_powersave; 289 s16 epp_policy; 290 s16 epp_default; 291 s16 epp_saved; 292 }; 293 294 static struct cpudata **all_cpu_data; 295 296 /** 297 * struct pstate_adjust_policy - Stores static PID configuration data 298 * @sample_rate_ms: PID calculation sample rate in ms 299 * @sample_rate_ns: Sample rate calculation in ns 300 * @deadband: PID deadband 301 * @setpoint: PID Setpoint 302 * @p_gain_pct: PID proportional gain 303 * @i_gain_pct: PID integral gain 304 * @d_gain_pct: PID derivative gain 305 * 306 * Stores per CPU model static PID configuration data. 307 */ 308 struct pstate_adjust_policy { 309 int sample_rate_ms; 310 s64 sample_rate_ns; 311 int deadband; 312 int setpoint; 313 int p_gain_pct; 314 int d_gain_pct; 315 int i_gain_pct; 316 }; 317 318 /** 319 * struct pstate_funcs - Per CPU model specific callbacks 320 * @get_max: Callback to get maximum non turbo effective P state 321 * @get_max_physical: Callback to get maximum non turbo physical P state 322 * @get_min: Callback to get minimum P state 323 * @get_turbo: Callback to get turbo P state 324 * @get_scaling: Callback to get frequency scaling factor 325 * @get_val: Callback to convert P state to actual MSR write value 326 * @get_vid: Callback to get VID data for Atom platforms 327 * @get_target_pstate: Callback to a function to calculate next P state to use 328 * 329 * Core and Atom CPU models have different way to get P State limits. This 330 * structure is used to store those callbacks. 
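 *
 * A rough sketch of how these callbacks are consumed, per
 * intel_pstate_get_cpu_pstates() and intel_pstate_update_pstate():
 *
 *	cpu->pstate.min_pstate   = pstate_funcs.get_min();
 *	cpu->pstate.max_pstate   = pstate_funcs.get_max();
 *	cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
 *	...
 *	wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));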
 */
struct pstate_funcs {
	int (*get_max)(void);
	int (*get_max_physical)(void);
	int (*get_min)(void);
	int (*get_turbo)(void);
	int (*get_scaling)(void);
	u64 (*get_val)(struct cpudata*, int pstate);
	void (*get_vid)(struct cpudata *);
	int32_t (*get_target_pstate)(struct cpudata *);
};

/**
 * struct cpu_defaults - Per CPU model default config data
 * @pid_policy:	PID config data
 * @funcs:	Callback function data
 */
struct cpu_defaults {
	struct pstate_adjust_policy pid_policy;
	struct pstate_funcs funcs;
};

static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu);
static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu);

static struct pstate_adjust_policy pid_params __read_mostly;
static struct pstate_funcs pstate_funcs __read_mostly;
static int hwp_active __read_mostly;
static bool per_cpu_limits __read_mostly;

static bool driver_registered __read_mostly;

#ifdef CONFIG_ACPI
static bool acpi_ppc;
#endif

static struct perf_limits performance_limits = {
	.no_turbo = 0,
	.turbo_disabled = 0,
	.max_perf_pct = 100,
	.max_perf = int_ext_tofp(1),
	.min_perf_pct = 100,
	.min_perf = int_ext_tofp(1),
	.max_policy_pct = 100,
	.max_sysfs_pct = 100,
	.min_policy_pct = 0,
	.min_sysfs_pct = 0,
};

static struct perf_limits powersave_limits = {
	.no_turbo = 0,
	.turbo_disabled = 0,
	.max_perf_pct = 100,
	.max_perf = int_ext_tofp(1),
	.min_perf_pct = 0,
	.min_perf = 0,
	.max_policy_pct = 100,
	.max_sysfs_pct = 100,
	.min_policy_pct = 0,
	.min_sysfs_pct = 0,
};

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE
static struct perf_limits *limits = &performance_limits;
#else
static struct perf_limits *limits = &powersave_limits;
#endif

static DEFINE_MUTEX(intel_pstate_driver_lock);
static DEFINE_MUTEX(intel_pstate_limits_lock);

#ifdef CONFIG_ACPI

static bool intel_pstate_get_ppc_enable_status(void)
{
	if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER ||
	    acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER)
		return true;

	return acpi_ppc;
}

#ifdef CONFIG_ACPI_CPPC_LIB

/* The work item is needed to avoid CPU hotplug locking issues */
static void intel_pstate_sched_itmt_work_fn(struct work_struct *work)
{
	sched_set_itmt_support();
}

static DECLARE_WORK(sched_itmt_work, intel_pstate_sched_itmt_work_fn);

static void intel_pstate_set_itmt_prio(int cpu)
{
	struct cppc_perf_caps cppc_perf;
	static u32 max_highest_perf = 0, min_highest_perf = U32_MAX;
	int ret;

	ret = cppc_get_perf_caps(cpu, &cppc_perf);
	if (ret)
		return;

	/*
	 * The priorities can be set regardless of whether or not
	 * sched_set_itmt_support(true) has been called and it is valid to
	 * update them at any time after it has been called.
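	 *
	 * Roughly, the flow below is (names as in this file):
	 *
	 *	sched_set_itmt_core_prio(cppc_perf.highest_perf, cpu);
	 *	if (highest_perf differs between the CPUs seen so far)
	 *		schedule_work(&sched_itmt_work);
	 *
	 * so per-core priorities are always published, while ITMT support is
	 * only enabled once CPU-to-CPU asymmetry has actually been observed.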
	 */
	sched_set_itmt_core_prio(cppc_perf.highest_perf, cpu);

	if (max_highest_perf <= min_highest_perf) {
		if (cppc_perf.highest_perf > max_highest_perf)
			max_highest_perf = cppc_perf.highest_perf;

		if (cppc_perf.highest_perf < min_highest_perf)
			min_highest_perf = cppc_perf.highest_perf;

		if (max_highest_perf > min_highest_perf) {
			/*
			 * This code can be run during CPU online under the
			 * CPU hotplug locks, so sched_set_itmt_support()
			 * cannot be called from here. Queue up a work item
			 * to invoke it.
			 */
			schedule_work(&sched_itmt_work);
		}
	}
}
#else
static void intel_pstate_set_itmt_prio(int cpu)
{
}
#endif

static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
{
	struct cpudata *cpu;
	int ret;
	int i;

	if (hwp_active) {
		intel_pstate_set_itmt_prio(policy->cpu);
		return;
	}

	if (!intel_pstate_get_ppc_enable_status())
		return;

	cpu = all_cpu_data[policy->cpu];

	ret = acpi_processor_register_performance(&cpu->acpi_perf_data,
						  policy->cpu);
	if (ret)
		return;

	/*
	 * Check if the control value in _PSS is for PERF_CTL MSR, which should
	 * guarantee that the states returned by it map to the states in our
	 * list directly.
	 */
	if (cpu->acpi_perf_data.control_register.space_id !=
					ACPI_ADR_SPACE_FIXED_HARDWARE)
		goto err;

	/*
	 * If there is only one entry in _PSS, simply ignore _PSS and continue
	 * as usual without taking _PSS into account.
	 */
	if (cpu->acpi_perf_data.state_count < 2)
		goto err;

	pr_debug("CPU%u - ACPI _PSS perf data\n", policy->cpu);
	for (i = 0; i < cpu->acpi_perf_data.state_count; i++) {
		pr_debug(" %cP%d: %u MHz, %u mW, 0x%x\n",
			 (i == cpu->acpi_perf_data.state ? '*' : ' '), i,
			 (u32) cpu->acpi_perf_data.states[i].core_frequency,
			 (u32) cpu->acpi_perf_data.states[i].power,
			 (u32) cpu->acpi_perf_data.states[i].control);
	}

	/*
	 * The _PSS table doesn't contain the whole turbo frequency range.
	 * It only contains an entry at +1 MHz above the max non-turbo
	 * frequency, with a control value corresponding to the max turbo
	 * ratio. However, when cpufreq's set_policy is called with that max
	 * frequency, performance is reduced, because this driver uses the
	 * real max turbo frequency as the max frequency. So correct this
	 * entry in the _PSS table to the real max turbo frequency, based on
	 * the turbo state. The value also needs to be converted to MHz,
	 * since the _PSS frequencies are in MHz.
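	 *
	 * For example (made-up numbers): with P1 = 2700 MHz and a 1-core max
	 * turbo of 3500 MHz, _PSS reports states[0] as 2701 MHz, while the
	 * fixup below rewrites it as
	 *
	 *	states[0].core_frequency = policy->cpuinfo.max_freq / 1000;
	 *
	 * i.e. 3500, since cpuinfo.max_freq is kept in kHz.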
520 */ 521 if (!limits->turbo_disabled) 522 cpu->acpi_perf_data.states[0].core_frequency = 523 policy->cpuinfo.max_freq / 1000; 524 cpu->valid_pss_table = true; 525 pr_debug("_PPC limits will be enforced\n"); 526 527 return; 528 529 err: 530 cpu->valid_pss_table = false; 531 acpi_processor_unregister_performance(policy->cpu); 532 } 533 534 static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) 535 { 536 struct cpudata *cpu; 537 538 cpu = all_cpu_data[policy->cpu]; 539 if (!cpu->valid_pss_table) 540 return; 541 542 acpi_processor_unregister_performance(policy->cpu); 543 } 544 #else 545 static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) 546 { 547 } 548 549 static inline void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) 550 { 551 } 552 #endif 553 554 static inline void pid_reset(struct _pid *pid, int setpoint, int busy, 555 int deadband, int integral) { 556 pid->setpoint = int_tofp(setpoint); 557 pid->deadband = int_tofp(deadband); 558 pid->integral = int_tofp(integral); 559 pid->last_err = int_tofp(setpoint) - int_tofp(busy); 560 } 561 562 static inline void pid_p_gain_set(struct _pid *pid, int percent) 563 { 564 pid->p_gain = div_fp(percent, 100); 565 } 566 567 static inline void pid_i_gain_set(struct _pid *pid, int percent) 568 { 569 pid->i_gain = div_fp(percent, 100); 570 } 571 572 static inline void pid_d_gain_set(struct _pid *pid, int percent) 573 { 574 pid->d_gain = div_fp(percent, 100); 575 } 576 577 static signed int pid_calc(struct _pid *pid, int32_t busy) 578 { 579 signed int result; 580 int32_t pterm, dterm, fp_error; 581 int32_t integral_limit; 582 583 fp_error = pid->setpoint - busy; 584 585 if (abs(fp_error) <= pid->deadband) 586 return 0; 587 588 pterm = mul_fp(pid->p_gain, fp_error); 589 590 pid->integral += fp_error; 591 592 /* 593 * We limit the integral here so that it will never 594 * get higher than 30. This prevents it from becoming 595 * too large an input over long periods of time and allows 596 * it to get factored out sooner. 597 * 598 * The value of 30 was chosen through experimentation. 
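	 *
	 * Equivalently (a sketch of the comparisons that follow):
	 *
	 *	pid->integral = clamp_t(int32_t, pid->integral,
	 *				-int_tofp(30), int_tofp(30));
	 *
	 * which bounds the integral contribution to the output at
	 * mul_fp(i_gain, int_tofp(30)).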
599 */ 600 integral_limit = int_tofp(30); 601 if (pid->integral > integral_limit) 602 pid->integral = integral_limit; 603 if (pid->integral < -integral_limit) 604 pid->integral = -integral_limit; 605 606 dterm = mul_fp(pid->d_gain, fp_error - pid->last_err); 607 pid->last_err = fp_error; 608 609 result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm; 610 result = result + (1 << (FRAC_BITS-1)); 611 return (signed int)fp_toint(result); 612 } 613 614 static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu) 615 { 616 pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct); 617 pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct); 618 pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct); 619 620 pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0); 621 } 622 623 static inline void intel_pstate_reset_all_pid(void) 624 { 625 unsigned int cpu; 626 627 for_each_online_cpu(cpu) { 628 if (all_cpu_data[cpu]) 629 intel_pstate_busy_pid_reset(all_cpu_data[cpu]); 630 } 631 } 632 633 static inline void update_turbo_state(void) 634 { 635 u64 misc_en; 636 struct cpudata *cpu; 637 638 cpu = all_cpu_data[0]; 639 rdmsrl(MSR_IA32_MISC_ENABLE, misc_en); 640 limits->turbo_disabled = 641 (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE || 642 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); 643 } 644 645 static s16 intel_pstate_get_epb(struct cpudata *cpu_data) 646 { 647 u64 epb; 648 int ret; 649 650 if (!static_cpu_has(X86_FEATURE_EPB)) 651 return -ENXIO; 652 653 ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); 654 if (ret) 655 return (s16)ret; 656 657 return (s16)(epb & 0x0f); 658 } 659 660 static s16 intel_pstate_get_epp(struct cpudata *cpu_data, u64 hwp_req_data) 661 { 662 s16 epp; 663 664 if (static_cpu_has(X86_FEATURE_HWP_EPP)) { 665 /* 666 * When hwp_req_data is 0, means that caller didn't read 667 * MSR_HWP_REQUEST, so need to read and get EPP. 668 */ 669 if (!hwp_req_data) { 670 epp = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, 671 &hwp_req_data); 672 if (epp) 673 return epp; 674 } 675 epp = (hwp_req_data >> 24) & 0xff; 676 } else { 677 /* When there is no EPP present, HWP uses EPB settings */ 678 epp = intel_pstate_get_epb(cpu_data); 679 } 680 681 return epp; 682 } 683 684 static int intel_pstate_set_epb(int cpu, s16 pref) 685 { 686 u64 epb; 687 int ret; 688 689 if (!static_cpu_has(X86_FEATURE_EPB)) 690 return -ENXIO; 691 692 ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); 693 if (ret) 694 return ret; 695 696 epb = (epb & ~0x0f) | pref; 697 wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, epb); 698 699 return 0; 700 } 701 702 /* 703 * EPP/EPB display strings corresponding to EPP index in the 704 * energy_perf_strings[] 705 * index String 706 *------------------------------------- 707 * 0 default 708 * 1 performance 709 * 2 balance_performance 710 * 3 balance_power 711 * 4 power 712 */ 713 static const char * const energy_perf_strings[] = { 714 "default", 715 "performance", 716 "balance_performance", 717 "balance_power", 718 "power", 719 NULL 720 }; 721 722 static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data) 723 { 724 s16 epp; 725 int index = -EINVAL; 726 727 epp = intel_pstate_get_epp(cpu_data, 0); 728 if (epp < 0) 729 return epp; 730 731 if (static_cpu_has(X86_FEATURE_HWP_EPP)) { 732 /* 733 * Range: 734 * 0x00-0x3F : Performance 735 * 0x40-0x7F : Balance performance 736 * 0x80-0xBF : Balance power 737 * 0xC0-0xFF : Power 738 * The EPP is a 8 bit value, but our ranges restrict the 739 * value which can be set. 
Here only using top two bits 740 * effectively. 741 */ 742 index = (epp >> 6) + 1; 743 } else if (static_cpu_has(X86_FEATURE_EPB)) { 744 /* 745 * Range: 746 * 0x00-0x03 : Performance 747 * 0x04-0x07 : Balance performance 748 * 0x08-0x0B : Balance power 749 * 0x0C-0x0F : Power 750 * The EPB is a 4 bit value, but our ranges restrict the 751 * value which can be set. Here only using top two bits 752 * effectively. 753 */ 754 index = (epp >> 2) + 1; 755 } 756 757 return index; 758 } 759 760 static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data, 761 int pref_index) 762 { 763 int epp = -EINVAL; 764 int ret; 765 766 if (!pref_index) 767 epp = cpu_data->epp_default; 768 769 mutex_lock(&intel_pstate_limits_lock); 770 771 if (static_cpu_has(X86_FEATURE_HWP_EPP)) { 772 u64 value; 773 774 ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, &value); 775 if (ret) 776 goto return_pref; 777 778 value &= ~GENMASK_ULL(31, 24); 779 780 /* 781 * If epp is not default, convert from index into 782 * energy_perf_strings to epp value, by shifting 6 783 * bits left to use only top two bits in epp. 784 * The resultant epp need to shifted by 24 bits to 785 * epp position in MSR_HWP_REQUEST. 786 */ 787 if (epp == -EINVAL) 788 epp = (pref_index - 1) << 6; 789 790 value |= (u64)epp << 24; 791 ret = wrmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, value); 792 } else { 793 if (epp == -EINVAL) 794 epp = (pref_index - 1) << 2; 795 ret = intel_pstate_set_epb(cpu_data->cpu, epp); 796 } 797 return_pref: 798 mutex_unlock(&intel_pstate_limits_lock); 799 800 return ret; 801 } 802 803 static ssize_t show_energy_performance_available_preferences( 804 struct cpufreq_policy *policy, char *buf) 805 { 806 int i = 0; 807 int ret = 0; 808 809 while (energy_perf_strings[i] != NULL) 810 ret += sprintf(&buf[ret], "%s ", energy_perf_strings[i++]); 811 812 ret += sprintf(&buf[ret], "\n"); 813 814 return ret; 815 } 816 817 cpufreq_freq_attr_ro(energy_performance_available_preferences); 818 819 static ssize_t store_energy_performance_preference( 820 struct cpufreq_policy *policy, const char *buf, size_t count) 821 { 822 struct cpudata *cpu_data = all_cpu_data[policy->cpu]; 823 char str_preference[21]; 824 int ret, i = 0; 825 826 ret = sscanf(buf, "%20s", str_preference); 827 if (ret != 1) 828 return -EINVAL; 829 830 while (energy_perf_strings[i] != NULL) { 831 if (!strcmp(str_preference, energy_perf_strings[i])) { 832 intel_pstate_set_energy_pref_index(cpu_data, i); 833 return count; 834 } 835 ++i; 836 } 837 838 return -EINVAL; 839 } 840 841 static ssize_t show_energy_performance_preference( 842 struct cpufreq_policy *policy, char *buf) 843 { 844 struct cpudata *cpu_data = all_cpu_data[policy->cpu]; 845 int preference; 846 847 preference = intel_pstate_get_energy_pref_index(cpu_data); 848 if (preference < 0) 849 return preference; 850 851 return sprintf(buf, "%s\n", energy_perf_strings[preference]); 852 } 853 854 cpufreq_freq_attr_rw(energy_performance_preference); 855 856 static struct freq_attr *hwp_cpufreq_attrs[] = { 857 &energy_performance_preference, 858 &energy_performance_available_preferences, 859 NULL, 860 }; 861 862 static void intel_pstate_hwp_set(struct cpufreq_policy *policy) 863 { 864 int min, hw_min, max, hw_max, cpu, range, adj_range; 865 struct perf_limits *perf_limits = limits; 866 u64 value, cap; 867 868 for_each_cpu(cpu, policy->cpus) { 869 int max_perf_pct, min_perf_pct; 870 struct cpudata *cpu_data = all_cpu_data[cpu]; 871 s16 epp; 872 873 if (per_cpu_limits) 874 perf_limits = 
all_cpu_data[cpu]->perf_limits; 875 876 rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap); 877 hw_min = HWP_LOWEST_PERF(cap); 878 if (limits->no_turbo) 879 hw_max = HWP_GUARANTEED_PERF(cap); 880 else 881 hw_max = HWP_HIGHEST_PERF(cap); 882 range = hw_max - hw_min; 883 884 max_perf_pct = perf_limits->max_perf_pct; 885 min_perf_pct = perf_limits->min_perf_pct; 886 887 rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); 888 adj_range = min_perf_pct * range / 100; 889 min = hw_min + adj_range; 890 value &= ~HWP_MIN_PERF(~0L); 891 value |= HWP_MIN_PERF(min); 892 893 adj_range = max_perf_pct * range / 100; 894 max = hw_min + adj_range; 895 896 value &= ~HWP_MAX_PERF(~0L); 897 value |= HWP_MAX_PERF(max); 898 899 if (cpu_data->epp_policy == cpu_data->policy) 900 goto skip_epp; 901 902 cpu_data->epp_policy = cpu_data->policy; 903 904 if (cpu_data->epp_saved >= 0) { 905 epp = cpu_data->epp_saved; 906 cpu_data->epp_saved = -EINVAL; 907 goto update_epp; 908 } 909 910 if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) { 911 epp = intel_pstate_get_epp(cpu_data, value); 912 cpu_data->epp_powersave = epp; 913 /* If EPP read was failed, then don't try to write */ 914 if (epp < 0) 915 goto skip_epp; 916 917 918 epp = 0; 919 } else { 920 /* skip setting EPP, when saved value is invalid */ 921 if (cpu_data->epp_powersave < 0) 922 goto skip_epp; 923 924 /* 925 * No need to restore EPP when it is not zero. This 926 * means: 927 * - Policy is not changed 928 * - user has manually changed 929 * - Error reading EPB 930 */ 931 epp = intel_pstate_get_epp(cpu_data, value); 932 if (epp) 933 goto skip_epp; 934 935 epp = cpu_data->epp_powersave; 936 } 937 update_epp: 938 if (static_cpu_has(X86_FEATURE_HWP_EPP)) { 939 value &= ~GENMASK_ULL(31, 24); 940 value |= (u64)epp << 24; 941 } else { 942 intel_pstate_set_epb(cpu, epp); 943 } 944 skip_epp: 945 wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value); 946 } 947 } 948 949 static int intel_pstate_hwp_set_policy(struct cpufreq_policy *policy) 950 { 951 if (hwp_active) 952 intel_pstate_hwp_set(policy); 953 954 return 0; 955 } 956 957 static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy) 958 { 959 struct cpudata *cpu_data = all_cpu_data[policy->cpu]; 960 961 if (!hwp_active) 962 return 0; 963 964 cpu_data->epp_saved = intel_pstate_get_epp(cpu_data, 0); 965 966 return 0; 967 } 968 969 static int intel_pstate_resume(struct cpufreq_policy *policy) 970 { 971 int ret; 972 973 if (!hwp_active) 974 return 0; 975 976 mutex_lock(&intel_pstate_limits_lock); 977 978 all_cpu_data[policy->cpu]->epp_policy = 0; 979 980 ret = intel_pstate_hwp_set_policy(policy); 981 982 mutex_unlock(&intel_pstate_limits_lock); 983 984 return ret; 985 } 986 987 static void intel_pstate_update_policies(void) 988 { 989 int cpu; 990 991 for_each_possible_cpu(cpu) 992 cpufreq_update_policy(cpu); 993 } 994 995 /************************** debugfs begin ************************/ 996 static int pid_param_set(void *data, u64 val) 997 { 998 *(u32 *)data = val; 999 intel_pstate_reset_all_pid(); 1000 return 0; 1001 } 1002 1003 static int pid_param_get(void *data, u64 *val) 1004 { 1005 *val = *(u32 *)data; 1006 return 0; 1007 } 1008 DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, pid_param_set, "%llu\n"); 1009 1010 static struct dentry *debugfs_parent; 1011 1012 struct pid_param { 1013 char *name; 1014 void *value; 1015 struct dentry *dentry; 1016 }; 1017 1018 static struct pid_param pid_files[] = { 1019 {"sample_rate_ms", &pid_params.sample_rate_ms, }, 1020 {"d_gain_pct", &pid_params.d_gain_pct, }, 1021 
{"i_gain_pct", &pid_params.i_gain_pct, }, 1022 {"deadband", &pid_params.deadband, }, 1023 {"setpoint", &pid_params.setpoint, }, 1024 {"p_gain_pct", &pid_params.p_gain_pct, }, 1025 {NULL, NULL, } 1026 }; 1027 1028 static void intel_pstate_debug_expose_params(void) 1029 { 1030 int i; 1031 1032 debugfs_parent = debugfs_create_dir("pstate_snb", NULL); 1033 if (IS_ERR_OR_NULL(debugfs_parent)) 1034 return; 1035 1036 for (i = 0; pid_files[i].name; i++) { 1037 struct dentry *dentry; 1038 1039 dentry = debugfs_create_file(pid_files[i].name, 0660, 1040 debugfs_parent, pid_files[i].value, 1041 &fops_pid_param); 1042 if (!IS_ERR(dentry)) 1043 pid_files[i].dentry = dentry; 1044 } 1045 } 1046 1047 static void intel_pstate_debug_hide_params(void) 1048 { 1049 int i; 1050 1051 if (IS_ERR_OR_NULL(debugfs_parent)) 1052 return; 1053 1054 for (i = 0; pid_files[i].name; i++) { 1055 debugfs_remove(pid_files[i].dentry); 1056 pid_files[i].dentry = NULL; 1057 } 1058 1059 debugfs_remove(debugfs_parent); 1060 debugfs_parent = NULL; 1061 } 1062 1063 /************************** debugfs end ************************/ 1064 1065 /************************** sysfs begin ************************/ 1066 #define show_one(file_name, object) \ 1067 static ssize_t show_##file_name \ 1068 (struct kobject *kobj, struct attribute *attr, char *buf) \ 1069 { \ 1070 return sprintf(buf, "%u\n", limits->object); \ 1071 } 1072 1073 static ssize_t intel_pstate_show_status(char *buf); 1074 static int intel_pstate_update_status(const char *buf, size_t size); 1075 1076 static ssize_t show_status(struct kobject *kobj, 1077 struct attribute *attr, char *buf) 1078 { 1079 ssize_t ret; 1080 1081 mutex_lock(&intel_pstate_driver_lock); 1082 ret = intel_pstate_show_status(buf); 1083 mutex_unlock(&intel_pstate_driver_lock); 1084 1085 return ret; 1086 } 1087 1088 static ssize_t store_status(struct kobject *a, struct attribute *b, 1089 const char *buf, size_t count) 1090 { 1091 char *p = memchr(buf, '\n', count); 1092 int ret; 1093 1094 mutex_lock(&intel_pstate_driver_lock); 1095 ret = intel_pstate_update_status(buf, p ? p - buf : count); 1096 mutex_unlock(&intel_pstate_driver_lock); 1097 1098 return ret < 0 ? 
ret : count; 1099 } 1100 1101 static ssize_t show_turbo_pct(struct kobject *kobj, 1102 struct attribute *attr, char *buf) 1103 { 1104 struct cpudata *cpu; 1105 int total, no_turbo, turbo_pct; 1106 uint32_t turbo_fp; 1107 1108 mutex_lock(&intel_pstate_driver_lock); 1109 1110 if (!driver_registered) { 1111 mutex_unlock(&intel_pstate_driver_lock); 1112 return -EAGAIN; 1113 } 1114 1115 cpu = all_cpu_data[0]; 1116 1117 total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1; 1118 no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1; 1119 turbo_fp = div_fp(no_turbo, total); 1120 turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100))); 1121 1122 mutex_unlock(&intel_pstate_driver_lock); 1123 1124 return sprintf(buf, "%u\n", turbo_pct); 1125 } 1126 1127 static ssize_t show_num_pstates(struct kobject *kobj, 1128 struct attribute *attr, char *buf) 1129 { 1130 struct cpudata *cpu; 1131 int total; 1132 1133 mutex_lock(&intel_pstate_driver_lock); 1134 1135 if (!driver_registered) { 1136 mutex_unlock(&intel_pstate_driver_lock); 1137 return -EAGAIN; 1138 } 1139 1140 cpu = all_cpu_data[0]; 1141 total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1; 1142 1143 mutex_unlock(&intel_pstate_driver_lock); 1144 1145 return sprintf(buf, "%u\n", total); 1146 } 1147 1148 static ssize_t show_no_turbo(struct kobject *kobj, 1149 struct attribute *attr, char *buf) 1150 { 1151 ssize_t ret; 1152 1153 mutex_lock(&intel_pstate_driver_lock); 1154 1155 if (!driver_registered) { 1156 mutex_unlock(&intel_pstate_driver_lock); 1157 return -EAGAIN; 1158 } 1159 1160 update_turbo_state(); 1161 if (limits->turbo_disabled) 1162 ret = sprintf(buf, "%u\n", limits->turbo_disabled); 1163 else 1164 ret = sprintf(buf, "%u\n", limits->no_turbo); 1165 1166 mutex_unlock(&intel_pstate_driver_lock); 1167 1168 return ret; 1169 } 1170 1171 static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, 1172 const char *buf, size_t count) 1173 { 1174 unsigned int input; 1175 int ret; 1176 1177 ret = sscanf(buf, "%u", &input); 1178 if (ret != 1) 1179 return -EINVAL; 1180 1181 mutex_lock(&intel_pstate_driver_lock); 1182 1183 if (!driver_registered) { 1184 mutex_unlock(&intel_pstate_driver_lock); 1185 return -EAGAIN; 1186 } 1187 1188 mutex_lock(&intel_pstate_limits_lock); 1189 1190 update_turbo_state(); 1191 if (limits->turbo_disabled) { 1192 pr_warn("Turbo disabled by BIOS or unavailable on processor\n"); 1193 mutex_unlock(&intel_pstate_limits_lock); 1194 mutex_unlock(&intel_pstate_driver_lock); 1195 return -EPERM; 1196 } 1197 1198 limits->no_turbo = clamp_t(int, input, 0, 1); 1199 1200 mutex_unlock(&intel_pstate_limits_lock); 1201 1202 intel_pstate_update_policies(); 1203 1204 mutex_unlock(&intel_pstate_driver_lock); 1205 1206 return count; 1207 } 1208 1209 static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, 1210 const char *buf, size_t count) 1211 { 1212 unsigned int input; 1213 int ret; 1214 1215 ret = sscanf(buf, "%u", &input); 1216 if (ret != 1) 1217 return -EINVAL; 1218 1219 mutex_lock(&intel_pstate_driver_lock); 1220 1221 if (!driver_registered) { 1222 mutex_unlock(&intel_pstate_driver_lock); 1223 return -EAGAIN; 1224 } 1225 1226 mutex_lock(&intel_pstate_limits_lock); 1227 1228 limits->max_sysfs_pct = clamp_t(int, input, 0 , 100); 1229 limits->max_perf_pct = min(limits->max_policy_pct, 1230 limits->max_sysfs_pct); 1231 limits->max_perf_pct = max(limits->min_policy_pct, 1232 limits->max_perf_pct); 1233 limits->max_perf_pct = max(limits->min_perf_pct, 1234 limits->max_perf_pct); 1235 
limits->max_perf = div_ext_fp(limits->max_perf_pct, 100); 1236 1237 mutex_unlock(&intel_pstate_limits_lock); 1238 1239 intel_pstate_update_policies(); 1240 1241 mutex_unlock(&intel_pstate_driver_lock); 1242 1243 return count; 1244 } 1245 1246 static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, 1247 const char *buf, size_t count) 1248 { 1249 unsigned int input; 1250 int ret; 1251 1252 ret = sscanf(buf, "%u", &input); 1253 if (ret != 1) 1254 return -EINVAL; 1255 1256 mutex_lock(&intel_pstate_driver_lock); 1257 1258 if (!driver_registered) { 1259 mutex_unlock(&intel_pstate_driver_lock); 1260 return -EAGAIN; 1261 } 1262 1263 mutex_lock(&intel_pstate_limits_lock); 1264 1265 limits->min_sysfs_pct = clamp_t(int, input, 0 , 100); 1266 limits->min_perf_pct = max(limits->min_policy_pct, 1267 limits->min_sysfs_pct); 1268 limits->min_perf_pct = min(limits->max_policy_pct, 1269 limits->min_perf_pct); 1270 limits->min_perf_pct = min(limits->max_perf_pct, 1271 limits->min_perf_pct); 1272 limits->min_perf = div_ext_fp(limits->min_perf_pct, 100); 1273 1274 mutex_unlock(&intel_pstate_limits_lock); 1275 1276 intel_pstate_update_policies(); 1277 1278 mutex_unlock(&intel_pstate_driver_lock); 1279 1280 return count; 1281 } 1282 1283 show_one(max_perf_pct, max_perf_pct); 1284 show_one(min_perf_pct, min_perf_pct); 1285 1286 define_one_global_rw(status); 1287 define_one_global_rw(no_turbo); 1288 define_one_global_rw(max_perf_pct); 1289 define_one_global_rw(min_perf_pct); 1290 define_one_global_ro(turbo_pct); 1291 define_one_global_ro(num_pstates); 1292 1293 static struct attribute *intel_pstate_attributes[] = { 1294 &status.attr, 1295 &no_turbo.attr, 1296 &turbo_pct.attr, 1297 &num_pstates.attr, 1298 NULL 1299 }; 1300 1301 static struct attribute_group intel_pstate_attr_group = { 1302 .attrs = intel_pstate_attributes, 1303 }; 1304 1305 static void __init intel_pstate_sysfs_expose_params(void) 1306 { 1307 struct kobject *intel_pstate_kobject; 1308 int rc; 1309 1310 intel_pstate_kobject = kobject_create_and_add("intel_pstate", 1311 &cpu_subsys.dev_root->kobj); 1312 if (WARN_ON(!intel_pstate_kobject)) 1313 return; 1314 1315 rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group); 1316 if (WARN_ON(rc)) 1317 return; 1318 1319 /* 1320 * If per cpu limits are enforced there are no global limits, so 1321 * return without creating max/min_perf_pct attributes 1322 */ 1323 if (per_cpu_limits) 1324 return; 1325 1326 rc = sysfs_create_file(intel_pstate_kobject, &max_perf_pct.attr); 1327 WARN_ON(rc); 1328 1329 rc = sysfs_create_file(intel_pstate_kobject, &min_perf_pct.attr); 1330 WARN_ON(rc); 1331 1332 } 1333 /************************** sysfs end ************************/ 1334 1335 static void intel_pstate_hwp_enable(struct cpudata *cpudata) 1336 { 1337 /* First disable HWP notification interrupt as we don't process them */ 1338 if (static_cpu_has(X86_FEATURE_HWP_NOTIFY)) 1339 wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); 1340 1341 wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); 1342 cpudata->epp_policy = 0; 1343 if (cpudata->epp_default == -EINVAL) 1344 cpudata->epp_default = intel_pstate_get_epp(cpudata, 0); 1345 } 1346 1347 #define MSR_IA32_POWER_CTL_BIT_EE 19 1348 1349 /* Disable energy efficiency optimization */ 1350 static void intel_pstate_disable_ee(int cpu) 1351 { 1352 u64 power_ctl; 1353 int ret; 1354 1355 ret = rdmsrl_on_cpu(cpu, MSR_IA32_POWER_CTL, &power_ctl); 1356 if (ret) 1357 return; 1358 1359 if (!(power_ctl & BIT(MSR_IA32_POWER_CTL_BIT_EE))) { 1360 
pr_info("Disabling energy efficiency optimization\n"); 1361 power_ctl |= BIT(MSR_IA32_POWER_CTL_BIT_EE); 1362 wrmsrl_on_cpu(cpu, MSR_IA32_POWER_CTL, power_ctl); 1363 } 1364 } 1365 1366 static int atom_get_min_pstate(void) 1367 { 1368 u64 value; 1369 1370 rdmsrl(ATOM_RATIOS, value); 1371 return (value >> 8) & 0x7F; 1372 } 1373 1374 static int atom_get_max_pstate(void) 1375 { 1376 u64 value; 1377 1378 rdmsrl(ATOM_RATIOS, value); 1379 return (value >> 16) & 0x7F; 1380 } 1381 1382 static int atom_get_turbo_pstate(void) 1383 { 1384 u64 value; 1385 1386 rdmsrl(ATOM_TURBO_RATIOS, value); 1387 return value & 0x7F; 1388 } 1389 1390 static u64 atom_get_val(struct cpudata *cpudata, int pstate) 1391 { 1392 u64 val; 1393 int32_t vid_fp; 1394 u32 vid; 1395 1396 val = (u64)pstate << 8; 1397 if (limits->no_turbo && !limits->turbo_disabled) 1398 val |= (u64)1 << 32; 1399 1400 vid_fp = cpudata->vid.min + mul_fp( 1401 int_tofp(pstate - cpudata->pstate.min_pstate), 1402 cpudata->vid.ratio); 1403 1404 vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max); 1405 vid = ceiling_fp(vid_fp); 1406 1407 if (pstate > cpudata->pstate.max_pstate) 1408 vid = cpudata->vid.turbo; 1409 1410 return val | vid; 1411 } 1412 1413 static int silvermont_get_scaling(void) 1414 { 1415 u64 value; 1416 int i; 1417 /* Defined in Table 35-6 from SDM (Sept 2015) */ 1418 static int silvermont_freq_table[] = { 1419 83300, 100000, 133300, 116700, 80000}; 1420 1421 rdmsrl(MSR_FSB_FREQ, value); 1422 i = value & 0x7; 1423 WARN_ON(i > 4); 1424 1425 return silvermont_freq_table[i]; 1426 } 1427 1428 static int airmont_get_scaling(void) 1429 { 1430 u64 value; 1431 int i; 1432 /* Defined in Table 35-10 from SDM (Sept 2015) */ 1433 static int airmont_freq_table[] = { 1434 83300, 100000, 133300, 116700, 80000, 1435 93300, 90000, 88900, 87500}; 1436 1437 rdmsrl(MSR_FSB_FREQ, value); 1438 i = value & 0xF; 1439 WARN_ON(i > 8); 1440 1441 return airmont_freq_table[i]; 1442 } 1443 1444 static void atom_get_vid(struct cpudata *cpudata) 1445 { 1446 u64 value; 1447 1448 rdmsrl(ATOM_VIDS, value); 1449 cpudata->vid.min = int_tofp((value >> 8) & 0x7f); 1450 cpudata->vid.max = int_tofp((value >> 16) & 0x7f); 1451 cpudata->vid.ratio = div_fp( 1452 cpudata->vid.max - cpudata->vid.min, 1453 int_tofp(cpudata->pstate.max_pstate - 1454 cpudata->pstate.min_pstate)); 1455 1456 rdmsrl(ATOM_TURBO_VIDS, value); 1457 cpudata->vid.turbo = value & 0x7f; 1458 } 1459 1460 static int core_get_min_pstate(void) 1461 { 1462 u64 value; 1463 1464 rdmsrl(MSR_PLATFORM_INFO, value); 1465 return (value >> 40) & 0xFF; 1466 } 1467 1468 static int core_get_max_pstate_physical(void) 1469 { 1470 u64 value; 1471 1472 rdmsrl(MSR_PLATFORM_INFO, value); 1473 return (value >> 8) & 0xFF; 1474 } 1475 1476 static int core_get_tdp_ratio(u64 plat_info) 1477 { 1478 /* Check how many TDP levels present */ 1479 if (plat_info & 0x600000000) { 1480 u64 tdp_ctrl; 1481 u64 tdp_ratio; 1482 int tdp_msr; 1483 int err; 1484 1485 /* Get the TDP level (0, 1, 2) to get ratios */ 1486 err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl); 1487 if (err) 1488 return err; 1489 1490 /* TDP MSR are continuous starting at 0x648 */ 1491 tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x03); 1492 err = rdmsrl_safe(tdp_msr, &tdp_ratio); 1493 if (err) 1494 return err; 1495 1496 /* For level 1 and 2, bits[23:16] contain the ratio */ 1497 if (tdp_ctrl & 0x03) 1498 tdp_ratio >>= 16; 1499 1500 tdp_ratio &= 0xff; /* ratios are only 8 bits long */ 1501 pr_debug("tdp_ratio %x\n", (int)tdp_ratio); 1502 1503 return 
(int)tdp_ratio; 1504 } 1505 1506 return -ENXIO; 1507 } 1508 1509 static int core_get_max_pstate(void) 1510 { 1511 u64 tar; 1512 u64 plat_info; 1513 int max_pstate; 1514 int tdp_ratio; 1515 int err; 1516 1517 rdmsrl(MSR_PLATFORM_INFO, plat_info); 1518 max_pstate = (plat_info >> 8) & 0xFF; 1519 1520 tdp_ratio = core_get_tdp_ratio(plat_info); 1521 if (tdp_ratio <= 0) 1522 return max_pstate; 1523 1524 if (hwp_active) { 1525 /* Turbo activation ratio is not used on HWP platforms */ 1526 return tdp_ratio; 1527 } 1528 1529 err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar); 1530 if (!err) { 1531 int tar_levels; 1532 1533 /* Do some sanity checking for safety */ 1534 tar_levels = tar & 0xff; 1535 if (tdp_ratio - 1 == tar_levels) { 1536 max_pstate = tar_levels; 1537 pr_debug("max_pstate=TAC %x\n", max_pstate); 1538 } 1539 } 1540 1541 return max_pstate; 1542 } 1543 1544 static int core_get_turbo_pstate(void) 1545 { 1546 u64 value; 1547 int nont, ret; 1548 1549 rdmsrl(MSR_TURBO_RATIO_LIMIT, value); 1550 nont = core_get_max_pstate(); 1551 ret = (value) & 255; 1552 if (ret <= nont) 1553 ret = nont; 1554 return ret; 1555 } 1556 1557 static inline int core_get_scaling(void) 1558 { 1559 return 100000; 1560 } 1561 1562 static u64 core_get_val(struct cpudata *cpudata, int pstate) 1563 { 1564 u64 val; 1565 1566 val = (u64)pstate << 8; 1567 if (limits->no_turbo && !limits->turbo_disabled) 1568 val |= (u64)1 << 32; 1569 1570 return val; 1571 } 1572 1573 static int knl_get_turbo_pstate(void) 1574 { 1575 u64 value; 1576 int nont, ret; 1577 1578 rdmsrl(MSR_TURBO_RATIO_LIMIT, value); 1579 nont = core_get_max_pstate(); 1580 ret = (((value) >> 8) & 0xFF); 1581 if (ret <= nont) 1582 ret = nont; 1583 return ret; 1584 } 1585 1586 static struct cpu_defaults core_params = { 1587 .pid_policy = { 1588 .sample_rate_ms = 10, 1589 .deadband = 0, 1590 .setpoint = 97, 1591 .p_gain_pct = 20, 1592 .d_gain_pct = 0, 1593 .i_gain_pct = 0, 1594 }, 1595 .funcs = { 1596 .get_max = core_get_max_pstate, 1597 .get_max_physical = core_get_max_pstate_physical, 1598 .get_min = core_get_min_pstate, 1599 .get_turbo = core_get_turbo_pstate, 1600 .get_scaling = core_get_scaling, 1601 .get_val = core_get_val, 1602 .get_target_pstate = get_target_pstate_use_performance, 1603 }, 1604 }; 1605 1606 static const struct cpu_defaults silvermont_params = { 1607 .pid_policy = { 1608 .sample_rate_ms = 10, 1609 .deadband = 0, 1610 .setpoint = 60, 1611 .p_gain_pct = 14, 1612 .d_gain_pct = 0, 1613 .i_gain_pct = 4, 1614 }, 1615 .funcs = { 1616 .get_max = atom_get_max_pstate, 1617 .get_max_physical = atom_get_max_pstate, 1618 .get_min = atom_get_min_pstate, 1619 .get_turbo = atom_get_turbo_pstate, 1620 .get_val = atom_get_val, 1621 .get_scaling = silvermont_get_scaling, 1622 .get_vid = atom_get_vid, 1623 .get_target_pstate = get_target_pstate_use_cpu_load, 1624 }, 1625 }; 1626 1627 static const struct cpu_defaults airmont_params = { 1628 .pid_policy = { 1629 .sample_rate_ms = 10, 1630 .deadband = 0, 1631 .setpoint = 60, 1632 .p_gain_pct = 14, 1633 .d_gain_pct = 0, 1634 .i_gain_pct = 4, 1635 }, 1636 .funcs = { 1637 .get_max = atom_get_max_pstate, 1638 .get_max_physical = atom_get_max_pstate, 1639 .get_min = atom_get_min_pstate, 1640 .get_turbo = atom_get_turbo_pstate, 1641 .get_val = atom_get_val, 1642 .get_scaling = airmont_get_scaling, 1643 .get_vid = atom_get_vid, 1644 .get_target_pstate = get_target_pstate_use_cpu_load, 1645 }, 1646 }; 1647 1648 static const struct cpu_defaults knl_params = { 1649 .pid_policy = { 1650 .sample_rate_ms = 10, 1651 .deadband = 
0, 1652 .setpoint = 97, 1653 .p_gain_pct = 20, 1654 .d_gain_pct = 0, 1655 .i_gain_pct = 0, 1656 }, 1657 .funcs = { 1658 .get_max = core_get_max_pstate, 1659 .get_max_physical = core_get_max_pstate_physical, 1660 .get_min = core_get_min_pstate, 1661 .get_turbo = knl_get_turbo_pstate, 1662 .get_scaling = core_get_scaling, 1663 .get_val = core_get_val, 1664 .get_target_pstate = get_target_pstate_use_performance, 1665 }, 1666 }; 1667 1668 static const struct cpu_defaults bxt_params = { 1669 .pid_policy = { 1670 .sample_rate_ms = 10, 1671 .deadband = 0, 1672 .setpoint = 60, 1673 .p_gain_pct = 14, 1674 .d_gain_pct = 0, 1675 .i_gain_pct = 4, 1676 }, 1677 .funcs = { 1678 .get_max = core_get_max_pstate, 1679 .get_max_physical = core_get_max_pstate_physical, 1680 .get_min = core_get_min_pstate, 1681 .get_turbo = core_get_turbo_pstate, 1682 .get_scaling = core_get_scaling, 1683 .get_val = core_get_val, 1684 .get_target_pstate = get_target_pstate_use_cpu_load, 1685 }, 1686 }; 1687 1688 static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) 1689 { 1690 int max_perf = cpu->pstate.turbo_pstate; 1691 int max_perf_adj; 1692 int min_perf; 1693 struct perf_limits *perf_limits = limits; 1694 1695 if (limits->no_turbo || limits->turbo_disabled) 1696 max_perf = cpu->pstate.max_pstate; 1697 1698 if (per_cpu_limits) 1699 perf_limits = cpu->perf_limits; 1700 1701 /* 1702 * performance can be limited by user through sysfs, by cpufreq 1703 * policy, or by cpu specific default values determined through 1704 * experimentation. 1705 */ 1706 max_perf_adj = fp_ext_toint(max_perf * perf_limits->max_perf); 1707 *max = clamp_t(int, max_perf_adj, 1708 cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); 1709 1710 min_perf = fp_ext_toint(max_perf * perf_limits->min_perf); 1711 *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); 1712 } 1713 1714 static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate) 1715 { 1716 trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); 1717 cpu->pstate.current_pstate = pstate; 1718 /* 1719 * Generally, there is no guarantee that this code will always run on 1720 * the CPU being updated, so force the register update to run on the 1721 * right CPU. 
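	 *
	 * For reference, the value written below is built by the get_val()
	 * callback; for core CPUs (see core_get_val()) that is simply:
	 *
	 *	val  = (u64)pstate << 8;	target ratio in bits 15:8
	 *	val |= (u64)1 << 32;		turbo disengage, only when
	 *					no_turbo && !turbo_disabled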
1722 */ 1723 wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL, 1724 pstate_funcs.get_val(cpu, pstate)); 1725 } 1726 1727 static void intel_pstate_set_min_pstate(struct cpudata *cpu) 1728 { 1729 intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate); 1730 } 1731 1732 static void intel_pstate_max_within_limits(struct cpudata *cpu) 1733 { 1734 int min_pstate, max_pstate; 1735 1736 update_turbo_state(); 1737 intel_pstate_get_min_max(cpu, &min_pstate, &max_pstate); 1738 intel_pstate_set_pstate(cpu, max_pstate); 1739 } 1740 1741 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) 1742 { 1743 cpu->pstate.min_pstate = pstate_funcs.get_min(); 1744 cpu->pstate.max_pstate = pstate_funcs.get_max(); 1745 cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical(); 1746 cpu->pstate.turbo_pstate = pstate_funcs.get_turbo(); 1747 cpu->pstate.scaling = pstate_funcs.get_scaling(); 1748 cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling; 1749 cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling; 1750 1751 if (pstate_funcs.get_vid) 1752 pstate_funcs.get_vid(cpu); 1753 1754 intel_pstate_set_min_pstate(cpu); 1755 } 1756 1757 static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu) 1758 { 1759 struct sample *sample = &cpu->sample; 1760 1761 sample->core_avg_perf = div_ext_fp(sample->aperf, sample->mperf); 1762 } 1763 1764 static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time) 1765 { 1766 u64 aperf, mperf; 1767 unsigned long flags; 1768 u64 tsc; 1769 1770 local_irq_save(flags); 1771 rdmsrl(MSR_IA32_APERF, aperf); 1772 rdmsrl(MSR_IA32_MPERF, mperf); 1773 tsc = rdtsc(); 1774 if (cpu->prev_mperf == mperf || cpu->prev_tsc == tsc) { 1775 local_irq_restore(flags); 1776 return false; 1777 } 1778 local_irq_restore(flags); 1779 1780 cpu->last_sample_time = cpu->sample.time; 1781 cpu->sample.time = time; 1782 cpu->sample.aperf = aperf; 1783 cpu->sample.mperf = mperf; 1784 cpu->sample.tsc = tsc; 1785 cpu->sample.aperf -= cpu->prev_aperf; 1786 cpu->sample.mperf -= cpu->prev_mperf; 1787 cpu->sample.tsc -= cpu->prev_tsc; 1788 1789 cpu->prev_aperf = aperf; 1790 cpu->prev_mperf = mperf; 1791 cpu->prev_tsc = tsc; 1792 /* 1793 * First time this function is invoked in a given cycle, all of the 1794 * previous sample data fields are equal to zero or stale and they must 1795 * be populated with meaningful numbers for things to work, so assume 1796 * that sample.time will always be reset before setting the utilization 1797 * update hook and make the caller skip the sample then. 1798 */ 1799 return !!cpu->last_sample_time; 1800 } 1801 1802 static inline int32_t get_avg_frequency(struct cpudata *cpu) 1803 { 1804 return mul_ext_fp(cpu->sample.core_avg_perf, 1805 cpu->pstate.max_pstate_physical * cpu->pstate.scaling); 1806 } 1807 1808 static inline int32_t get_avg_pstate(struct cpudata *cpu) 1809 { 1810 return mul_ext_fp(cpu->pstate.max_pstate_physical, 1811 cpu->sample.core_avg_perf); 1812 } 1813 1814 static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) 1815 { 1816 struct sample *sample = &cpu->sample; 1817 int32_t busy_frac, boost; 1818 int target, avg_pstate; 1819 1820 busy_frac = div_fp(sample->mperf, sample->tsc); 1821 1822 boost = cpu->iowait_boost; 1823 cpu->iowait_boost >>= 1; 1824 1825 if (busy_frac < boost) 1826 busy_frac = boost; 1827 1828 sample->busy_scaled = busy_frac * 100; 1829 1830 target = limits->no_turbo || limits->turbo_disabled ? 
1831 cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; 1832 target += target >> 2; 1833 target = mul_fp(target, busy_frac); 1834 if (target < cpu->pstate.min_pstate) 1835 target = cpu->pstate.min_pstate; 1836 1837 /* 1838 * If the average P-state during the previous cycle was higher than the 1839 * current target, add 50% of the difference to the target to reduce 1840 * possible performance oscillations and offset possible performance 1841 * loss related to moving the workload from one CPU to another within 1842 * a package/module. 1843 */ 1844 avg_pstate = get_avg_pstate(cpu); 1845 if (avg_pstate > target) 1846 target += (avg_pstate - target) >> 1; 1847 1848 return target; 1849 } 1850 1851 static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) 1852 { 1853 int32_t perf_scaled, max_pstate, current_pstate, sample_ratio; 1854 u64 duration_ns; 1855 1856 /* 1857 * perf_scaled is the ratio of the average P-state during the last 1858 * sampling period to the P-state requested last time (in percent). 1859 * 1860 * That measures the system's response to the previous P-state 1861 * selection. 1862 */ 1863 max_pstate = cpu->pstate.max_pstate_physical; 1864 current_pstate = cpu->pstate.current_pstate; 1865 perf_scaled = mul_ext_fp(cpu->sample.core_avg_perf, 1866 div_fp(100 * max_pstate, current_pstate)); 1867 1868 /* 1869 * Since our utilization update callback will not run unless we are 1870 * in C0, check if the actual elapsed time is significantly greater (3x) 1871 * than our sample interval. If it is, then we were idle for a long 1872 * enough period of time to adjust our performance metric. 1873 */ 1874 duration_ns = cpu->sample.time - cpu->last_sample_time; 1875 if ((s64)duration_ns > pid_params.sample_rate_ns * 3) { 1876 sample_ratio = div_fp(pid_params.sample_rate_ns, duration_ns); 1877 perf_scaled = mul_fp(perf_scaled, sample_ratio); 1878 } else { 1879 sample_ratio = div_fp(100 * cpu->sample.mperf, cpu->sample.tsc); 1880 if (sample_ratio < int_tofp(1)) 1881 perf_scaled = 0; 1882 } 1883 1884 cpu->sample.busy_scaled = perf_scaled; 1885 return cpu->pstate.current_pstate - pid_calc(&cpu->pid, perf_scaled); 1886 } 1887 1888 static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate) 1889 { 1890 int max_perf, min_perf; 1891 1892 intel_pstate_get_min_max(cpu, &min_perf, &max_perf); 1893 pstate = clamp_t(int, pstate, min_perf, max_perf); 1894 trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); 1895 return pstate; 1896 } 1897 1898 static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate) 1899 { 1900 pstate = intel_pstate_prepare_request(cpu, pstate); 1901 if (pstate == cpu->pstate.current_pstate) 1902 return; 1903 1904 cpu->pstate.current_pstate = pstate; 1905 wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate)); 1906 } 1907 1908 static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) 1909 { 1910 int from, target_pstate; 1911 struct sample *sample; 1912 1913 from = cpu->pstate.current_pstate; 1914 1915 target_pstate = cpu->policy == CPUFREQ_POLICY_PERFORMANCE ? 
1916 cpu->pstate.turbo_pstate : pstate_funcs.get_target_pstate(cpu); 1917 1918 update_turbo_state(); 1919 1920 intel_pstate_update_pstate(cpu, target_pstate); 1921 1922 sample = &cpu->sample; 1923 trace_pstate_sample(mul_ext_fp(100, sample->core_avg_perf), 1924 fp_toint(sample->busy_scaled), 1925 from, 1926 cpu->pstate.current_pstate, 1927 sample->mperf, 1928 sample->aperf, 1929 sample->tsc, 1930 get_avg_frequency(cpu), 1931 fp_toint(cpu->iowait_boost * 100)); 1932 } 1933 1934 static void intel_pstate_update_util(struct update_util_data *data, u64 time, 1935 unsigned int flags) 1936 { 1937 struct cpudata *cpu = container_of(data, struct cpudata, update_util); 1938 u64 delta_ns; 1939 1940 if (pstate_funcs.get_target_pstate == get_target_pstate_use_cpu_load) { 1941 if (flags & SCHED_CPUFREQ_IOWAIT) { 1942 cpu->iowait_boost = int_tofp(1); 1943 } else if (cpu->iowait_boost) { 1944 /* Clear iowait_boost if the CPU may have been idle. */ 1945 delta_ns = time - cpu->last_update; 1946 if (delta_ns > TICK_NSEC) 1947 cpu->iowait_boost = 0; 1948 } 1949 cpu->last_update = time; 1950 } 1951 1952 delta_ns = time - cpu->sample.time; 1953 if ((s64)delta_ns >= pid_params.sample_rate_ns) { 1954 bool sample_taken = intel_pstate_sample(cpu, time); 1955 1956 if (sample_taken) { 1957 intel_pstate_calc_avg_perf(cpu); 1958 if (!hwp_active) 1959 intel_pstate_adjust_busy_pstate(cpu); 1960 } 1961 } 1962 } 1963 1964 #define ICPU(model, policy) \ 1965 { X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\ 1966 (unsigned long)&policy } 1967 1968 static const struct x86_cpu_id intel_pstate_cpu_ids[] = { 1969 ICPU(INTEL_FAM6_SANDYBRIDGE, core_params), 1970 ICPU(INTEL_FAM6_SANDYBRIDGE_X, core_params), 1971 ICPU(INTEL_FAM6_ATOM_SILVERMONT1, silvermont_params), 1972 ICPU(INTEL_FAM6_IVYBRIDGE, core_params), 1973 ICPU(INTEL_FAM6_HASWELL_CORE, core_params), 1974 ICPU(INTEL_FAM6_BROADWELL_CORE, core_params), 1975 ICPU(INTEL_FAM6_IVYBRIDGE_X, core_params), 1976 ICPU(INTEL_FAM6_HASWELL_X, core_params), 1977 ICPU(INTEL_FAM6_HASWELL_ULT, core_params), 1978 ICPU(INTEL_FAM6_HASWELL_GT3E, core_params), 1979 ICPU(INTEL_FAM6_BROADWELL_GT3E, core_params), 1980 ICPU(INTEL_FAM6_ATOM_AIRMONT, airmont_params), 1981 ICPU(INTEL_FAM6_SKYLAKE_MOBILE, core_params), 1982 ICPU(INTEL_FAM6_BROADWELL_X, core_params), 1983 ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_params), 1984 ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params), 1985 ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_params), 1986 ICPU(INTEL_FAM6_XEON_PHI_KNM, knl_params), 1987 ICPU(INTEL_FAM6_ATOM_GOLDMONT, bxt_params), 1988 {} 1989 }; 1990 MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); 1991 1992 static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { 1993 ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params), 1994 ICPU(INTEL_FAM6_BROADWELL_X, core_params), 1995 ICPU(INTEL_FAM6_SKYLAKE_X, core_params), 1996 {} 1997 }; 1998 1999 static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = { 2000 ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, core_params), 2001 {} 2002 }; 2003 2004 static int intel_pstate_init_cpu(unsigned int cpunum) 2005 { 2006 struct cpudata *cpu; 2007 2008 cpu = all_cpu_data[cpunum]; 2009 2010 if (!cpu) { 2011 unsigned int size = sizeof(struct cpudata); 2012 2013 if (per_cpu_limits) 2014 size += sizeof(struct perf_limits); 2015 2016 cpu = kzalloc(size, GFP_KERNEL); 2017 if (!cpu) 2018 return -ENOMEM; 2019 2020 all_cpu_data[cpunum] = cpu; 2021 if (per_cpu_limits) 2022 cpu->perf_limits = (struct perf_limits *)(cpu + 1); 2023 2024 cpu->epp_default = -EINVAL; 2025 cpu->epp_powersave = 
static void intel_pstate_update_util(struct update_util_data *data, u64 time,
				     unsigned int flags)
{
	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
	u64 delta_ns;

	if (pstate_funcs.get_target_pstate == get_target_pstate_use_cpu_load) {
		if (flags & SCHED_CPUFREQ_IOWAIT) {
			cpu->iowait_boost = int_tofp(1);
		} else if (cpu->iowait_boost) {
			/* Clear iowait_boost if the CPU may have been idle. */
			delta_ns = time - cpu->last_update;
			if (delta_ns > TICK_NSEC)
				cpu->iowait_boost = 0;
		}
		cpu->last_update = time;
	}

	delta_ns = time - cpu->sample.time;
	if ((s64)delta_ns >= pid_params.sample_rate_ns) {
		bool sample_taken = intel_pstate_sample(cpu, time);

		if (sample_taken) {
			intel_pstate_calc_avg_perf(cpu);
			if (!hwp_active)
				intel_pstate_adjust_busy_pstate(cpu);
		}
	}
}

#define ICPU(model, policy) \
	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
			(unsigned long)&policy }

static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
	ICPU(INTEL_FAM6_SANDYBRIDGE, core_params),
	ICPU(INTEL_FAM6_SANDYBRIDGE_X, core_params),
	ICPU(INTEL_FAM6_ATOM_SILVERMONT1, silvermont_params),
	ICPU(INTEL_FAM6_IVYBRIDGE, core_params),
	ICPU(INTEL_FAM6_HASWELL_CORE, core_params),
	ICPU(INTEL_FAM6_BROADWELL_CORE, core_params),
	ICPU(INTEL_FAM6_IVYBRIDGE_X, core_params),
	ICPU(INTEL_FAM6_HASWELL_X, core_params),
	ICPU(INTEL_FAM6_HASWELL_ULT, core_params),
	ICPU(INTEL_FAM6_HASWELL_GT3E, core_params),
	ICPU(INTEL_FAM6_BROADWELL_GT3E, core_params),
	ICPU(INTEL_FAM6_ATOM_AIRMONT, airmont_params),
	ICPU(INTEL_FAM6_SKYLAKE_MOBILE, core_params),
	ICPU(INTEL_FAM6_BROADWELL_X, core_params),
	ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_params),
	ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params),
	ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_params),
	ICPU(INTEL_FAM6_XEON_PHI_KNM, knl_params),
	ICPU(INTEL_FAM6_ATOM_GOLDMONT, bxt_params),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);

static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
	ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params),
	ICPU(INTEL_FAM6_BROADWELL_X, core_params),
	ICPU(INTEL_FAM6_SKYLAKE_X, core_params),
	{}
};

static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = {
	ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, core_params),
	{}
};

static int intel_pstate_init_cpu(unsigned int cpunum)
{
	struct cpudata *cpu;

	cpu = all_cpu_data[cpunum];

	if (!cpu) {
		unsigned int size = sizeof(struct cpudata);

		if (per_cpu_limits)
			size += sizeof(struct perf_limits);

		cpu = kzalloc(size, GFP_KERNEL);
		if (!cpu)
			return -ENOMEM;

		all_cpu_data[cpunum] = cpu;
		if (per_cpu_limits)
			cpu->perf_limits = (struct perf_limits *)(cpu + 1);

		cpu->epp_default = -EINVAL;
		cpu->epp_powersave = -EINVAL;
		cpu->epp_saved = -EINVAL;
	}

	cpu = all_cpu_data[cpunum];

	cpu->cpu = cpunum;

	if (hwp_active) {
		const struct x86_cpu_id *id;

		id = x86_match_cpu(intel_pstate_cpu_ee_disable_ids);
		if (id)
			intel_pstate_disable_ee(cpunum);

		intel_pstate_hwp_enable(cpu);
		pid_params.sample_rate_ms = 50;
		pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC;
	}

	intel_pstate_get_cpu_pstates(cpu);

	intel_pstate_busy_pid_reset(cpu);

	pr_debug("controlling: cpu %d\n", cpunum);

	return 0;
}

static unsigned int intel_pstate_get(unsigned int cpu_num)
{
	struct cpudata *cpu = all_cpu_data[cpu_num];

	return cpu ? get_avg_frequency(cpu) : 0;
}

static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
{
	struct cpudata *cpu = all_cpu_data[cpu_num];

	if (cpu->update_util_set)
		return;

	/* Prevent intel_pstate_update_util() from using stale data. */
	cpu->sample.time = 0;
	cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
				     intel_pstate_update_util);
	cpu->update_util_set = true;
}

static void intel_pstate_clear_update_util_hook(unsigned int cpu)
{
	struct cpudata *cpu_data = all_cpu_data[cpu];

	if (!cpu_data->update_util_set)
		return;

	cpufreq_remove_update_util_hook(cpu);
	cpu_data->update_util_set = false;
	synchronize_sched();
}

static void intel_pstate_set_performance_limits(struct perf_limits *limits)
{
	limits->no_turbo = 0;
	limits->turbo_disabled = 0;
	limits->max_perf_pct = 100;
	limits->max_perf = int_ext_tofp(1);
	limits->min_perf_pct = 100;
	limits->min_perf = int_ext_tofp(1);
	limits->max_policy_pct = 100;
	limits->max_sysfs_pct = 100;
	limits->min_policy_pct = 0;
	limits->min_sysfs_pct = 0;
}

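/*
 * intel_pstate_update_perf_limits - translate a cpufreq policy into limits.
 *
 * Convert policy->min and policy->max into percentages of
 * policy->cpuinfo.max_freq, clamp them against the sysfs limits and store
 * the result both as percentages and as extended fixed-point fractions.
 *
 * Illustrative example (hypothetical values): with cpuinfo.max_freq of
 * 3500000 kHz and policy->max of 2800000 kHz,
 * max_policy_pct = DIV_ROUND_UP(2800000 * 100, 3500000) = 80, so max_perf
 * ends up stored as roughly 0.80 in EXT_FRAC_BITS fixed point.
 */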
static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
					    struct perf_limits *limits)
{
	limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
					      policy->cpuinfo.max_freq);
	limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0, 100);
	if (policy->max == policy->min) {
		limits->min_policy_pct = limits->max_policy_pct;
	} else {
		limits->min_policy_pct = DIV_ROUND_UP(policy->min * 100,
						      policy->cpuinfo.max_freq);
		limits->min_policy_pct = clamp_t(int, limits->min_policy_pct,
						 0, 100);
	}

	/* Normalize user input to [min_policy_pct, max_policy_pct] */
	limits->min_perf_pct = max(limits->min_policy_pct,
				   limits->min_sysfs_pct);
	limits->min_perf_pct = min(limits->max_policy_pct,
				   limits->min_perf_pct);
	limits->max_perf_pct = min(limits->max_policy_pct,
				   limits->max_sysfs_pct);
	limits->max_perf_pct = max(limits->min_policy_pct,
				   limits->max_perf_pct);

	/* Make sure min_perf_pct <= max_perf_pct */
	limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);

	limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
	limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
	limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS);
	limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS);

	pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu,
		 limits->max_perf_pct, limits->min_perf_pct);
}

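/*
 * intel_pstate_set_policy - ->setpolicy callback of the active-mode driver.
 *
 * Apply a new policy to policy->cpu.  For CPUFREQ_POLICY_PERFORMANCE with
 * policy->max covering the whole cpuinfo range and turbo not disabled, the
 * limits are simply opened up to the full range via
 * intel_pstate_set_performance_limits(); otherwise they are recomputed by
 * intel_pstate_update_perf_limits().  The utilization update hook is then
 * (re)installed and intel_pstate_hwp_set_policy() propagates the result to
 * HWP where applicable.
 */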
static int intel_pstate_set_policy(struct cpufreq_policy *policy)
{
	struct cpudata *cpu;
	struct perf_limits *perf_limits = NULL;

	if (!policy->cpuinfo.max_freq)
		return -ENODEV;

	pr_debug("set_policy cpuinfo.max %u policy->max %u\n",
		 policy->cpuinfo.max_freq, policy->max);

	cpu = all_cpu_data[policy->cpu];
	cpu->policy = policy->policy;

	if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate &&
	    policy->max < policy->cpuinfo.max_freq &&
	    policy->max > cpu->pstate.max_pstate * cpu->pstate.scaling) {
		pr_debug("policy->max > max non turbo frequency\n");
		policy->max = policy->cpuinfo.max_freq;
	}

	if (per_cpu_limits)
		perf_limits = cpu->perf_limits;

	mutex_lock(&intel_pstate_limits_lock);

	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
		if (!perf_limits) {
			limits = &performance_limits;
			perf_limits = limits;
		}
		if (policy->max >= policy->cpuinfo.max_freq &&
		    !limits->no_turbo) {
			pr_debug("set performance\n");
			intel_pstate_set_performance_limits(perf_limits);
			goto out;
		}
	} else {
		pr_debug("set powersave\n");
		if (!perf_limits) {
			limits = &powersave_limits;
			perf_limits = limits;
		}
	}

	intel_pstate_update_perf_limits(policy, perf_limits);
 out:
	if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
		/*
		 * NOHZ_FULL CPUs need this as the governor callback may not
		 * be invoked on them.
		 */
		intel_pstate_clear_update_util_hook(policy->cpu);
		intel_pstate_max_within_limits(cpu);
	}

	intel_pstate_set_update_util_hook(policy->cpu);

	intel_pstate_hwp_set_policy(policy);

	mutex_unlock(&intel_pstate_limits_lock);

	return 0;
}

static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];
	struct perf_limits *perf_limits;

	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
		perf_limits = &performance_limits;
	else
		perf_limits = &powersave_limits;

	update_turbo_state();
	policy->cpuinfo.max_freq = perf_limits->turbo_disabled ||
					perf_limits->no_turbo ?
			cpu->pstate.max_freq : cpu->pstate.turbo_freq;

	cpufreq_verify_within_cpu_limits(policy);

	if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
	    policy->policy != CPUFREQ_POLICY_PERFORMANCE)
		return -EINVAL;

	/* When per-CPU limits are used, sysfs limits are not used */
	if (!per_cpu_limits) {
		unsigned int max_freq, min_freq;

		max_freq = policy->cpuinfo.max_freq *
					limits->max_sysfs_pct / 100;
		min_freq = policy->cpuinfo.max_freq *
					limits->min_sysfs_pct / 100;
		cpufreq_verify_within_limits(policy, min_freq, max_freq);
	}

	return 0;
}

static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy)
{
	intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]);
}

static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
{
	pr_debug("CPU %d exiting\n", policy->cpu);

	intel_pstate_clear_update_util_hook(policy->cpu);
	if (hwp_active)
		intel_pstate_hwp_save_state(policy);
	else
		intel_cpufreq_stop_cpu(policy);
}

static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
{
	intel_pstate_exit_perf_limits(policy);

	policy->fast_switch_possible = false;

	return 0;
}

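/*
 * __intel_pstate_cpu_init - common ->init work for both driver flavours.
 *
 * Allocate and initialize the per-CPU data, seed the per-CPU limits from
 * the global ones when per_cpu_limits is set, and fill in the policy and
 * cpuinfo frequency ranges from the P-state table, taking the current
 * turbo availability into account.
 */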
static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
{
	struct cpudata *cpu;
	int rc;

	rc = intel_pstate_init_cpu(policy->cpu);
	if (rc)
		return rc;

	cpu = all_cpu_data[policy->cpu];

	/*
	 * We need sane values in cpu->perf_limits, so inherit them from the
	 * global perf_limits, which are seeded with values based on
	 * CONFIG_CPU_FREQ_DEFAULT_GOV_* during boot up.
	 */
	if (per_cpu_limits)
		memcpy(cpu->perf_limits, limits, sizeof(struct perf_limits));

	policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
	policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;

	/* cpuinfo and default policy values */
	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
	update_turbo_state();
	policy->cpuinfo.max_freq = limits->turbo_disabled ?
			cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
	policy->cpuinfo.max_freq *= cpu->pstate.scaling;

	intel_pstate_init_acpi_perf_limits(policy);
	cpumask_set_cpu(policy->cpu, policy->cpus);

	policy->fast_switch_possible = true;

	return 0;
}

static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
{
	int ret = __intel_pstate_cpu_init(policy);

	if (ret)
		return ret;

	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
	if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100)
		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
	else
		policy->policy = CPUFREQ_POLICY_POWERSAVE;

	return 0;
}

static struct cpufreq_driver intel_pstate = {
	.flags		= CPUFREQ_CONST_LOOPS,
	.verify		= intel_pstate_verify_policy,
	.setpolicy	= intel_pstate_set_policy,
	.suspend	= intel_pstate_hwp_save_state,
	.resume		= intel_pstate_resume,
	.get		= intel_pstate_get,
	.init		= intel_pstate_cpu_init,
	.exit		= intel_pstate_cpu_exit,
	.stop_cpu	= intel_pstate_stop_cpu,
	.name		= "intel_pstate",
};

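/*
 * The callbacks below implement the "passive" mode of operation, in which
 * this file registers a regular cpufreq driver (the intel_cpufreq structure
 * further down) and P-state selection is left to a generic cpufreq governor
 * calling ->target() or ->fast_switch().
 */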
static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];
	struct perf_limits *perf_limits = limits;

	update_turbo_state();
	policy->cpuinfo.max_freq = limits->turbo_disabled ?
			cpu->pstate.max_freq : cpu->pstate.turbo_freq;

	cpufreq_verify_within_cpu_limits(policy);

	if (per_cpu_limits)
		perf_limits = cpu->perf_limits;

	mutex_lock(&intel_pstate_limits_lock);

	intel_pstate_update_perf_limits(policy, perf_limits);

	mutex_unlock(&intel_pstate_limits_lock);

	return 0;
}

static unsigned int intel_cpufreq_turbo_update(struct cpudata *cpu,
					       struct cpufreq_policy *policy,
					       unsigned int target_freq)
{
	unsigned int max_freq;

	update_turbo_state();

	max_freq = limits->no_turbo || limits->turbo_disabled ?
			cpu->pstate.max_freq : cpu->pstate.turbo_freq;
	policy->cpuinfo.max_freq = max_freq;
	if (policy->max > max_freq)
		policy->max = max_freq;

	if (target_freq > max_freq)
		target_freq = max_freq;

	return target_freq;
}

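/*
 * intel_cpufreq_target - ->target() callback for passive mode.
 *
 * Translate target_freq into a P-state: CPUFREQ_RELATION_L rounds up so
 * the resulting frequency is not below the request, CPUFREQ_RELATION_H
 * rounds down, and any other relation rounds to the closest P-state.  The
 * new value is written to MSR_IA32_PERF_CTL on the target CPU inside a
 * cpufreq frequency-transition section.
 */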
static int intel_cpufreq_target(struct cpufreq_policy *policy,
				unsigned int target_freq,
				unsigned int relation)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];
	struct cpufreq_freqs freqs;
	int target_pstate;

	freqs.old = policy->cur;
	freqs.new = intel_cpufreq_turbo_update(cpu, policy, target_freq);

	cpufreq_freq_transition_begin(policy, &freqs);
	switch (relation) {
	case CPUFREQ_RELATION_L:
		target_pstate = DIV_ROUND_UP(freqs.new, cpu->pstate.scaling);
		break;
	case CPUFREQ_RELATION_H:
		target_pstate = freqs.new / cpu->pstate.scaling;
		break;
	default:
		target_pstate = DIV_ROUND_CLOSEST(freqs.new, cpu->pstate.scaling);
		break;
	}
	target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
	if (target_pstate != cpu->pstate.current_pstate) {
		cpu->pstate.current_pstate = target_pstate;
		wrmsrl_on_cpu(policy->cpu, MSR_IA32_PERF_CTL,
			      pstate_funcs.get_val(cpu, target_pstate));
	}
	cpufreq_freq_transition_end(policy, &freqs, false);

	return 0;
}

static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
					      unsigned int target_freq)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];
	int target_pstate;

	target_freq = intel_cpufreq_turbo_update(cpu, policy, target_freq);
	target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);
	intel_pstate_update_pstate(cpu, target_pstate);
	return target_freq;
}

static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
	int ret = __intel_pstate_cpu_init(policy);

	if (ret)
		return ret;

	policy->cpuinfo.transition_latency = INTEL_CPUFREQ_TRANSITION_LATENCY;
	/* This reflects the intel_pstate_get_cpu_pstates() setting. */
	policy->cur = policy->cpuinfo.min_freq;

	return 0;
}

static struct cpufreq_driver intel_cpufreq = {
	.flags		= CPUFREQ_CONST_LOOPS,
	.verify		= intel_cpufreq_verify_policy,
	.target		= intel_cpufreq_target,
	.fast_switch	= intel_cpufreq_fast_switch,
	.init		= intel_cpufreq_cpu_init,
	.exit		= intel_pstate_cpu_exit,
	.stop_cpu	= intel_cpufreq_stop_cpu,
	.name		= "intel_cpufreq",
};

static struct cpufreq_driver *intel_pstate_driver = &intel_pstate;

static void intel_pstate_driver_cleanup(void)
{
	unsigned int cpu;

	get_online_cpus();
	for_each_online_cpu(cpu) {
		if (all_cpu_data[cpu]) {
			if (intel_pstate_driver == &intel_pstate)
				intel_pstate_clear_update_util_hook(cpu);

			kfree(all_cpu_data[cpu]);
			all_cpu_data[cpu] = NULL;
		}
	}
	put_online_cpus();
}

static int intel_pstate_register_driver(void)
{
	int ret;

	ret = cpufreq_register_driver(intel_pstate_driver);
	if (ret) {
		intel_pstate_driver_cleanup();
		return ret;
	}

	mutex_lock(&intel_pstate_limits_lock);
	driver_registered = true;
	mutex_unlock(&intel_pstate_limits_lock);

	if (intel_pstate_driver == &intel_pstate && !hwp_active &&
	    pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
		intel_pstate_debug_expose_params();

	return 0;
}

static int intel_pstate_unregister_driver(void)
{
	if (hwp_active)
		return -EBUSY;

	if (intel_pstate_driver == &intel_pstate && !hwp_active &&
	    pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
		intel_pstate_debug_hide_params();

	mutex_lock(&intel_pstate_limits_lock);
	driver_registered = false;
	mutex_unlock(&intel_pstate_limits_lock);

	cpufreq_unregister_driver(intel_pstate_driver);
	intel_pstate_driver_cleanup();

	return 0;
}

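/*
 * intel_pstate_show_status() and intel_pstate_update_status() back the
 * driver's "status" sysfs interface.  Writing "off", "active" or "passive"
 * unregisters the current driver flavour and, for the latter two,
 * re-registers intel_pstate or intel_cpufreq respectively (a no-op when the
 * requested mode is already in use).  For example, assuming the usual sysfs
 * location of the attribute:
 *
 *	# echo passive > /sys/devices/system/cpu/intel_pstate/status
 */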
static ssize_t intel_pstate_show_status(char *buf)
{
	if (!driver_registered)
		return sprintf(buf, "off\n");

	return sprintf(buf, "%s\n", intel_pstate_driver == &intel_pstate ?
					"active" : "passive");
}

static int intel_pstate_update_status(const char *buf, size_t size)
{
	int ret;

	if (size == 3 && !strncmp(buf, "off", size))
		return driver_registered ?
			intel_pstate_unregister_driver() : -EINVAL;

	if (size == 6 && !strncmp(buf, "active", size)) {
		if (driver_registered) {
			if (intel_pstate_driver == &intel_pstate)
				return 0;

			ret = intel_pstate_unregister_driver();
			if (ret)
				return ret;
		}

		intel_pstate_driver = &intel_pstate;
		return intel_pstate_register_driver();
	}

	if (size == 7 && !strncmp(buf, "passive", size)) {
		if (driver_registered) {
			if (intel_pstate_driver != &intel_pstate)
				return 0;

			ret = intel_pstate_unregister_driver();
			if (ret)
				return ret;
		}

		intel_pstate_driver = &intel_cpufreq;
		return intel_pstate_register_driver();
	}

	return -EINVAL;
}

static int no_load __initdata;
static int no_hwp __initdata;
static int hwp_only __initdata;
static unsigned int force_load __initdata;

static int __init intel_pstate_msrs_not_valid(void)
{
	if (!pstate_funcs.get_max() ||
	    !pstate_funcs.get_min() ||
	    !pstate_funcs.get_turbo())
		return -ENODEV;

	return 0;
}

static void __init copy_pid_params(struct pstate_adjust_policy *policy)
{
	pid_params.sample_rate_ms = policy->sample_rate_ms;
	pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
	pid_params.p_gain_pct = policy->p_gain_pct;
	pid_params.i_gain_pct = policy->i_gain_pct;
	pid_params.d_gain_pct = policy->d_gain_pct;
	pid_params.deadband = policy->deadband;
	pid_params.setpoint = policy->setpoint;
}

#ifdef CONFIG_ACPI
static void intel_pstate_use_acpi_profile(void)
{
	if (acpi_gbl_FADT.preferred_profile == PM_MOBILE)
		pstate_funcs.get_target_pstate =
				get_target_pstate_use_cpu_load;
}
#else
static void intel_pstate_use_acpi_profile(void)
{
}
#endif

static void __init copy_cpu_funcs(struct pstate_funcs *funcs)
{
	pstate_funcs.get_max = funcs->get_max;
	pstate_funcs.get_max_physical = funcs->get_max_physical;
	pstate_funcs.get_min = funcs->get_min;
	pstate_funcs.get_turbo = funcs->get_turbo;
	pstate_funcs.get_scaling = funcs->get_scaling;
	pstate_funcs.get_val = funcs->get_val;
	pstate_funcs.get_vid = funcs->get_vid;
	pstate_funcs.get_target_pstate = funcs->get_target_pstate;

	intel_pstate_use_acpi_profile();
}

#ifdef CONFIG_ACPI

static bool __init intel_pstate_no_acpi_pss(void)
{
	int i;

	for_each_possible_cpu(i) {
		acpi_status status;
		union acpi_object *pss;
		struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
		struct acpi_processor *pr = per_cpu(processors, i);

		if (!pr)
			continue;

		status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
		if (ACPI_FAILURE(status))
			continue;

		pss = buffer.pointer;
		if (pss && pss->type == ACPI_TYPE_PACKAGE) {
			kfree(pss);
			return false;
		}

		kfree(pss);
	}

	return true;
}

static bool __init intel_pstate_has_acpi_ppc(void)
{
	int i;

	for_each_possible_cpu(i) {
		struct acpi_processor *pr = per_cpu(processors, i);

		if (!pr)
			continue;
		if (acpi_has_method(pr->handle, "_PPC"))
			return true;
	}
	return false;
}

enum {
	PSS,
	PPC,
};

struct hw_vendor_info {
	u16 valid;
	char oem_id[ACPI_OEM_ID_SIZE];
	char oem_table_id[ACPI_OEM_TABLE_ID_SIZE];
	int oem_pwr_table;
};

/* Hardware vendors whose platforms have their own power management modes */
static struct hw_vendor_info vendor_info[] __initdata = {
	{1, "HP    ", "ProLiant", PSS},
	{1, "ORACLE", "X4-2    ", PPC},
	{1, "ORACLE", "X4-2L   ", PPC},
	{1, "ORACLE", "X4-2B   ", PPC},
	{1, "ORACLE", "X3-2    ", PPC},
	{1, "ORACLE", "X3-2L   ", PPC},
	{1, "ORACLE", "X3-2B   ", PPC},
	{1, "ORACLE", "X4470M2 ", PPC},
	{1, "ORACLE", "X4270M3 ", PPC},
	{1, "ORACLE", "X4270M2 ", PPC},
	{1, "ORACLE", "X4170M2 ", PPC},
	{1, "ORACLE", "X4170 M3", PPC},
	{1, "ORACLE", "X4275 M3", PPC},
	{1, "ORACLE", "X6-2    ", PPC},
	{1, "ORACLE", "Sudbury ", PPC},
	{0, "", ""},
};

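/*
 * intel_pstate_platform_pwr_mgmt_exists - should the firmware drive P-states?
 *
 * Returns true when the platform is known to manage P-states itself: either
 * the CPU is in the out-of-band list and MSR_MISC_PWR_MGMT indicates OOB
 * control, or the FADT OEM IDs match an entry in vendor_info[] and the
 * corresponding ACPI objects confirm it (_PSS absent for PSS entries, or
 * _PPC present and "force" not given for PPC entries).  In that case
 * intel_pstate_init() bails out.
 */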
static bool __init intel_pstate_platform_pwr_mgmt_exists(void)
{
	struct acpi_table_header hdr;
	struct hw_vendor_info *v_info;
	const struct x86_cpu_id *id;
	u64 misc_pwr;

	id = x86_match_cpu(intel_pstate_cpu_oob_ids);
	if (id) {
		rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr);
		if (misc_pwr & (1 << 8))
			return true;
	}

	if (acpi_disabled ||
	    ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr)))
		return false;

	for (v_info = vendor_info; v_info->valid; v_info++) {
		if (!strncmp(hdr.oem_id, v_info->oem_id, ACPI_OEM_ID_SIZE) &&
		    !strncmp(hdr.oem_table_id, v_info->oem_table_id,
						ACPI_OEM_TABLE_ID_SIZE))
			switch (v_info->oem_pwr_table) {
			case PSS:
				return intel_pstate_no_acpi_pss();
			case PPC:
				return intel_pstate_has_acpi_ppc() &&
					(!force_load);
			}
	}

	return false;
}

static void intel_pstate_request_control_from_smm(void)
{
	/*
	 * It may be unsafe to request P-states control from SMM if _PPC support
	 * has not been enabled.
	 */
	if (acpi_ppc)
		acpi_processor_pstate_control();
}
#else /* CONFIG_ACPI not enabled */
static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; }
static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
static inline void intel_pstate_request_control_from_smm(void) {}
#endif /* CONFIG_ACPI */

static const struct x86_cpu_id hwp_support_ids[] __initconst = {
	{ X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_HWP },
	{}
};

static int __init intel_pstate_init(void)
{
	const struct x86_cpu_id *id;
	struct cpu_defaults *cpu_def;
	int rc = 0;

	if (no_load)
		return -ENODEV;

	if (x86_match_cpu(hwp_support_ids) && !no_hwp) {
		copy_cpu_funcs(&core_params.funcs);
		hwp_active++;
		intel_pstate.attr = hwp_cpufreq_attrs;
		goto hwp_cpu_matched;
	}

	id = x86_match_cpu(intel_pstate_cpu_ids);
	if (!id)
		return -ENODEV;

	cpu_def = (struct cpu_defaults *)id->driver_data;

	copy_pid_params(&cpu_def->pid_policy);
	copy_cpu_funcs(&cpu_def->funcs);

	if (intel_pstate_msrs_not_valid())
		return -ENODEV;

hwp_cpu_matched:
	/*
	 * The Intel pstate driver will be ignored if the platform
	 * firmware has its own power management modes.
	 */
	if (intel_pstate_platform_pwr_mgmt_exists())
		return -ENODEV;

	if (!hwp_active && hwp_only)
		return -ENOTSUPP;

	pr_info("Intel P-state driver initializing\n");

	all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
	if (!all_cpu_data)
		return -ENOMEM;

	intel_pstate_request_control_from_smm();

	intel_pstate_sysfs_expose_params();

	mutex_lock(&intel_pstate_driver_lock);
	rc = intel_pstate_register_driver();
	mutex_unlock(&intel_pstate_driver_lock);
	if (rc)
		return rc;

	if (hwp_active)
		pr_info("HWP enabled\n");

	return 0;
}
device_initcall(intel_pstate_init);

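/*
 * intel_pstate_setup - parse the "intel_pstate=" early kernel parameter.
 *
 * The recognized values mirror the checks below: "disable", "passive",
 * "no_hwp", "force", "hwp_only", "per_cpu_perf_limits" and, with
 * CONFIG_ACPI, "support_acpi_ppc".  For example, booting with
 *
 *	intel_pstate=passive
 *
 * selects the intel_cpufreq (passive mode) driver flavour and disables HWP.
 */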
static int __init intel_pstate_setup(char *str)
{
	if (!str)
		return -EINVAL;

	if (!strcmp(str, "disable")) {
		no_load = 1;
	} else if (!strcmp(str, "passive")) {
		pr_info("Passive mode enabled\n");
		intel_pstate_driver = &intel_cpufreq;
		no_hwp = 1;
	}
	if (!strcmp(str, "no_hwp")) {
		pr_info("HWP disabled\n");
		no_hwp = 1;
	}
	if (!strcmp(str, "force"))
		force_load = 1;
	if (!strcmp(str, "hwp_only"))
		hwp_only = 1;
	if (!strcmp(str, "per_cpu_perf_limits"))
		per_cpu_limits = true;

#ifdef CONFIG_ACPI
	if (!strcmp(str, "support_acpi_ppc"))
		acpi_ppc = true;
#endif

	return 0;
}
early_param("intel_pstate", intel_pstate_setup);

MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
MODULE_DESCRIPTION("'intel_pstate' - P state driver for Intel Core processors");
MODULE_LICENSE("GPL");