/*
 * intel_pstate.c: Native P state management for Intel processors
 *
 * (C) Copyright 2012 Intel Corporation
 * Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/acpi.h>
#include <linux/vmalloc.h>
#include <trace/events/power.h>

#include <asm/div64.h>
#include <asm/msr.h>
#include <asm/cpu_device_id.h>
#include <asm/cpufeature.h>
#include <asm/intel-family.h>

#define INTEL_CPUFREQ_TRANSITION_LATENCY	20000

#define ATOM_RATIOS		0x66a
#define ATOM_VIDS		0x66b
#define ATOM_TURBO_RATIOS	0x66c
#define ATOM_TURBO_VIDS		0x66d

#ifdef CONFIG_ACPI
#include <acpi/processor.h>
#endif

#define FRAC_BITS 8
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)

#define EXT_BITS 6
#define EXT_FRAC_BITS (EXT_BITS + FRAC_BITS)
#define fp_ext_toint(X) ((X) >> EXT_FRAC_BITS)
#define int_ext_tofp(X) ((int64_t)(X) << EXT_FRAC_BITS)

static inline int32_t mul_fp(int32_t x, int32_t y)
{
	return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
}

static inline int32_t div_fp(s64 x, s64 y)
{
	return div64_s64((int64_t)x << FRAC_BITS, y);
}

static inline int ceiling_fp(int32_t x)
{
	int mask, ret;

	ret = fp_toint(x);
	mask = (1 << FRAC_BITS) - 1;
	if (x & mask)
		ret += 1;
	return ret;
}

static inline u64 mul_ext_fp(u64 x, u64 y)
{
	return (x * y) >> EXT_FRAC_BITS;
}

static inline u64 div_ext_fp(u64 x, u64 y)
{
	return div64_u64(x << EXT_FRAC_BITS, y);
}
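
/*
 * Worked example (editor's illustration, not part of the original driver):
 * with FRAC_BITS == 8, values are stored in 24.8 fixed point, so
 * int_tofp(3) == 768 (3 << 8).  mul_fp(int_tofp(3), int_tofp(2)) ==
 * (768 * 512) >> 8 == 1536 == int_tofp(6), and div_fp(3, 2) ==
 * (3 << 8) / 2 == 384, i.e. 1.5 in 24.8 representation.  ceiling_fp(384)
 * == 2 because the fractional bits are non-zero.  The "ext" variants use
 * 6 more fractional bits (EXT_FRAC_BITS == 14) for extra precision in
 * performance ratios.
 */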

/**
 * struct sample -	Store performance sample
 * @core_avg_perf:	Ratio of APERF/MPERF which is the actual average
 *			performance during last sample period
 * @busy_scaled:	Scaled busy value which is used to calculate next
 *			P state.  This can be different from core_avg_perf
 *			to account for cpu idle period
 * @aperf:		Difference of actual performance frequency clock count
 *			read from APERF MSR between last and current sample
 * @mperf:		Difference of maximum performance frequency clock count
 *			read from MPERF MSR between last and current sample
 * @tsc:		Difference of time stamp counter between last and
 *			current sample
 * @time:		Current time from scheduler
 *
 * This structure is used in the cpudata structure to store performance sample
 * data for choosing next P State.
 */
struct sample {
	int32_t core_avg_perf;
	int32_t busy_scaled;
	u64 aperf;
	u64 mperf;
	u64 tsc;
	u64 time;
};

/**
 * struct pstate_data - Store P state data
 * @current_pstate:	Current requested P state
 * @min_pstate:		Min P state possible for this platform
 * @max_pstate:		Max P state possible for this platform
 * @max_pstate_physical: This is the physical Max P state for a processor
 *			This can be higher than the max_pstate which can
 *			be limited by platform thermal design power limits
 * @scaling:		Scaling factor to convert frequency to cpufreq
 *			frequency units
 * @turbo_pstate:	Max Turbo P state possible for this platform
 * @max_freq:		@max_pstate frequency in cpufreq units
 * @turbo_freq:		@turbo_pstate frequency in cpufreq units
 *
 * Stores the per cpu model P state limits and current P state.
 */
struct pstate_data {
	int current_pstate;
	int min_pstate;
	int max_pstate;
	int max_pstate_physical;
	int scaling;
	int turbo_pstate;
	unsigned int max_freq;
	unsigned int turbo_freq;
};

/**
 * struct vid_data -	Stores voltage information data
 * @min:		VID data for this platform corresponding to
 *			the lowest P state
 * @max:		VID data corresponding to the highest P State.
 * @turbo:		VID data for turbo P state
 * @ratio:		Ratio of (vid max - vid min) /
 *			(max P state - Min P State)
 *
 * Stores the voltage data for DVFS (Dynamic Voltage and Frequency Scaling).
 * This data is used in Atom platforms, where in addition to target P state,
 * the voltage data needs to be specified to select next P State.
 */
struct vid_data {
	int min;
	int max;
	int turbo;
	int32_t ratio;
};

/**
 * struct _pid -	Stores PID data
 * @setpoint:		Target set point for busyness or performance
 * @integral:		Storage for accumulated error values
 * @p_gain:		PID proportional gain
 * @i_gain:		PID integral gain
 * @d_gain:		PID derivative gain
 * @deadband:		PID deadband
 * @last_err:		Last error storage for integral part of PID calculation
 *
 * Stores PID coefficients and last error for PID controller.
 */
struct _pid {
	int setpoint;
	int32_t integral;
	int32_t p_gain;
	int32_t i_gain;
	int32_t d_gain;
	int deadband;
	int32_t last_err;
};

/**
 * struct perf_limits - Store user and policy limits
 * @no_turbo:		User requested turbo state from intel_pstate sysfs
 * @turbo_disabled:	Platform turbo status either from msr
 *			MSR_IA32_MISC_ENABLE or when maximum available pstate
 *			matches the maximum turbo pstate
 * @max_perf_pct:	Effective maximum performance limit in percentage, this
 *			is the minimum of the limit enforced by the cpufreq
 *			policy and the user limit set via intel_pstate sysfs
 * @min_perf_pct:	Effective minimum performance limit in percentage, this
 *			is the maximum of the limit enforced by the cpufreq
 *			policy and the user limit set via intel_pstate sysfs
 * @max_perf:		This is a scaled value between 0 and int_ext_tofp(1)
 *			for max_perf_pct
 *			This value is used to limit max pstate
 * @min_perf:		This is a scaled value between 0 and int_ext_tofp(1)
 *			for min_perf_pct
 *			This value is used to limit min pstate
 * @max_policy_pct:	The maximum performance in percentage enforced by
 *			cpufreq setpolicy interface
 * @max_sysfs_pct:	The maximum performance in percentage enforced by
 *			intel pstate sysfs interface, unused when per cpu
 *			controls are enforced
 * @min_policy_pct:	The minimum performance in percentage enforced by
 *			cpufreq setpolicy interface
 * @min_sysfs_pct:	The minimum performance in percentage enforced by
 *			intel pstate sysfs interface, unused when per cpu
 *			controls are enforced
 *
 * Storage for user and policy defined limits.
 */
struct perf_limits {
	int no_turbo;
	int turbo_disabled;
	int max_perf_pct;
	int min_perf_pct;
	int32_t max_perf;
	int32_t min_perf;
	int max_policy_pct;
	int max_sysfs_pct;
	int min_policy_pct;
	int min_sysfs_pct;
};
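
/*
 * Illustration (editor's note, not from the original source): the scaled
 * limits use EXT_FRAC_BITS == 14 fractional bits, so int_ext_tofp(1) ==
 * 16384 means "100%".  A max_perf_pct of 75 is stored as
 * div_ext_fp(75, 100) == 12288, and a turbo_pstate of 32 is then capped
 * via fp_ext_toint(32 * 12288) == 24, i.e. three quarters of the turbo
 * ratio.
 */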

/**
 * struct cpudata -	Per CPU instance data storage
 * @cpu:		CPU number for this instance data
 * @policy:		CPUFreq policy value
 * @update_util:	CPUFreq utility callback information
 * @update_util_set:	CPUFreq utility callback is set
 * @iowait_boost:	iowait-related boost fraction
 * @last_update:	Time of the last update.
 * @pstate:		Stores P state limits for this CPU
 * @vid:		Stores VID limits for this CPU
 * @pid:		Stores PID parameters for this CPU
 * @last_sample_time:	Last Sample time
 * @prev_aperf:		Last APERF value read from APERF MSR
 * @prev_mperf:		Last MPERF value read from MPERF MSR
 * @prev_tsc:		Last timestamp counter (TSC) value
 * @prev_cummulative_iowait: IO Wait time difference from last and
 *			current sample
 * @sample:		Storage for storing last Sample data
 * @perf_limits:	Pointer to perf_limit unique to this CPU
 *			Not all fields in the structure are applicable
 *			when per cpu controls are enforced
 * @acpi_perf_data:	Stores ACPI perf information read from _PSS
 * @valid_pss_table:	Set to true for valid ACPI _PSS entries found
 *
 * This structure stores per CPU instance data for all CPUs.
 */
struct cpudata {
	int cpu;

	unsigned int policy;
	struct update_util_data update_util;
	bool update_util_set;

	struct pstate_data pstate;
	struct vid_data vid;
	struct _pid pid;

	u64 last_update;
	u64 last_sample_time;
	u64 prev_aperf;
	u64 prev_mperf;
	u64 prev_tsc;
	u64 prev_cummulative_iowait;
	struct sample sample;
	struct perf_limits *perf_limits;
#ifdef CONFIG_ACPI
	struct acpi_processor_performance acpi_perf_data;
	bool valid_pss_table;
#endif
	unsigned int iowait_boost;
};

static struct cpudata **all_cpu_data;

/**
 * struct pstate_adjust_policy - Stores static PID configuration data
 * @sample_rate_ms:	PID calculation sample rate in ms
 * @sample_rate_ns:	Sample rate calculation in ns
 * @deadband:		PID deadband
 * @setpoint:		PID Setpoint
 * @p_gain_pct:		PID proportional gain
 * @i_gain_pct:		PID integral gain
 * @d_gain_pct:		PID derivative gain
 *
 * Stores per CPU model static PID configuration data.
 */
struct pstate_adjust_policy {
	int sample_rate_ms;
	s64 sample_rate_ns;
	int deadband;
	int setpoint;
	int p_gain_pct;
	int d_gain_pct;
	int i_gain_pct;
};

/**
 * struct pstate_funcs - Per CPU model specific callbacks
 * @get_max:		Callback to get maximum non turbo effective P state
 * @get_max_physical:	Callback to get maximum non turbo physical P state
 * @get_min:		Callback to get minimum P state
 * @get_turbo:		Callback to get turbo P state
 * @get_scaling:	Callback to get frequency scaling factor
 * @get_val:		Callback to convert P state to actual MSR write value
 * @get_vid:		Callback to get VID data for Atom platforms
 * @get_target_pstate:	Callback to a function to calculate next P state to use
 *
 * Core and Atom CPU models have different ways to get P State limits.  This
 * structure is used to store those callbacks.
 */
struct pstate_funcs {
	int (*get_max)(void);
	int (*get_max_physical)(void);
	int (*get_min)(void);
	int (*get_turbo)(void);
	int (*get_scaling)(void);
	u64 (*get_val)(struct cpudata*, int pstate);
	void (*get_vid)(struct cpudata *);
	int32_t (*get_target_pstate)(struct cpudata *);
};

/**
 * struct cpu_defaults - Per CPU model default config data
 * @pid_policy:	PID config data
 * @funcs:	Callback function data
 */
struct cpu_defaults {
	struct pstate_adjust_policy pid_policy;
	struct pstate_funcs funcs;
};

static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu);
static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu);

static struct pstate_adjust_policy pid_params __read_mostly;
static struct pstate_funcs pstate_funcs __read_mostly;
static int hwp_active __read_mostly;
static bool per_cpu_limits __read_mostly;

#ifdef CONFIG_ACPI
static bool acpi_ppc;
#endif

static struct perf_limits performance_limits = {
	.no_turbo = 0,
	.turbo_disabled = 0,
	.max_perf_pct = 100,
	.max_perf = int_ext_tofp(1),
	.min_perf_pct = 100,
	.min_perf = int_ext_tofp(1),
	.max_policy_pct = 100,
	.max_sysfs_pct = 100,
	.min_policy_pct = 0,
	.min_sysfs_pct = 0,
};

static struct perf_limits powersave_limits = {
	.no_turbo = 0,
	.turbo_disabled = 0,
	.max_perf_pct = 100,
	.max_perf = int_ext_tofp(1),
	.min_perf_pct = 0,
	.min_perf = 0,
	.max_policy_pct = 100,
	.max_sysfs_pct = 100,
	.min_policy_pct = 0,
	.min_sysfs_pct = 0,
};

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE
static struct perf_limits *limits = &performance_limits;
#else
static struct perf_limits *limits = &powersave_limits;
#endif

static DEFINE_MUTEX(intel_pstate_limits_lock);

#ifdef CONFIG_ACPI

static bool intel_pstate_get_ppc_enable_status(void)
{
	if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER ||
	    acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER)
		return true;

	return acpi_ppc;
}

static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
{
	struct cpudata *cpu;
	int ret;
	int i;

	if (hwp_active)
		return;

	if (!intel_pstate_get_ppc_enable_status())
		return;

	cpu = all_cpu_data[policy->cpu];

	ret = acpi_processor_register_performance(&cpu->acpi_perf_data,
						  policy->cpu);
	if (ret)
		return;

	/*
	 * Check if the control value in _PSS is for PERF_CTL MSR, which should
	 * guarantee that the states returned by it map to the states in our
	 * list directly.
	 */
	if (cpu->acpi_perf_data.control_register.space_id !=
						ACPI_ADR_SPACE_FIXED_HARDWARE)
		goto err;

	/*
	 * If there is only one entry _PSS, simply ignore _PSS and continue as
	 * usual without taking _PSS into account
	 */
	if (cpu->acpi_perf_data.state_count < 2)
		goto err;

	pr_debug("CPU%u - ACPI _PSS perf data\n", policy->cpu);
	for (i = 0; i < cpu->acpi_perf_data.state_count; i++) {
		pr_debug("     %cP%d: %u MHz, %u mW, 0x%x\n",
			 (i == cpu->acpi_perf_data.state ? '*' : ' '), i,
			 (u32) cpu->acpi_perf_data.states[i].core_frequency,
			 (u32) cpu->acpi_perf_data.states[i].power,
			 (u32) cpu->acpi_perf_data.states[i].control);
	}

	/*
	 * The _PSS table doesn't contain whole turbo frequency range.
	 * This just contains +1 MHz above the max non turbo frequency,
	 * with control value corresponding to max turbo ratio.  But
	 * when cpufreq set policy is called, it will call with this
	 * max frequency, which will cause a reduced performance as
	 * this driver uses real max turbo frequency as the max
	 * frequency.  So correct this frequency in _PSS table to
	 * correct max turbo frequency based on the turbo state.
	 * Also need to convert to MHz as _PSS freq is in MHz.
	 */
	if (!limits->turbo_disabled)
		cpu->acpi_perf_data.states[0].core_frequency =
					policy->cpuinfo.max_freq / 1000;
	cpu->valid_pss_table = true;
	pr_debug("_PPC limits will be enforced\n");

	return;

 err:
	cpu->valid_pss_table = false;
	acpi_processor_unregister_performance(policy->cpu);
}

static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
{
	struct cpudata *cpu;

	cpu = all_cpu_data[policy->cpu];
	if (!cpu->valid_pss_table)
		return;

	acpi_processor_unregister_performance(policy->cpu);
}

#else
static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
{
}

static inline void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
{
}
#endif

static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
			     int deadband, int integral)
{
	pid->setpoint = int_tofp(setpoint);
	pid->deadband = int_tofp(deadband);
	pid->integral = int_tofp(integral);
	pid->last_err = int_tofp(setpoint) - int_tofp(busy);
}

static inline void pid_p_gain_set(struct _pid *pid, int percent)
{
	pid->p_gain = div_fp(percent, 100);
}

static inline void pid_i_gain_set(struct _pid *pid, int percent)
{
	pid->i_gain = div_fp(percent, 100);
}

static inline void pid_d_gain_set(struct _pid *pid, int percent)
{
	pid->d_gain = div_fp(percent, 100);
}

static signed int pid_calc(struct _pid *pid, int32_t busy)
{
	signed int result;
	int32_t pterm, dterm, fp_error;
	int32_t integral_limit;

	fp_error = pid->setpoint - busy;

	if (abs(fp_error) <= pid->deadband)
		return 0;

	pterm = mul_fp(pid->p_gain, fp_error);

	pid->integral += fp_error;

	/*
	 * We limit the integral here so that it will never
	 * get higher than 30.  This prevents it from becoming
	 * too large an input over long periods of time and allows
	 * it to get factored out sooner.
	 *
	 * The value of 30 was chosen through experimentation.
	 */
	integral_limit = int_tofp(30);
	if (pid->integral > integral_limit)
		pid->integral = integral_limit;
	if (pid->integral < -integral_limit)
		pid->integral = -integral_limit;

	dterm = mul_fp(pid->d_gain, fp_error - pid->last_err);
	pid->last_err = fp_error;

	result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm;
	result = result + (1 << (FRAC_BITS-1));
	return (signed int)fp_toint(result);
}
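
/*
 * Worked example (editor's illustration, not part of the original driver):
 * with the default Core tuning below (setpoint 97, p_gain_pct 20,
 * i_gain_pct 0, d_gain_pct 0), a busy value of int_tofp(90) gives
 * fp_error == int_tofp(7).  pterm is then roughly 1.4 in fixed point, the
 * integral and derivative terms are zero, and after the 0.5 rounding bias
 * pid_calc() returns 1; the caller subtracts the result from the current
 * P-state, so the request is stepped down by one.
 */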

static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu)
{
	pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct);
	pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct);
	pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct);

	pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0);
}

static inline void intel_pstate_reset_all_pid(void)
{
	unsigned int cpu;

	for_each_online_cpu(cpu) {
		if (all_cpu_data[cpu])
			intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
	}
}

static inline void update_turbo_state(void)
{
	u64 misc_en;
	struct cpudata *cpu;

	cpu = all_cpu_data[0];
	rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
	limits->turbo_disabled =
		(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
		 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
}

static void intel_pstate_hwp_set(const struct cpumask *cpumask)
{
	int min, hw_min, max, hw_max, cpu, range, adj_range;
	struct perf_limits *perf_limits = limits;
	u64 value, cap;

	for_each_cpu(cpu, cpumask) {
		int max_perf_pct, min_perf_pct;

		if (per_cpu_limits)
			perf_limits = all_cpu_data[cpu]->perf_limits;

		rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
		hw_min = HWP_LOWEST_PERF(cap);
		hw_max = HWP_HIGHEST_PERF(cap);
		range = hw_max - hw_min;

		max_perf_pct = perf_limits->max_perf_pct;
		min_perf_pct = perf_limits->min_perf_pct;

		rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
		adj_range = min_perf_pct * range / 100;
		min = hw_min + adj_range;
		value &= ~HWP_MIN_PERF(~0L);
		value |= HWP_MIN_PERF(min);

		adj_range = max_perf_pct * range / 100;
		max = hw_min + adj_range;
		if (limits->no_turbo) {
			hw_max = HWP_GUARANTEED_PERF(cap);
			if (hw_max < max)
				max = hw_max;
		}

		value &= ~HWP_MAX_PERF(~0L);
		value |= HWP_MAX_PERF(max);
		wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
	}
}

static int intel_pstate_hwp_set_policy(struct cpufreq_policy *policy)
{
	if (hwp_active)
		intel_pstate_hwp_set(policy->cpus);

	return 0;
}

static void intel_pstate_hwp_set_online_cpus(void)
{
	get_online_cpus();
	intel_pstate_hwp_set(cpu_online_mask);
	put_online_cpus();
}
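
/*
 * Worked example (editor's illustration, not part of the original driver):
 * if MSR_HWP_CAPABILITIES reports HWP_LOWEST_PERF == 8 and
 * HWP_HIGHEST_PERF == 40, then range == 32.  With min_perf_pct == 25 and
 * max_perf_pct == 100 the code above writes HWP_MIN_PERF(8 + 32 * 25 / 100)
 * == HWP_MIN_PERF(16) and HWP_MAX_PERF(8 + 32) == HWP_MAX_PERF(40) into
 * MSR_HWP_REQUEST; with no_turbo set, the max is additionally clipped to
 * HWP_GUARANTEED_PERF.
 */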

/************************** debugfs begin ************************/
static int pid_param_set(void *data, u64 val)
{
	*(u32 *)data = val;
	intel_pstate_reset_all_pid();
	return 0;
}

static int pid_param_get(void *data, u64 *val)
{
	*val = *(u32 *)data;
	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, pid_param_set, "%llu\n");

struct pid_param {
	char *name;
	void *value;
};

static struct pid_param pid_files[] = {
	{"sample_rate_ms", &pid_params.sample_rate_ms},
	{"d_gain_pct", &pid_params.d_gain_pct},
	{"i_gain_pct", &pid_params.i_gain_pct},
	{"deadband", &pid_params.deadband},
	{"setpoint", &pid_params.setpoint},
	{"p_gain_pct", &pid_params.p_gain_pct},
	{NULL, NULL}
};

static void __init intel_pstate_debug_expose_params(void)
{
	struct dentry *debugfs_parent;
	int i = 0;

	if (hwp_active ||
	    pstate_funcs.get_target_pstate == get_target_pstate_use_cpu_load)
		return;

	debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
	if (IS_ERR_OR_NULL(debugfs_parent))
		return;
	while (pid_files[i].name) {
		debugfs_create_file(pid_files[i].name, 0660,
				    debugfs_parent, pid_files[i].value,
				    &fops_pid_param);
		i++;
	}
}

/************************** debugfs end ************************/

/************************** sysfs begin ************************/
#define show_one(file_name, object)					\
	static ssize_t show_##file_name					\
	(struct kobject *kobj, struct attribute *attr, char *buf)	\
	{								\
		return sprintf(buf, "%u\n", limits->object);		\
	}

static ssize_t show_turbo_pct(struct kobject *kobj,
				struct attribute *attr, char *buf)
{
	struct cpudata *cpu;
	int total, no_turbo, turbo_pct;
	uint32_t turbo_fp;

	cpu = all_cpu_data[0];

	total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
	no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
	turbo_fp = div_fp(no_turbo, total);
	turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));
	return sprintf(buf, "%u\n", turbo_pct);
}

static ssize_t show_num_pstates(struct kobject *kobj,
				struct attribute *attr, char *buf)
{
	struct cpudata *cpu;
	int total;

	cpu = all_cpu_data[0];
	total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
	return sprintf(buf, "%u\n", total);
}

static ssize_t show_no_turbo(struct kobject *kobj,
			     struct attribute *attr, char *buf)
{
	ssize_t ret;

	update_turbo_state();
	if (limits->turbo_disabled)
		ret = sprintf(buf, "%u\n", limits->turbo_disabled);
	else
		ret = sprintf(buf, "%u\n", limits->no_turbo);

	return ret;
}

static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
			      const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	mutex_lock(&intel_pstate_limits_lock);

	update_turbo_state();
	if (limits->turbo_disabled) {
		pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
		mutex_unlock(&intel_pstate_limits_lock);
		return -EPERM;
	}

	limits->no_turbo = clamp_t(int, input, 0, 1);

	mutex_unlock(&intel_pstate_limits_lock);

	if (hwp_active)
		intel_pstate_hwp_set_online_cpus();

	return count;
}

static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	mutex_lock(&intel_pstate_limits_lock);

	limits->max_sysfs_pct = clamp_t(int, input, 0, 100);
	limits->max_perf_pct = min(limits->max_policy_pct,
				   limits->max_sysfs_pct);
	limits->max_perf_pct = max(limits->min_policy_pct,
				   limits->max_perf_pct);
	limits->max_perf_pct = max(limits->min_perf_pct,
				   limits->max_perf_pct);
	limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);

	mutex_unlock(&intel_pstate_limits_lock);

	if (hwp_active)
		intel_pstate_hwp_set_online_cpus();
	return count;
}

static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	mutex_lock(&intel_pstate_limits_lock);

	limits->min_sysfs_pct = clamp_t(int, input, 0, 100);
	limits->min_perf_pct = max(limits->min_policy_pct,
				   limits->min_sysfs_pct);
	limits->min_perf_pct = min(limits->max_policy_pct,
				   limits->min_perf_pct);
	limits->min_perf_pct = min(limits->max_perf_pct,
				   limits->min_perf_pct);
	limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);

	mutex_unlock(&intel_pstate_limits_lock);

	if (hwp_active)
		intel_pstate_hwp_set_online_cpus();
	return count;
}

show_one(max_perf_pct, max_perf_pct);
show_one(min_perf_pct, min_perf_pct);

define_one_global_rw(no_turbo);
define_one_global_rw(max_perf_pct);
define_one_global_rw(min_perf_pct);
define_one_global_ro(turbo_pct);
define_one_global_ro(num_pstates);

static struct attribute *intel_pstate_attributes[] = {
	&no_turbo.attr,
	&turbo_pct.attr,
	&num_pstates.attr,
	NULL
};

static struct attribute_group intel_pstate_attr_group = {
	.attrs = intel_pstate_attributes,
};

static void __init intel_pstate_sysfs_expose_params(void)
{
	struct kobject *intel_pstate_kobject;
	int rc;

	intel_pstate_kobject = kobject_create_and_add("intel_pstate",
						&cpu_subsys.dev_root->kobj);
	if (WARN_ON(!intel_pstate_kobject))
		return;

	rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group);
	if (WARN_ON(rc))
		return;

	/*
	 * If per cpu limits are enforced there are no global limits, so
	 * return without creating max/min_perf_pct attributes
	 */
	if (per_cpu_limits)
		return;

	rc = sysfs_create_file(intel_pstate_kobject, &max_perf_pct.attr);
	WARN_ON(rc);

	rc = sysfs_create_file(intel_pstate_kobject, &min_perf_pct.attr);
	WARN_ON(rc);
}
/************************** sysfs end ************************/

static void intel_pstate_hwp_enable(struct cpudata *cpudata)
{
	/* First disable HWP notification interrupt as we don't process them */
	if (static_cpu_has(X86_FEATURE_HWP_NOTIFY))
		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);

	wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
}

static int atom_get_min_pstate(void)
{
	u64 value;

	rdmsrl(ATOM_RATIOS, value);
	return (value >> 8) & 0x7F;
}

static int atom_get_max_pstate(void)
{
	u64 value;

	rdmsrl(ATOM_RATIOS, value);
	return (value >> 16) & 0x7F;
}

static int atom_get_turbo_pstate(void)
{
	u64 value;

	rdmsrl(ATOM_TURBO_RATIOS, value);
	return value & 0x7F;
}

static u64 atom_get_val(struct cpudata *cpudata, int pstate)
{
	u64 val;
	int32_t vid_fp;
	u32 vid;

	val = (u64)pstate << 8;
	if (limits->no_turbo && !limits->turbo_disabled)
		val |= (u64)1 << 32;

	vid_fp = cpudata->vid.min + mul_fp(
		int_tofp(pstate - cpudata->pstate.min_pstate),
		cpudata->vid.ratio);

	vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
	vid = ceiling_fp(vid_fp);

	if (pstate > cpudata->pstate.max_pstate)
		vid = cpudata->vid.turbo;

	return val | vid;
}
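
/*
 * Worked example (editor's illustration, with made-up register values):
 * suppose ATOM_VIDS yields vid.min == int_tofp(0x20) and vid.max ==
 * int_tofp(0x30), with min_pstate == 10 and max_pstate == 26;
 * atom_get_vid() below then computes ratio == int_tofp(0x10) / 16 == 1 in
 * fixed point.  Requesting pstate 18 interpolates vid_fp == 0x20 + 8 ==
 * 0x28, so the value written to PERF_CTL carries (18 << 8) | 0x28; any
 * pstate above max_pstate uses vid.turbo instead.
 */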

static int silvermont_get_scaling(void)
{
	u64 value;
	int i;
	/* Defined in Table 35-6 from SDM (Sept 2015) */
	static int silvermont_freq_table[] = {
		83300, 100000, 133300, 116700, 80000};

	rdmsrl(MSR_FSB_FREQ, value);
	i = value & 0x7;
	WARN_ON(i > 4);

	return silvermont_freq_table[i];
}

static int airmont_get_scaling(void)
{
	u64 value;
	int i;
	/* Defined in Table 35-10 from SDM (Sept 2015) */
	static int airmont_freq_table[] = {
		83300, 100000, 133300, 116700, 80000,
		93300, 90000, 88900, 87500};

	rdmsrl(MSR_FSB_FREQ, value);
	i = value & 0xF;
	WARN_ON(i > 8);

	return airmont_freq_table[i];
}

static void atom_get_vid(struct cpudata *cpudata)
{
	u64 value;

	rdmsrl(ATOM_VIDS, value);
	cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
	cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
	cpudata->vid.ratio = div_fp(
		cpudata->vid.max - cpudata->vid.min,
		int_tofp(cpudata->pstate.max_pstate -
			cpudata->pstate.min_pstate));

	rdmsrl(ATOM_TURBO_VIDS, value);
	cpudata->vid.turbo = value & 0x7f;
}

static int core_get_min_pstate(void)
{
	u64 value;

	rdmsrl(MSR_PLATFORM_INFO, value);
	return (value >> 40) & 0xFF;
}

static int core_get_max_pstate_physical(void)
{
	u64 value;

	rdmsrl(MSR_PLATFORM_INFO, value);
	return (value >> 8) & 0xFF;
}

static int core_get_max_pstate(void)
{
	u64 tar;
	u64 plat_info;
	int max_pstate;
	int err;

	rdmsrl(MSR_PLATFORM_INFO, plat_info);
	max_pstate = (plat_info >> 8) & 0xFF;

	err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar);
	if (!err) {
		/* Do some sanity checking for safety */
		if (plat_info & 0x600000000) {
			u64 tdp_ctrl;
			u64 tdp_ratio;
			int tdp_msr;

			err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
			if (err)
				goto skip_tar;

			tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x3);
			err = rdmsrl_safe(tdp_msr, &tdp_ratio);
			if (err)
				goto skip_tar;

			/* For level 1 and 2, bits[23:16] contain the ratio */
			if (tdp_ctrl)
				tdp_ratio >>= 16;

			tdp_ratio &= 0xff; /* ratios are only 8 bits long */
			if (tdp_ratio - 1 == tar) {
				max_pstate = tar;
				pr_debug("max_pstate=TAC %x\n", max_pstate);
			} else {
				goto skip_tar;
			}
		}
	}

skip_tar:
	return max_pstate;
}

static int core_get_turbo_pstate(void)
{
	u64 value;
	int nont, ret;

	rdmsrl(MSR_TURBO_RATIO_LIMIT, value);
	nont = core_get_max_pstate();
	ret = (value) & 255;
	if (ret <= nont)
		ret = nont;
	return ret;
}

static inline int core_get_scaling(void)
{
	return 100000;
}

static u64 core_get_val(struct cpudata *cpudata, int pstate)
{
	u64 val;

	val = (u64)pstate << 8;
	if (limits->no_turbo && !limits->turbo_disabled)
		val |= (u64)1 << 32;

	return val;
}

static int knl_get_turbo_pstate(void)
{
	u64 value;
	int nont, ret;

	rdmsrl(MSR_TURBO_RATIO_LIMIT, value);
	nont = core_get_max_pstate();
	ret = (((value) >> 8) & 0xFF);
	if (ret <= nont)
		ret = nont;
	return ret;
}
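
/*
 * Illustration (editor's note, not from the original source): on Core
 * parts the P-state is the bus-clock multiplier and get_scaling() returns
 * 100000, so a P-state of 24 corresponds to 24 * 100000 == 2400000 kHz
 * (2.4 GHz) in cpufreq units.  On Silvermont/Airmont the per-unit
 * frequency comes from MSR_FSB_FREQ instead, e.g. a P-state of 20 with an
 * 83300 scaling factor is 1666000 kHz.
 */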

static struct cpu_defaults core_params = {
	.pid_policy = {
		.sample_rate_ms = 10,
		.deadband = 0,
		.setpoint = 97,
		.p_gain_pct = 20,
		.d_gain_pct = 0,
		.i_gain_pct = 0,
	},
	.funcs = {
		.get_max = core_get_max_pstate,
		.get_max_physical = core_get_max_pstate_physical,
		.get_min = core_get_min_pstate,
		.get_turbo = core_get_turbo_pstate,
		.get_scaling = core_get_scaling,
		.get_val = core_get_val,
		.get_target_pstate = get_target_pstate_use_performance,
	},
};

static const struct cpu_defaults silvermont_params = {
	.pid_policy = {
		.sample_rate_ms = 10,
		.deadband = 0,
		.setpoint = 60,
		.p_gain_pct = 14,
		.d_gain_pct = 0,
		.i_gain_pct = 4,
	},
	.funcs = {
		.get_max = atom_get_max_pstate,
		.get_max_physical = atom_get_max_pstate,
		.get_min = atom_get_min_pstate,
		.get_turbo = atom_get_turbo_pstate,
		.get_val = atom_get_val,
		.get_scaling = silvermont_get_scaling,
		.get_vid = atom_get_vid,
		.get_target_pstate = get_target_pstate_use_cpu_load,
	},
};

static const struct cpu_defaults airmont_params = {
	.pid_policy = {
		.sample_rate_ms = 10,
		.deadband = 0,
		.setpoint = 60,
		.p_gain_pct = 14,
		.d_gain_pct = 0,
		.i_gain_pct = 4,
	},
	.funcs = {
		.get_max = atom_get_max_pstate,
		.get_max_physical = atom_get_max_pstate,
		.get_min = atom_get_min_pstate,
		.get_turbo = atom_get_turbo_pstate,
		.get_val = atom_get_val,
		.get_scaling = airmont_get_scaling,
		.get_vid = atom_get_vid,
		.get_target_pstate = get_target_pstate_use_cpu_load,
	},
};

static const struct cpu_defaults knl_params = {
	.pid_policy = {
		.sample_rate_ms = 10,
		.deadband = 0,
		.setpoint = 97,
		.p_gain_pct = 20,
		.d_gain_pct = 0,
		.i_gain_pct = 0,
	},
	.funcs = {
		.get_max = core_get_max_pstate,
		.get_max_physical = core_get_max_pstate_physical,
		.get_min = core_get_min_pstate,
		.get_turbo = knl_get_turbo_pstate,
		.get_scaling = core_get_scaling,
		.get_val = core_get_val,
		.get_target_pstate = get_target_pstate_use_performance,
	},
};

static const struct cpu_defaults bxt_params = {
	.pid_policy = {
		.sample_rate_ms = 10,
		.deadband = 0,
		.setpoint = 60,
		.p_gain_pct = 14,
		.d_gain_pct = 0,
		.i_gain_pct = 4,
	},
	.funcs = {
		.get_max = core_get_max_pstate,
		.get_max_physical = core_get_max_pstate_physical,
		.get_min = core_get_min_pstate,
		.get_turbo = core_get_turbo_pstate,
		.get_scaling = core_get_scaling,
		.get_val = core_get_val,
		.get_target_pstate = get_target_pstate_use_cpu_load,
	},
};

static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
{
	int max_perf = cpu->pstate.turbo_pstate;
	int max_perf_adj;
	int min_perf;
	struct perf_limits *perf_limits = limits;

	if (limits->no_turbo || limits->turbo_disabled)
		max_perf = cpu->pstate.max_pstate;

	if (per_cpu_limits)
		perf_limits = cpu->perf_limits;

	/*
	 * performance can be limited by user through sysfs, by cpufreq
	 * policy, or by cpu specific default values determined through
	 * experimentation.
	 */
	max_perf_adj = fp_ext_toint(max_perf * perf_limits->max_perf);
	*max = clamp_t(int, max_perf_adj,
		       cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);

	min_perf = fp_ext_toint(max_perf * perf_limits->min_perf);
	*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
}

static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
{
	trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
	cpu->pstate.current_pstate = pstate;
	/*
	 * Generally, there is no guarantee that this code will always run on
	 * the CPU being updated, so force the register update to run on the
	 * right CPU.
	 */
	wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
		      pstate_funcs.get_val(cpu, pstate));
}

static void intel_pstate_set_min_pstate(struct cpudata *cpu)
{
	intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
}

static void intel_pstate_max_within_limits(struct cpudata *cpu)
{
	int min_pstate, max_pstate;

	update_turbo_state();
	intel_pstate_get_min_max(cpu, &min_pstate, &max_pstate);
	intel_pstate_set_pstate(cpu, max_pstate);
}

static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
{
	cpu->pstate.min_pstate = pstate_funcs.get_min();
	cpu->pstate.max_pstate = pstate_funcs.get_max();
	cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical();
	cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
	cpu->pstate.scaling = pstate_funcs.get_scaling();
	cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
	cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;

	if (pstate_funcs.get_vid)
		pstate_funcs.get_vid(cpu);

	intel_pstate_set_min_pstate(cpu);
}

static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu)
{
	struct sample *sample = &cpu->sample;

	sample->core_avg_perf = div_ext_fp(sample->aperf, sample->mperf);
}

static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
{
	u64 aperf, mperf;
	unsigned long flags;
	u64 tsc;

	local_irq_save(flags);
	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);
	tsc = rdtsc();
	if (cpu->prev_mperf == mperf || cpu->prev_tsc == tsc) {
		local_irq_restore(flags);
		return false;
	}
	local_irq_restore(flags);

	cpu->last_sample_time = cpu->sample.time;
	cpu->sample.time = time;
	cpu->sample.aperf = aperf;
	cpu->sample.mperf = mperf;
	cpu->sample.tsc = tsc;
	cpu->sample.aperf -= cpu->prev_aperf;
	cpu->sample.mperf -= cpu->prev_mperf;
	cpu->sample.tsc -= cpu->prev_tsc;

	cpu->prev_aperf = aperf;
	cpu->prev_mperf = mperf;
	cpu->prev_tsc = tsc;
	/*
	 * First time this function is invoked in a given cycle, all of the
	 * previous sample data fields are equal to zero or stale and they must
	 * be populated with meaningful numbers for things to work, so assume
	 * that sample.time will always be reset before setting the utilization
	 * update hook and make the caller skip the sample then.
	 */
	return !!cpu->last_sample_time;
}
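
/*
 * Worked example (editor's illustration, not part of the original driver):
 * if, between two samples, APERF advanced by 1800000 counts while MPERF
 * advanced by 2000000, core_avg_perf == div_ext_fp(1800000, 2000000),
 * i.e. 0.9 in ext fixed point (14745 with EXT_FRAC_BITS == 14): roughly
 * speaking, the core ran on average at 90% of its maximum non-turbo
 * frequency over the interval, typically because it was clocked lower or
 * idle for part of it.
 */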

static inline int32_t get_avg_frequency(struct cpudata *cpu)
{
	return mul_ext_fp(cpu->sample.core_avg_perf,
			  cpu->pstate.max_pstate_physical * cpu->pstate.scaling);
}

static inline int32_t get_avg_pstate(struct cpudata *cpu)
{
	return mul_ext_fp(cpu->pstate.max_pstate_physical,
			  cpu->sample.core_avg_perf);
}

static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
{
	struct sample *sample = &cpu->sample;
	int32_t busy_frac, boost;
	int target, avg_pstate;

	busy_frac = div_fp(sample->mperf, sample->tsc);

	boost = cpu->iowait_boost;
	cpu->iowait_boost >>= 1;

	if (busy_frac < boost)
		busy_frac = boost;

	sample->busy_scaled = busy_frac * 100;

	target = limits->no_turbo || limits->turbo_disabled ?
			cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
	target += target >> 2;
	target = mul_fp(target, busy_frac);
	if (target < cpu->pstate.min_pstate)
		target = cpu->pstate.min_pstate;

	/*
	 * If the average P-state during the previous cycle was higher than the
	 * current target, add 50% of the difference to the target to reduce
	 * possible performance oscillations and offset possible performance
	 * loss related to moving the workload from one CPU to another within
	 * a package/module.
	 */
	avg_pstate = get_avg_pstate(cpu);
	if (avg_pstate > target)
		target += (avg_pstate - target) >> 1;

	return target;
}
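
/*
 * Worked example (editor's illustration, not part of the original driver):
 * with turbo available, turbo_pstate == 32 and a measured busy_frac of 0.5
 * (MPERF advanced through half of the TSC interval), the headroom-adjusted
 * ceiling is 32 + 32/4 == 40 and the raw target is mul_fp(40, 0.5) == 20.
 * If the previous cycle's average P-state was 24, the final target becomes
 * 20 + (24 - 20)/2 == 22.
 */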

static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
{
	int32_t perf_scaled, max_pstate, current_pstate, sample_ratio;
	u64 duration_ns;

	/*
	 * perf_scaled is the ratio of the average P-state during the last
	 * sampling period to the P-state requested last time (in percent).
	 *
	 * That measures the system's response to the previous P-state
	 * selection.
	 */
	max_pstate = cpu->pstate.max_pstate_physical;
	current_pstate = cpu->pstate.current_pstate;
	perf_scaled = mul_ext_fp(cpu->sample.core_avg_perf,
			       div_fp(100 * max_pstate, current_pstate));

	/*
	 * Since our utilization update callback will not run unless we are
	 * in C0, check if the actual elapsed time is significantly greater (3x)
	 * than our sample interval.  If it is, then we were idle for a long
	 * enough period of time to adjust our performance metric.
	 */
	duration_ns = cpu->sample.time - cpu->last_sample_time;
	if ((s64)duration_ns > pid_params.sample_rate_ns * 3) {
		sample_ratio = div_fp(pid_params.sample_rate_ns, duration_ns);
		perf_scaled = mul_fp(perf_scaled, sample_ratio);
	} else {
		sample_ratio = div_fp(100 * cpu->sample.mperf, cpu->sample.tsc);
		if (sample_ratio < int_tofp(1))
			perf_scaled = 0;
	}

	cpu->sample.busy_scaled = perf_scaled;
	return cpu->pstate.current_pstate - pid_calc(&cpu->pid, perf_scaled);
}
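
/*
 * Worked example (editor's illustration, not part of the original driver):
 * with max_pstate_physical == 32, current_pstate == 16 and core_avg_perf
 * == 0.5 (the core averaged half of its maximum physical P-state),
 * perf_scaled == 0.5 * (100 * 32 / 16) == int_tofp(100): the core
 * delivered exactly the requested P-state.  That is above the PID
 * setpoint of 97, so fp_error is negative, pid_calc() returns -1 here,
 * and the next request steps the P-state up by one.
 */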

static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate)
{
	int max_perf, min_perf;

	intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
	pstate = clamp_t(int, pstate, min_perf, max_perf);
	trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
	return pstate;
}

static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
{
	pstate = intel_pstate_prepare_request(cpu, pstate);
	if (pstate == cpu->pstate.current_pstate)
		return;

	cpu->pstate.current_pstate = pstate;
	wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
}

static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
{
	int from, target_pstate;
	struct sample *sample;

	from = cpu->pstate.current_pstate;

	target_pstate = cpu->policy == CPUFREQ_POLICY_PERFORMANCE ?
		cpu->pstate.turbo_pstate : pstate_funcs.get_target_pstate(cpu);

	update_turbo_state();

	intel_pstate_update_pstate(cpu, target_pstate);

	sample = &cpu->sample;
	trace_pstate_sample(mul_ext_fp(100, sample->core_avg_perf),
		fp_toint(sample->busy_scaled),
		from,
		cpu->pstate.current_pstate,
		sample->mperf,
		sample->aperf,
		sample->tsc,
		get_avg_frequency(cpu),
		fp_toint(cpu->iowait_boost * 100));
}

static void intel_pstate_update_util(struct update_util_data *data, u64 time,
				     unsigned int flags)
{
	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
	u64 delta_ns;

	if (pstate_funcs.get_target_pstate == get_target_pstate_use_cpu_load) {
		if (flags & SCHED_CPUFREQ_IOWAIT) {
			cpu->iowait_boost = int_tofp(1);
		} else if (cpu->iowait_boost) {
			/* Clear iowait_boost if the CPU may have been idle. */
			delta_ns = time - cpu->last_update;
			if (delta_ns > TICK_NSEC)
				cpu->iowait_boost = 0;
		}
		cpu->last_update = time;
	}

	delta_ns = time - cpu->sample.time;
	if ((s64)delta_ns >= pid_params.sample_rate_ns) {
		bool sample_taken = intel_pstate_sample(cpu, time);

		if (sample_taken) {
			intel_pstate_calc_avg_perf(cpu);
			if (!hwp_active)
				intel_pstate_adjust_busy_pstate(cpu);
		}
	}
}

#define ICPU(model, policy) \
	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
			(unsigned long)&policy }

static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
	ICPU(INTEL_FAM6_SANDYBRIDGE,		core_params),
	ICPU(INTEL_FAM6_SANDYBRIDGE_X,		core_params),
	ICPU(INTEL_FAM6_ATOM_SILVERMONT1,	silvermont_params),
	ICPU(INTEL_FAM6_IVYBRIDGE,		core_params),
	ICPU(INTEL_FAM6_HASWELL_CORE,		core_params),
	ICPU(INTEL_FAM6_BROADWELL_CORE,		core_params),
	ICPU(INTEL_FAM6_IVYBRIDGE_X,		core_params),
	ICPU(INTEL_FAM6_HASWELL_X,		core_params),
	ICPU(INTEL_FAM6_HASWELL_ULT,		core_params),
	ICPU(INTEL_FAM6_HASWELL_GT3E,		core_params),
	ICPU(INTEL_FAM6_BROADWELL_GT3E,		core_params),
	ICPU(INTEL_FAM6_ATOM_AIRMONT,		airmont_params),
	ICPU(INTEL_FAM6_SKYLAKE_MOBILE,		core_params),
	ICPU(INTEL_FAM6_BROADWELL_X,		core_params),
	ICPU(INTEL_FAM6_SKYLAKE_DESKTOP,	core_params),
	ICPU(INTEL_FAM6_BROADWELL_XEON_D,	core_params),
	ICPU(INTEL_FAM6_XEON_PHI_KNL,		knl_params),
	ICPU(INTEL_FAM6_ATOM_GOLDMONT,		bxt_params),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);

static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
	ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params),
	ICPU(INTEL_FAM6_BROADWELL_X, core_params),
	ICPU(INTEL_FAM6_SKYLAKE_X, core_params),
	{}
};

static int intel_pstate_init_cpu(unsigned int cpunum)
{
	struct cpudata *cpu;

	cpu = all_cpu_data[cpunum];

	if (!cpu) {
		unsigned int size = sizeof(struct cpudata);

		if (per_cpu_limits)
			size += sizeof(struct perf_limits);

		cpu = kzalloc(size, GFP_KERNEL);
		if (!cpu)
			return -ENOMEM;

		all_cpu_data[cpunum] = cpu;
		if (per_cpu_limits)
			cpu->perf_limits = (struct perf_limits *)(cpu + 1);
	}

	cpu = all_cpu_data[cpunum];

	cpu->cpu = cpunum;

	if (hwp_active) {
		intel_pstate_hwp_enable(cpu);
		pid_params.sample_rate_ms = 50;
		pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC;
	}

	intel_pstate_get_cpu_pstates(cpu);

	intel_pstate_busy_pid_reset(cpu);

	pr_debug("controlling: cpu %d\n", cpunum);

	return 0;
}

static unsigned int intel_pstate_get(unsigned int cpu_num)
{
	struct cpudata *cpu = all_cpu_data[cpu_num];

	return cpu ? get_avg_frequency(cpu) : 0;
}

static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
{
	struct cpudata *cpu = all_cpu_data[cpu_num];

	if (cpu->update_util_set)
		return;

	/* Prevent intel_pstate_update_util() from using stale data. */
	cpu->sample.time = 0;
	cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
				     intel_pstate_update_util);
	cpu->update_util_set = true;
}

static void intel_pstate_clear_update_util_hook(unsigned int cpu)
{
	struct cpudata *cpu_data = all_cpu_data[cpu];

	if (!cpu_data->update_util_set)
		return;

	cpufreq_remove_update_util_hook(cpu);
	cpu_data->update_util_set = false;
	synchronize_sched();
}

static void intel_pstate_set_performance_limits(struct perf_limits *limits)
{
	mutex_lock(&intel_pstate_limits_lock);
	limits->no_turbo = 0;
	limits->turbo_disabled = 0;
	limits->max_perf_pct = 100;
	limits->max_perf = int_ext_tofp(1);
	limits->min_perf_pct = 100;
	limits->min_perf = int_ext_tofp(1);
	limits->max_policy_pct = 100;
	limits->max_sysfs_pct = 100;
	limits->min_policy_pct = 0;
	limits->min_sysfs_pct = 0;
	mutex_unlock(&intel_pstate_limits_lock);
}

static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
					    struct perf_limits *limits)
{
	mutex_lock(&intel_pstate_limits_lock);

	limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
					      policy->cpuinfo.max_freq);
	limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0, 100);
	if (policy->max == policy->min) {
		limits->min_policy_pct = limits->max_policy_pct;
	} else {
		limits->min_policy_pct = DIV_ROUND_UP(policy->min * 100,
						      policy->cpuinfo.max_freq);
		limits->min_policy_pct = clamp_t(int, limits->min_policy_pct,
						 0, 100);
	}

	/* Normalize user input to [min_policy_pct, max_policy_pct] */
	limits->min_perf_pct = max(limits->min_policy_pct,
				   limits->min_sysfs_pct);
	limits->min_perf_pct = min(limits->max_policy_pct,
				   limits->min_perf_pct);
	limits->max_perf_pct = min(limits->max_policy_pct,
				   limits->max_sysfs_pct);
	limits->max_perf_pct = max(limits->min_policy_pct,
				   limits->max_perf_pct);

	/* Make sure min_perf_pct <= max_perf_pct */
	limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);

	limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
	limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
	limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS);
	limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS);

	mutex_unlock(&intel_pstate_limits_lock);

	pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu,
		 limits->max_perf_pct, limits->min_perf_pct);
}
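
/*
 * Worked example (editor's illustration, not part of the original driver):
 * with cpuinfo.max_freq == 3200000 kHz, policy->min == 800000 and
 * policy->max == 2400000, the policy percentages become
 * DIV_ROUND_UP(800000 * 100, 3200000) == 25 and
 * DIV_ROUND_UP(2400000 * 100, 3200000) == 75.  A sysfs max_perf_pct of 90
 * is then clipped to the policy range, leaving max_perf_pct == 75 and
 * min_perf_pct == 25 as the effective limits.
 */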

static int intel_pstate_set_policy(struct cpufreq_policy *policy)
{
	struct cpudata *cpu;
	struct perf_limits *perf_limits = NULL;

	if (!policy->cpuinfo.max_freq)
		return -ENODEV;

	pr_debug("set_policy cpuinfo.max %u policy->max %u\n",
		 policy->cpuinfo.max_freq, policy->max);

	cpu = all_cpu_data[policy->cpu];
	cpu->policy = policy->policy;

	if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate &&
	    policy->max < policy->cpuinfo.max_freq &&
	    policy->max > cpu->pstate.max_pstate * cpu->pstate.scaling) {
		pr_debug("policy->max > max non turbo frequency\n");
		policy->max = policy->cpuinfo.max_freq;
	}

	if (per_cpu_limits)
		perf_limits = cpu->perf_limits;

	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
		if (!perf_limits) {
			limits = &performance_limits;
			perf_limits = limits;
		}
		if (policy->max >= policy->cpuinfo.max_freq) {
			pr_debug("set performance\n");
			intel_pstate_set_performance_limits(perf_limits);
			goto out;
		}
	} else {
		pr_debug("set powersave\n");
		if (!perf_limits) {
			limits = &powersave_limits;
			perf_limits = limits;
		}
	}

	intel_pstate_update_perf_limits(policy, perf_limits);
 out:
	if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
		/*
		 * NOHZ_FULL CPUs need this as the governor callback may not
		 * be invoked on them.
		 */
		intel_pstate_clear_update_util_hook(policy->cpu);
		intel_pstate_max_within_limits(cpu);
	}

	intel_pstate_set_update_util_hook(policy->cpu);

	intel_pstate_hwp_set_policy(policy);

	return 0;
}

static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
{
	cpufreq_verify_within_cpu_limits(policy);

	if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
	    policy->policy != CPUFREQ_POLICY_PERFORMANCE)
		return -EINVAL;

	return 0;
}

static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy)
{
	intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]);
}

static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
{
	pr_debug("CPU %d exiting\n", policy->cpu);

	intel_pstate_clear_update_util_hook(policy->cpu);
	if (!hwp_active)
		intel_cpufreq_stop_cpu(policy);
}

static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
{
	intel_pstate_exit_perf_limits(policy);

	policy->fast_switch_possible = false;

	return 0;
}

static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
{
	struct cpudata *cpu;
	int rc;

	rc = intel_pstate_init_cpu(policy->cpu);
	if (rc)
		return rc;

	cpu = all_cpu_data[policy->cpu];

	/*
	 * We need sane value in the cpu->perf_limits, so inherit from global
	 * perf_limits limits, which are seeded with values based on the
	 * CONFIG_CPU_FREQ_DEFAULT_GOV_*, during boot up.
	 */
	if (per_cpu_limits)
		memcpy(cpu->perf_limits, limits, sizeof(struct perf_limits));

	policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
	policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;

	/* cpuinfo and default policy values */
	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
	update_turbo_state();
	policy->cpuinfo.max_freq = limits->turbo_disabled ?
			cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
	policy->cpuinfo.max_freq *= cpu->pstate.scaling;

	intel_pstate_init_acpi_perf_limits(policy);
	cpumask_set_cpu(policy->cpu, policy->cpus);

	policy->fast_switch_possible = true;

	return 0;
}

static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
{
	int ret = __intel_pstate_cpu_init(policy);

	if (ret)
		return ret;

	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
	if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100)
		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
	else
		policy->policy = CPUFREQ_POLICY_POWERSAVE;

	return 0;
}

static struct cpufreq_driver intel_pstate = {
	.flags		= CPUFREQ_CONST_LOOPS,
	.verify		= intel_pstate_verify_policy,
	.setpolicy	= intel_pstate_set_policy,
	.resume		= intel_pstate_hwp_set_policy,
	.get		= intel_pstate_get,
	.init		= intel_pstate_cpu_init,
	.exit		= intel_pstate_cpu_exit,
	.stop_cpu	= intel_pstate_stop_cpu,
	.name		= "intel_pstate",
};

static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];
	struct perf_limits *perf_limits = limits;

	update_turbo_state();
	policy->cpuinfo.max_freq = limits->turbo_disabled ?
			cpu->pstate.max_freq : cpu->pstate.turbo_freq;

	cpufreq_verify_within_cpu_limits(policy);

	if (per_cpu_limits)
		perf_limits = cpu->perf_limits;

	intel_pstate_update_perf_limits(policy, perf_limits);

	return 0;
}

static unsigned int intel_cpufreq_turbo_update(struct cpudata *cpu,
					       struct cpufreq_policy *policy,
					       unsigned int target_freq)
{
	unsigned int max_freq;

	update_turbo_state();

	max_freq = limits->no_turbo || limits->turbo_disabled ?
			cpu->pstate.max_freq : cpu->pstate.turbo_freq;
	policy->cpuinfo.max_freq = max_freq;
	if (policy->max > max_freq)
		policy->max = max_freq;

	if (target_freq > max_freq)
		target_freq = max_freq;

	return target_freq;
}

static int intel_cpufreq_target(struct cpufreq_policy *policy,
				unsigned int target_freq,
				unsigned int relation)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];
	struct cpufreq_freqs freqs;
	int target_pstate;

	freqs.old = policy->cur;
	freqs.new = intel_cpufreq_turbo_update(cpu, policy, target_freq);

	cpufreq_freq_transition_begin(policy, &freqs);
	switch (relation) {
	case CPUFREQ_RELATION_L:
		target_pstate = DIV_ROUND_UP(freqs.new, cpu->pstate.scaling);
		break;
	case CPUFREQ_RELATION_H:
		target_pstate = freqs.new / cpu->pstate.scaling;
		break;
	default:
		target_pstate = DIV_ROUND_CLOSEST(freqs.new, cpu->pstate.scaling);
		break;
	}
	target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
	if (target_pstate != cpu->pstate.current_pstate) {
		cpu->pstate.current_pstate = target_pstate;
		wrmsrl_on_cpu(policy->cpu, MSR_IA32_PERF_CTL,
			      pstate_funcs.get_val(cpu, target_pstate));
	}
	cpufreq_freq_transition_end(policy, &freqs, false);

	return 0;
}

static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
					      unsigned int target_freq)
{
	struct cpudata *cpu = all_cpu_data[policy->cpu];
	int target_pstate;

	target_freq = intel_cpufreq_turbo_update(cpu, policy, target_freq);
	target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);
	intel_pstate_update_pstate(cpu, target_pstate);
	return target_freq;
}
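
/*
 * Illustration (editor's note, not from the original source): with a
 * scaling factor of 100000, a 2350000 kHz request maps to P-state 24
 * under CPUFREQ_RELATION_L (DIV_ROUND_UP, lowest frequency at or above
 * the target), 23 under CPUFREQ_RELATION_H (plain division, highest
 * frequency at or below the target) and 24 under the default
 * DIV_ROUND_CLOSEST rounding.
 */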

static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
	int ret = __intel_pstate_cpu_init(policy);

	if (ret)
		return ret;

	policy->cpuinfo.transition_latency = INTEL_CPUFREQ_TRANSITION_LATENCY;
	/* This reflects the intel_pstate_get_cpu_pstates() setting. */
	policy->cur = policy->cpuinfo.min_freq;

	return 0;
}

static struct cpufreq_driver intel_cpufreq = {
	.flags		= CPUFREQ_CONST_LOOPS,
	.verify		= intel_cpufreq_verify_policy,
	.target		= intel_cpufreq_target,
	.fast_switch	= intel_cpufreq_fast_switch,
	.init		= intel_cpufreq_cpu_init,
	.exit		= intel_pstate_cpu_exit,
	.stop_cpu	= intel_cpufreq_stop_cpu,
	.name		= "intel_cpufreq",
};

static struct cpufreq_driver *intel_pstate_driver = &intel_pstate;

static int no_load __initdata;
static int no_hwp __initdata;
static int hwp_only __initdata;
static unsigned int force_load __initdata;

static int __init intel_pstate_msrs_not_valid(void)
{
	if (!pstate_funcs.get_max() ||
	    !pstate_funcs.get_min() ||
	    !pstate_funcs.get_turbo())
		return -ENODEV;

	return 0;
}

static void __init copy_pid_params(struct pstate_adjust_policy *policy)
{
	pid_params.sample_rate_ms = policy->sample_rate_ms;
	pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
	pid_params.p_gain_pct = policy->p_gain_pct;
	pid_params.i_gain_pct = policy->i_gain_pct;
	pid_params.d_gain_pct = policy->d_gain_pct;
	pid_params.deadband = policy->deadband;
	pid_params.setpoint = policy->setpoint;
}

#ifdef CONFIG_ACPI
static void intel_pstate_use_acpi_profile(void)
{
	if (acpi_gbl_FADT.preferred_profile == PM_MOBILE)
		pstate_funcs.get_target_pstate =
			get_target_pstate_use_cpu_load;
}
#else
static void intel_pstate_use_acpi_profile(void)
{
}
#endif

static void __init copy_cpu_funcs(struct pstate_funcs *funcs)
{
	pstate_funcs.get_max   = funcs->get_max;
	pstate_funcs.get_max_physical = funcs->get_max_physical;
	pstate_funcs.get_min   = funcs->get_min;
	pstate_funcs.get_turbo = funcs->get_turbo;
	pstate_funcs.get_scaling = funcs->get_scaling;
	pstate_funcs.get_val   = funcs->get_val;
	pstate_funcs.get_vid   = funcs->get_vid;
	pstate_funcs.get_target_pstate = funcs->get_target_pstate;

	intel_pstate_use_acpi_profile();
}

#ifdef CONFIG_ACPI

static bool __init intel_pstate_no_acpi_pss(void)
{
	int i;

	for_each_possible_cpu(i) {
		acpi_status status;
		union acpi_object *pss;
		struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
		struct acpi_processor *pr = per_cpu(processors, i);

		if (!pr)
			continue;

		status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
		if (ACPI_FAILURE(status))
			continue;

		pss = buffer.pointer;
		if (pss && pss->type == ACPI_TYPE_PACKAGE) {
			kfree(pss);
			return false;
		}

		kfree(pss);
	}

	return true;
}

static bool __init intel_pstate_has_acpi_ppc(void)
{
	int i;

	for_each_possible_cpu(i) {
		struct acpi_processor *pr = per_cpu(processors, i);

		if (!pr)
			continue;
		if (acpi_has_method(pr->handle, "_PPC"))
			return true;
	}
	return false;
}

enum {
	PSS,
	PPC,
};

struct hw_vendor_info {
	u16 valid;
	char oem_id[ACPI_OEM_ID_SIZE];
	char oem_table_id[ACPI_OEM_TABLE_ID_SIZE];
	int oem_pwr_table;
};

/* Hardware vendor-specific info that has its own power management modes */
static struct hw_vendor_info vendor_info[] __initdata = {
	{1, "HP    ", "ProLiant", PSS},
", "ProLiant", PSS}, 2004 {1, "ORACLE", "X4-2 ", PPC}, 2005 {1, "ORACLE", "X4-2L ", PPC}, 2006 {1, "ORACLE", "X4-2B ", PPC}, 2007 {1, "ORACLE", "X3-2 ", PPC}, 2008 {1, "ORACLE", "X3-2L ", PPC}, 2009 {1, "ORACLE", "X3-2B ", PPC}, 2010 {1, "ORACLE", "X4470M2 ", PPC}, 2011 {1, "ORACLE", "X4270M3 ", PPC}, 2012 {1, "ORACLE", "X4270M2 ", PPC}, 2013 {1, "ORACLE", "X4170M2 ", PPC}, 2014 {1, "ORACLE", "X4170 M3", PPC}, 2015 {1, "ORACLE", "X4275 M3", PPC}, 2016 {1, "ORACLE", "X6-2 ", PPC}, 2017 {1, "ORACLE", "Sudbury ", PPC}, 2018 {0, "", ""}, 2019 }; 2020 2021 static bool __init intel_pstate_platform_pwr_mgmt_exists(void) 2022 { 2023 struct acpi_table_header hdr; 2024 struct hw_vendor_info *v_info; 2025 const struct x86_cpu_id *id; 2026 u64 misc_pwr; 2027 2028 id = x86_match_cpu(intel_pstate_cpu_oob_ids); 2029 if (id) { 2030 rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr); 2031 if ( misc_pwr & (1 << 8)) 2032 return true; 2033 } 2034 2035 if (acpi_disabled || 2036 ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr))) 2037 return false; 2038 2039 for (v_info = vendor_info; v_info->valid; v_info++) { 2040 if (!strncmp(hdr.oem_id, v_info->oem_id, ACPI_OEM_ID_SIZE) && 2041 !strncmp(hdr.oem_table_id, v_info->oem_table_id, 2042 ACPI_OEM_TABLE_ID_SIZE)) 2043 switch (v_info->oem_pwr_table) { 2044 case PSS: 2045 return intel_pstate_no_acpi_pss(); 2046 case PPC: 2047 return intel_pstate_has_acpi_ppc() && 2048 (!force_load); 2049 } 2050 } 2051 2052 return false; 2053 } 2054 2055 static void intel_pstate_request_control_from_smm(void) 2056 { 2057 /* 2058 * It may be unsafe to request P-states control from SMM if _PPC support 2059 * has not been enabled. 2060 */ 2061 if (acpi_ppc) 2062 acpi_processor_pstate_control(); 2063 } 2064 #else /* CONFIG_ACPI not enabled */ 2065 static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; } 2066 static inline bool intel_pstate_has_acpi_ppc(void) { return false; } 2067 static inline void intel_pstate_request_control_from_smm(void) {} 2068 #endif /* CONFIG_ACPI */ 2069 2070 static const struct x86_cpu_id hwp_support_ids[] __initconst = { 2071 { X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_HWP }, 2072 {} 2073 }; 2074 2075 static int __init intel_pstate_init(void) 2076 { 2077 int cpu, rc = 0; 2078 const struct x86_cpu_id *id; 2079 struct cpu_defaults *cpu_def; 2080 2081 if (no_load) 2082 return -ENODEV; 2083 2084 if (x86_match_cpu(hwp_support_ids) && !no_hwp) { 2085 copy_cpu_funcs(&core_params.funcs); 2086 hwp_active++; 2087 goto hwp_cpu_matched; 2088 } 2089 2090 id = x86_match_cpu(intel_pstate_cpu_ids); 2091 if (!id) 2092 return -ENODEV; 2093 2094 cpu_def = (struct cpu_defaults *)id->driver_data; 2095 2096 copy_pid_params(&cpu_def->pid_policy); 2097 copy_cpu_funcs(&cpu_def->funcs); 2098 2099 if (intel_pstate_msrs_not_valid()) 2100 return -ENODEV; 2101 2102 hwp_cpu_matched: 2103 /* 2104 * The Intel pstate driver will be ignored if the platform 2105 * firmware has its own power management modes. 
	 */
	if (intel_pstate_platform_pwr_mgmt_exists())
		return -ENODEV;

	pr_info("Intel P-state driver initializing\n");

	all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
	if (!all_cpu_data)
		return -ENOMEM;

	if (!hwp_active && hwp_only)
		goto out;

	intel_pstate_request_control_from_smm();

	rc = cpufreq_register_driver(intel_pstate_driver);
	if (rc)
		goto out;

	intel_pstate_debug_expose_params();
	intel_pstate_sysfs_expose_params();

	if (hwp_active)
		pr_info("HWP enabled\n");

	return rc;
out:
	get_online_cpus();
	for_each_online_cpu(cpu) {
		if (all_cpu_data[cpu]) {
			if (intel_pstate_driver == &intel_pstate)
				intel_pstate_clear_update_util_hook(cpu);

			kfree(all_cpu_data[cpu]);
		}
	}

	put_online_cpus();
	vfree(all_cpu_data);
	return -ENODEV;
}
device_initcall(intel_pstate_init);

static int __init intel_pstate_setup(char *str)
{
	if (!str)
		return -EINVAL;

	if (!strcmp(str, "disable")) {
		no_load = 1;
	} else if (!strcmp(str, "passive")) {
		pr_info("Passive mode enabled\n");
		intel_pstate_driver = &intel_cpufreq;
		no_hwp = 1;
	}
	if (!strcmp(str, "no_hwp")) {
		pr_info("HWP disabled\n");
		no_hwp = 1;
	}
	if (!strcmp(str, "force"))
		force_load = 1;
	if (!strcmp(str, "hwp_only"))
		hwp_only = 1;
	if (!strcmp(str, "per_cpu_perf_limits"))
		per_cpu_limits = true;

#ifdef CONFIG_ACPI
	if (!strcmp(str, "support_acpi_ppc"))
		acpi_ppc = true;
#endif

	return 0;
}
early_param("intel_pstate", intel_pstate_setup);

MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
MODULE_DESCRIPTION("'intel_pstate' - P state driver for Intel Core processors");
MODULE_LICENSE("GPL");