/*
 * intel_pstate.c: Native P state management for Intel processors
 *
 * (C) Copyright 2012 Intel Corporation
 * Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/acpi.h>
#include <linux/vmalloc.h>
#include <trace/events/power.h>

#include <asm/div64.h>
#include <asm/msr.h>
#include <asm/cpu_device_id.h>
#include <asm/cpufeature.h>

#define BYT_RATIOS		0x66a
#define BYT_VIDS		0x66b
#define BYT_TURBO_RATIOS	0x66c
#define BYT_TURBO_VIDS		0x66d

#define FRAC_BITS 8
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)


static inline int32_t mul_fp(int32_t x, int32_t y)
{
	return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
}

static inline int32_t div_fp(s64 x, s64 y)
{
	return div64_s64((int64_t)x << FRAC_BITS, y);
}

static inline int ceiling_fp(int32_t x)
{
	int mask, ret;

	ret = fp_toint(x);
	mask = (1 << FRAC_BITS) - 1;
	if (x & mask)
		ret += 1;
	return ret;
}

struct sample {
	int32_t core_pct_busy;
	u64 aperf;
	u64 mperf;
	u64 tsc;
	int freq;
	ktime_t time;
};

struct pstate_data {
	int	current_pstate;
	int	min_pstate;
	int	max_pstate;
	int	scaling;
	int	turbo_pstate;
};

struct vid_data {
	int min;
	int max;
	int turbo;
	int32_t ratio;
};

struct _pid {
	int setpoint;
	int32_t integral;
	int32_t p_gain;
	int32_t i_gain;
	int32_t d_gain;
	int deadband;
	int32_t last_err;
};

struct cpudata {
	int cpu;

	struct timer_list timer;

	struct pstate_data pstate;
	struct vid_data vid;
	struct _pid pid;

	ktime_t last_sample_time;
	u64	prev_aperf;
	u64	prev_mperf;
	u64	prev_tsc;
	struct sample sample;
};

static struct cpudata **all_cpu_data;
struct pstate_adjust_policy {
	int sample_rate_ms;
	int deadband;
	int setpoint;
	int p_gain_pct;
	int d_gain_pct;
	int i_gain_pct;
};

struct pstate_funcs {
	int (*get_max)(void);
	int (*get_min)(void);
	int (*get_turbo)(void);
	int (*get_scaling)(void);
	void (*set)(struct cpudata*, int pstate);
	void (*get_vid)(struct cpudata *);
};

struct cpu_defaults {
	struct pstate_adjust_policy pid_policy;
	struct pstate_funcs funcs;
};

static struct pstate_adjust_policy pid_params;
static struct pstate_funcs pstate_funcs;
static int hwp_active;

struct perf_limits {
	int no_turbo;
	int turbo_disabled;
	int max_perf_pct;
	int min_perf_pct;
	int32_t max_perf;
	int32_t min_perf;
	int max_policy_pct;
	int max_sysfs_pct;
	int min_policy_pct;
	int min_sysfs_pct;
};

static struct perf_limits limits = {
	.no_turbo = 0,
	.turbo_disabled = 0,
	.max_perf_pct = 100,
	.max_perf = int_tofp(1),
	.min_perf_pct = 0,
	.min_perf = 0,
	.max_policy_pct = 100,
	.max_sysfs_pct = 100,
	.min_policy_pct = 0,
	.min_sysfs_pct = 0,
};

static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
			     int deadband, int integral) {
	pid->setpoint = setpoint;
	pid->deadband = deadband;
	pid->integral = int_tofp(integral);
	pid->last_err = int_tofp(setpoint) - int_tofp(busy);
}

static inline void pid_p_gain_set(struct _pid *pid, int percent)
{
	pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static inline void pid_i_gain_set(struct _pid *pid, int percent)
{
	pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static inline void pid_d_gain_set(struct _pid *pid, int percent)
{
	pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static signed int pid_calc(struct _pid *pid, int32_t busy)
{
	signed int result;
	int32_t pterm, dterm, fp_error;
	int32_t integral_limit;

	fp_error = int_tofp(pid->setpoint) - busy;

	if (abs(fp_error) <= int_tofp(pid->deadband))
		return 0;

	pterm = mul_fp(pid->p_gain, fp_error);

	pid->integral += fp_error;

	/*
	 * We limit the integral here so that it will never
	 * get higher than 30.  This prevents it from becoming
	 * too large an input over long periods of time and allows
	 * it to get factored out sooner.
	 *
	 * The value of 30 was chosen through experimentation.
	 */
	integral_limit = int_tofp(30);
	if (pid->integral > integral_limit)
		pid->integral = integral_limit;
	if (pid->integral < -integral_limit)
		pid->integral = -integral_limit;

	dterm = mul_fp(pid->d_gain, fp_error - pid->last_err);
	pid->last_err = fp_error;

	result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm;
	result = result + (1 << (FRAC_BITS-1));
	return (signed int)fp_toint(result);
}

static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu)
{
	pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct);
	pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct);
	pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct);

	pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0);
}

static inline void intel_pstate_reset_all_pid(void)
{
	unsigned int cpu;

	for_each_online_cpu(cpu) {
		if (all_cpu_data[cpu])
			intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
	}
}

static inline void update_turbo_state(void)
{
	u64 misc_en;
	struct cpudata *cpu;

	cpu = all_cpu_data[0];
	rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
	limits.turbo_disabled =
		(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
		 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
}

static void intel_pstate_hwp_set(void)
{
	int min, hw_min, max, hw_max, cpu, range, adj_range;
	u64 value, cap;

	rdmsrl(MSR_HWP_CAPABILITIES, cap);
	hw_min = HWP_LOWEST_PERF(cap);
	hw_max = HWP_HIGHEST_PERF(cap);
	range = hw_max - hw_min;

	get_online_cpus();

	for_each_online_cpu(cpu) {
		rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
		adj_range = limits.min_perf_pct * range / 100;
		min = hw_min + adj_range;
		value &= ~HWP_MIN_PERF(~0L);
		value |= HWP_MIN_PERF(min);

		adj_range = limits.max_perf_pct * range / 100;
		max = hw_min + adj_range;
		if (limits.no_turbo) {
			hw_max = HWP_GUARANTEED_PERF(cap);
			if (hw_max < max)
				max = hw_max;
		}

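		/*
		 * Write the adjusted ceiling into the MAX_PERF field of the
		 * request word and commit the whole request to the HWP MSR
		 * for this CPU.
		 */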
		value &= ~HWP_MAX_PERF(~0L);
		value |= HWP_MAX_PERF(max);
		wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
	}

	put_online_cpus();
}

/************************** debugfs begin ************************/
static int pid_param_set(void *data, u64 val)
{
	*(u32 *)data = val;
	intel_pstate_reset_all_pid();
	return 0;
}

static int pid_param_get(void *data, u64 *val)
{
	*val = *(u32 *)data;
	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, pid_param_set, "%llu\n");

struct pid_param {
	char *name;
	void *value;
};

static struct pid_param pid_files[] = {
	{"sample_rate_ms", &pid_params.sample_rate_ms},
	{"d_gain_pct", &pid_params.d_gain_pct},
	{"i_gain_pct", &pid_params.i_gain_pct},
	{"deadband", &pid_params.deadband},
	{"setpoint", &pid_params.setpoint},
	{"p_gain_pct", &pid_params.p_gain_pct},
	{NULL, NULL}
};

static void __init intel_pstate_debug_expose_params(void)
{
	struct dentry *debugfs_parent;
	int i = 0;

	if (hwp_active)
		return;
	debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
	if (IS_ERR_OR_NULL(debugfs_parent))
		return;
	while (pid_files[i].name) {
		debugfs_create_file(pid_files[i].name, 0660,
				    debugfs_parent, pid_files[i].value,
				    &fops_pid_param);
		i++;
	}
}

/************************** debugfs end ************************/

/************************** sysfs begin ************************/
#define show_one(file_name, object)					\
	static ssize_t show_##file_name					\
	(struct kobject *kobj, struct attribute *attr, char *buf)	\
	{								\
		return sprintf(buf, "%u\n", limits.object);		\
	}

static ssize_t show_turbo_pct(struct kobject *kobj,
				struct attribute *attr, char *buf)
{
	struct cpudata *cpu;
	int total, no_turbo, turbo_pct;
	uint32_t turbo_fp;

	cpu = all_cpu_data[0];

	total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
	no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
	turbo_fp = div_fp(int_tofp(no_turbo), int_tofp(total));
	turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));
	return sprintf(buf, "%u\n", turbo_pct);
}

static ssize_t show_num_pstates(struct kobject *kobj,
				struct attribute *attr, char *buf)
{
	struct cpudata *cpu;
	int total;

	cpu = all_cpu_data[0];
	total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
	return sprintf(buf, "%u\n", total);
}

static ssize_t show_no_turbo(struct kobject *kobj,
			     struct attribute *attr, char *buf)
{
	ssize_t ret;

	update_turbo_state();
	if (limits.turbo_disabled)
		ret = sprintf(buf, "%u\n", limits.turbo_disabled);
	else
		ret = sprintf(buf, "%u\n", limits.no_turbo);

	return ret;
}

static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
			      const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	update_turbo_state();
	if (limits.turbo_disabled) {
		pr_warn("intel_pstate: Turbo disabled by BIOS or unavailable on processor\n");
		return -EPERM;
	}

	limits.no_turbo = clamp_t(int, input, 0, 1);

	if (hwp_active)
		intel_pstate_hwp_set();

	return count;
}

static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	limits.max_sysfs_pct = clamp_t(int, input, 0 , 100);
	limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct);
	limits.max_perf_pct = max(limits.min_policy_pct, limits.max_perf_pct);
	limits.max_perf_pct = max(limits.min_perf_pct, limits.max_perf_pct);
	limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));

	if (hwp_active)
		intel_pstate_hwp_set();
	return count;
}

static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	limits.min_sysfs_pct = clamp_t(int, input, 0 , 100);
	limits.min_perf_pct = max(limits.min_policy_pct, limits.min_sysfs_pct);
	limits.min_perf_pct = min(limits.max_policy_pct, limits.min_perf_pct);
	limits.min_perf_pct = min(limits.max_perf_pct, limits.min_perf_pct);
	limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));

	if (hwp_active)
		intel_pstate_hwp_set();
	return count;
}

show_one(max_perf_pct, max_perf_pct);
show_one(min_perf_pct, min_perf_pct);

define_one_global_rw(no_turbo);
define_one_global_rw(max_perf_pct);
define_one_global_rw(min_perf_pct);
define_one_global_ro(turbo_pct);
define_one_global_ro(num_pstates);

static struct attribute *intel_pstate_attributes[] = {
	&no_turbo.attr,
	&max_perf_pct.attr,
	&min_perf_pct.attr,
	&turbo_pct.attr,
	&num_pstates.attr,
	NULL
};

static struct attribute_group intel_pstate_attr_group = {
	.attrs = intel_pstate_attributes,
};

static void __init intel_pstate_sysfs_expose_params(void)
{
	struct kobject *intel_pstate_kobject;
	int rc;

	intel_pstate_kobject = kobject_create_and_add("intel_pstate",
						&cpu_subsys.dev_root->kobj);
	BUG_ON(!intel_pstate_kobject);
	rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group);
	BUG_ON(rc);
}
/************************** sysfs end ************************/

static void intel_pstate_hwp_enable(struct cpudata *cpudata)
{
	pr_info("intel_pstate: HWP enabled\n");

	wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
}

static int byt_get_min_pstate(void)
{
	u64 value;

	rdmsrl(BYT_RATIOS, value);
	return (value >> 8) & 0x7F;
}

static int byt_get_max_pstate(void)
{
	u64 value;

	rdmsrl(BYT_RATIOS, value);
	return (value >> 16) & 0x7F;
}

static int byt_get_turbo_pstate(void)
{
	u64 value;

	rdmsrl(BYT_TURBO_RATIOS, value);
	return value & 0x7F;
}

static void byt_set_pstate(struct cpudata *cpudata, int pstate)
{
	u64 val;
	int32_t vid_fp;
	u32 vid;

	val = (u64)pstate << 8;
	if (limits.no_turbo && !limits.turbo_disabled)
		val |= (u64)1 << 32;

	vid_fp = cpudata->vid.min + mul_fp(
		int_tofp(pstate - cpudata->pstate.min_pstate),
		cpudata->vid.ratio);

	vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
	vid = ceiling_fp(vid_fp);

	if (pstate > cpudata->pstate.max_pstate)
		vid = cpudata->vid.turbo;

	val |= vid;

	wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
}

#define BYT_BCLK_FREQS 5
static int byt_freq_table[BYT_BCLK_FREQS] = { 833, 1000, 1333, 1167, 800};

static int byt_get_scaling(void)
{
	u64 value;
	int i;

	rdmsrl(MSR_FSB_FREQ, value);
	i = value & 0x3;

	BUG_ON(i > BYT_BCLK_FREQS);

	return byt_freq_table[i] * 100;
}

static void byt_get_vid(struct cpudata *cpudata)
{
	u64 value;

	rdmsrl(BYT_VIDS, value);
	cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
	cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
	cpudata->vid.ratio = div_fp(
		cpudata->vid.max - cpudata->vid.min,
		int_tofp(cpudata->pstate.max_pstate -
				cpudata->pstate.min_pstate));

	rdmsrl(BYT_TURBO_VIDS, value);
	cpudata->vid.turbo = value & 0x7f;
}

static int core_get_min_pstate(void)
{
	u64 value;

	rdmsrl(MSR_PLATFORM_INFO, value);
	return (value >> 40) & 0xFF;
}

static int core_get_max_pstate(void)
{
	u64 value;

	rdmsrl(MSR_PLATFORM_INFO, value);
	return (value >> 8) & 0xFF;
}

static int core_get_turbo_pstate(void)
{
	u64 value;
	int nont, ret;

	rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
	nont = core_get_max_pstate();
	ret = (value) & 255;
	if (ret <= nont)
		ret = nont;
	return ret;
}

static inline int core_get_scaling(void)
{
	return 100000;
}

static void core_set_pstate(struct cpudata *cpudata, int pstate)
{
	u64 val;

	val = (u64)pstate << 8;
	if (limits.no_turbo && !limits.turbo_disabled)
		val |= (u64)1 << 32;

	wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
}

static int knl_get_turbo_pstate(void)
{
	u64 value;
	int nont, ret;

	rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
	nont = core_get_max_pstate();
	ret = (((value) >> 8) & 0xFF);
	if (ret <= nont)
		ret = nont;
	return ret;
}

static struct cpu_defaults core_params = {
	.pid_policy = {
		.sample_rate_ms = 10,
		.deadband = 0,
		.setpoint = 97,
		.p_gain_pct = 20,
		.d_gain_pct = 0,
		.i_gain_pct = 0,
	},
	.funcs = {
		.get_max = core_get_max_pstate,
		.get_min = core_get_min_pstate,
		.get_turbo = core_get_turbo_pstate,
		.get_scaling = core_get_scaling,
		.set = core_set_pstate,
	},
};

static struct cpu_defaults byt_params = {
	.pid_policy = {
		.sample_rate_ms = 10,
		.deadband = 0,
		.setpoint = 60,
		.p_gain_pct = 14,
		.d_gain_pct = 0,
		.i_gain_pct = 4,
	},
	.funcs = {
		.get_max = byt_get_max_pstate,
		.get_min = byt_get_min_pstate,
		.get_turbo = byt_get_turbo_pstate,
		.set = byt_set_pstate,
		.get_scaling = byt_get_scaling,
		.get_vid = byt_get_vid,
	},
};

static struct cpu_defaults knl_params = {
	.pid_policy = {
		.sample_rate_ms = 10,
		.deadband = 0,
		.setpoint = 97,
		.p_gain_pct = 20,
		.d_gain_pct = 0,
		.i_gain_pct = 0,
	},
	.funcs = {
		.get_max = core_get_max_pstate,
		.get_min = core_get_min_pstate,
		.get_turbo = knl_get_turbo_pstate,
		.get_scaling = core_get_scaling,
		.set = core_set_pstate,
	},
};

static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
{
	int max_perf = cpu->pstate.turbo_pstate;
	int max_perf_adj;
	int min_perf;

	if (limits.no_turbo || limits.turbo_disabled)
		max_perf = cpu->pstate.max_pstate;

	/*
	 * performance can be limited by user through sysfs, by cpufreq
	 * policy, or by cpu specific default values determined through
	 * experimentation.
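	 *
	 * For example (illustrative numbers): with a turbo pstate of 32 and
	 * max_perf_pct = 80, limits.max_perf holds roughly 0.8 in FRAC_BITS
	 * fixed point, so max_perf_adj below works out to
	 * fp_toint(32 * 0.8) = 25, which is then clamped to the
	 * [min_pstate, turbo_pstate] range.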
	 */
	max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf));
	*max = clamp_t(int, max_perf_adj,
			cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);

	min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf));
	*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
}

static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force)
{
	int max_perf, min_perf;

	if (force) {
		update_turbo_state();

		intel_pstate_get_min_max(cpu, &min_perf, &max_perf);

		pstate = clamp_t(int, pstate, min_perf, max_perf);

		if (pstate == cpu->pstate.current_pstate)
			return;
	}
	trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);

	cpu->pstate.current_pstate = pstate;

	pstate_funcs.set(cpu, pstate);
}

static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
{
	cpu->pstate.min_pstate = pstate_funcs.get_min();
	cpu->pstate.max_pstate = pstate_funcs.get_max();
	cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
	cpu->pstate.scaling = pstate_funcs.get_scaling();

	if (pstate_funcs.get_vid)
		pstate_funcs.get_vid(cpu);
	intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
}

static inline void intel_pstate_calc_busy(struct cpudata *cpu)
{
	struct sample *sample = &cpu->sample;
	int64_t core_pct;

	core_pct = int_tofp(sample->aperf) * int_tofp(100);
	core_pct = div64_u64(core_pct, int_tofp(sample->mperf));

	sample->freq = fp_toint(
		mul_fp(int_tofp(
			cpu->pstate.max_pstate * cpu->pstate.scaling / 100),
			core_pct));

	sample->core_pct_busy = (int32_t)core_pct;
}

static inline void intel_pstate_sample(struct cpudata *cpu)
{
	u64 aperf, mperf;
	unsigned long flags;
	u64 tsc;

	local_irq_save(flags);
	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);
	tsc = rdtsc();
	local_irq_restore(flags);

	cpu->last_sample_time = cpu->sample.time;
	cpu->sample.time = ktime_get();
	cpu->sample.aperf = aperf;
	cpu->sample.mperf = mperf;
	cpu->sample.tsc = tsc;
	cpu->sample.aperf -= cpu->prev_aperf;
	cpu->sample.mperf -= cpu->prev_mperf;
	cpu->sample.tsc -= cpu->prev_tsc;

	intel_pstate_calc_busy(cpu);

	cpu->prev_aperf = aperf;
	cpu->prev_mperf = mperf;
	cpu->prev_tsc = tsc;
}

static inline void intel_hwp_set_sample_time(struct cpudata *cpu)
{
	int delay;

	delay = msecs_to_jiffies(50);
	mod_timer_pinned(&cpu->timer, jiffies + delay);
}

static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
{
	int delay;

	delay = msecs_to_jiffies(pid_params.sample_rate_ms);
	mod_timer_pinned(&cpu->timer, jiffies + delay);
}

static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
{
	int32_t core_busy, max_pstate, current_pstate, sample_ratio;
	s64 duration_us;
	u32 sample_time;

	/*
	 * core_busy is the ratio of actual performance to max
	 * max_pstate is the max non turbo pstate available
	 * current_pstate was the pstate that was requested during
	 *	the last sample period.
	 *
	 * We normalize core_busy, which was our actual percent
	 * performance to what we requested during the last sample
	 * period. The result will be a percentage of busy at a
	 * specified pstate.
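	 *
	 * For example (illustrative numbers): if the last sample measured
	 * 75% of maximum performance while pstate 20 was requested and the
	 * max non-turbo pstate is 30, the scaled value below is
	 * 75 * (30 / 20) = 112.5%, i.e. the PID sees the core as more than
	 * fully busy at the requested pstate and will push it upward.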
	 */
	core_busy = cpu->sample.core_pct_busy;
	max_pstate = int_tofp(cpu->pstate.max_pstate);
	current_pstate = int_tofp(cpu->pstate.current_pstate);
	core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));

	/*
	 * Since we have a deferred timer, it will not fire unless
	 * we are in C0.  So, determine if the actual elapsed time
	 * is significantly greater (3x) than our sample interval.  If it
	 * is, then we were idle for a long enough period of time
	 * to adjust our busyness.
	 */
	sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC;
	duration_us = ktime_us_delta(cpu->sample.time,
				     cpu->last_sample_time);
	if (duration_us > sample_time * 3) {
		sample_ratio = div_fp(int_tofp(sample_time),
				      int_tofp(duration_us));
		core_busy = mul_fp(core_busy, sample_ratio);
	}

	return core_busy;
}

static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
{
	int32_t busy_scaled;
	struct _pid *pid;
	signed int ctl;
	int from;
	struct sample *sample;

	from = cpu->pstate.current_pstate;

	pid = &cpu->pid;
	busy_scaled = intel_pstate_get_scaled_busy(cpu);

	ctl = pid_calc(pid, busy_scaled);

	/* Negative values of ctl increase the pstate and vice versa */
	intel_pstate_set_pstate(cpu, cpu->pstate.current_pstate - ctl, true);

	sample = &cpu->sample;
	trace_pstate_sample(fp_toint(sample->core_pct_busy),
			    fp_toint(busy_scaled),
			    from,
			    cpu->pstate.current_pstate,
			    sample->mperf,
			    sample->aperf,
			    sample->tsc,
			    sample->freq);
}

static void intel_hwp_timer_func(unsigned long __data)
{
	struct cpudata *cpu = (struct cpudata *) __data;

	intel_pstate_sample(cpu);
	intel_hwp_set_sample_time(cpu);
}

static void intel_pstate_timer_func(unsigned long __data)
{
	struct cpudata *cpu = (struct cpudata *) __data;

	intel_pstate_sample(cpu);

	intel_pstate_adjust_busy_pstate(cpu);

	intel_pstate_set_sample_time(cpu);
}

#define ICPU(model, policy) \
	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
			(unsigned long)&policy }

static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
	ICPU(0x2a, core_params),
	ICPU(0x2d, core_params),
	ICPU(0x37, byt_params),
	ICPU(0x3a, core_params),
	ICPU(0x3c, core_params),
	ICPU(0x3d, core_params),
	ICPU(0x3e, core_params),
	ICPU(0x3f, core_params),
	ICPU(0x45, core_params),
	ICPU(0x46, core_params),
	ICPU(0x47, core_params),
	ICPU(0x4c, byt_params),
	ICPU(0x4e, core_params),
	ICPU(0x4f, core_params),
	ICPU(0x5e, core_params),
	ICPU(0x56, core_params),
	ICPU(0x57, knl_params),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);

static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] = {
	ICPU(0x56, core_params),
	{}
};

static int intel_pstate_init_cpu(unsigned int cpunum)
{
	struct cpudata *cpu;

	if (!all_cpu_data[cpunum])
		all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata),
					       GFP_KERNEL);
	if (!all_cpu_data[cpunum])
		return -ENOMEM;

	cpu = all_cpu_data[cpunum];

	cpu->cpu = cpunum;

	if (hwp_active)
		intel_pstate_hwp_enable(cpu);

	intel_pstate_get_cpu_pstates(cpu);

	init_timer_deferrable(&cpu->timer);
	cpu->timer.data = (unsigned long)cpu;
	cpu->timer.expires = jiffies + HZ/100;

	if (!hwp_active)
		cpu->timer.function = intel_pstate_timer_func;
	else
		cpu->timer.function = intel_hwp_timer_func;

	intel_pstate_busy_pid_reset(cpu);
	intel_pstate_sample(cpu);

	add_timer_on(&cpu->timer, cpunum);

	pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);

	return 0;
}

static unsigned int intel_pstate_get(unsigned int cpu_num)
{
	struct sample *sample;
	struct cpudata *cpu;

	cpu = all_cpu_data[cpu_num];
	if (!cpu)
		return 0;
	sample = &cpu->sample;
	return sample->freq;
}

static int intel_pstate_set_policy(struct cpufreq_policy *policy)
{
	if (!policy->cpuinfo.max_freq)
		return -ENODEV;

	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE &&
	    policy->max >= policy->cpuinfo.max_freq) {
		limits.min_policy_pct = 100;
		limits.min_perf_pct = 100;
		limits.min_perf = int_tofp(1);
		limits.max_policy_pct = 100;
		limits.max_perf_pct = 100;
		limits.max_perf = int_tofp(1);
		limits.no_turbo = 0;
		return 0;
	}

	limits.min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
	limits.min_policy_pct = clamp_t(int, limits.min_policy_pct, 0 , 100);
	limits.max_policy_pct = (policy->max * 100) / policy->cpuinfo.max_freq;
	limits.max_policy_pct = clamp_t(int, limits.max_policy_pct, 0 , 100);

	/* Normalize user input to [min_policy_pct, max_policy_pct] */
	limits.min_perf_pct = max(limits.min_policy_pct, limits.min_sysfs_pct);
	limits.min_perf_pct = min(limits.max_policy_pct, limits.min_perf_pct);
	limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct);
	limits.max_perf_pct = max(limits.min_policy_pct, limits.max_perf_pct);

	/* Make sure min_perf_pct <= max_perf_pct */
	limits.min_perf_pct = min(limits.max_perf_pct, limits.min_perf_pct);

	limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));
	limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));

	if (hwp_active)
		intel_pstate_hwp_set();

	return 0;
}

static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
{
	cpufreq_verify_within_cpu_limits(policy);

	if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
	    policy->policy != CPUFREQ_POLICY_PERFORMANCE)
		return -EINVAL;

	return 0;
}

static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
{
	int cpu_num = policy->cpu;
	struct cpudata *cpu = all_cpu_data[cpu_num];

	pr_debug("intel_pstate: CPU %d exiting\n", cpu_num);

	del_timer_sync(&all_cpu_data[cpu_num]->timer);
	if (hwp_active)
		return;

	intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
}

static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
{
	struct cpudata *cpu;
	int rc;

	rc = intel_pstate_init_cpu(policy->cpu);
	if (rc)
		return rc;

	cpu = all_cpu_data[policy->cpu];

	if (limits.min_perf_pct == 100 && limits.max_perf_pct == 100)
		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
	else
		policy->policy = CPUFREQ_POLICY_POWERSAVE;

	policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
	policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;

	/* cpuinfo and default policy values */
	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
	policy->cpuinfo.max_freq =
		cpu->pstate.turbo_pstate * cpu->pstate.scaling;
	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
	cpumask_set_cpu(policy->cpu, policy->cpus);

	return 0;
}

static struct cpufreq_driver intel_pstate_driver = {
	.flags		= CPUFREQ_CONST_LOOPS,
	.verify		= intel_pstate_verify_policy,
	.setpolicy	= intel_pstate_set_policy,
	.get		= intel_pstate_get,
	.init		= intel_pstate_cpu_init,
	.stop_cpu	= intel_pstate_stop_cpu,
	.name		= "intel_pstate",
};

static int __initdata no_load;
static int __initdata no_hwp;
static int __initdata hwp_only;
static unsigned int force_load;

static int intel_pstate_msrs_not_valid(void)
{
	if (!pstate_funcs.get_max() ||
	    !pstate_funcs.get_min() ||
	    !pstate_funcs.get_turbo())
		return -ENODEV;

	return 0;
}

static void copy_pid_params(struct pstate_adjust_policy *policy)
{
	pid_params.sample_rate_ms = policy->sample_rate_ms;
	pid_params.p_gain_pct = policy->p_gain_pct;
	pid_params.i_gain_pct = policy->i_gain_pct;
	pid_params.d_gain_pct = policy->d_gain_pct;
	pid_params.deadband = policy->deadband;
	pid_params.setpoint = policy->setpoint;
}

static void copy_cpu_funcs(struct pstate_funcs *funcs)
{
	pstate_funcs.get_max = funcs->get_max;
	pstate_funcs.get_min = funcs->get_min;
	pstate_funcs.get_turbo = funcs->get_turbo;
	pstate_funcs.get_scaling = funcs->get_scaling;
	pstate_funcs.set = funcs->set;
	pstate_funcs.get_vid = funcs->get_vid;
}

#if IS_ENABLED(CONFIG_ACPI)
#include <acpi/processor.h>

static bool intel_pstate_no_acpi_pss(void)
{
	int i;

	for_each_possible_cpu(i) {
		acpi_status status;
		union acpi_object *pss;
		struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
		struct acpi_processor *pr = per_cpu(processors, i);

		if (!pr)
			continue;

		status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
		if (ACPI_FAILURE(status))
			continue;

		pss = buffer.pointer;
		if (pss && pss->type == ACPI_TYPE_PACKAGE) {
			kfree(pss);
			return false;
		}

		kfree(pss);
	}

	return true;
}

static bool intel_pstate_has_acpi_ppc(void)
{
	int i;

	for_each_possible_cpu(i) {
		struct acpi_processor *pr = per_cpu(processors, i);

		if (!pr)
			continue;
		if (acpi_has_method(pr->handle, "_PPC"))
			return true;
	}
	return false;
}

enum {
	PSS,
	PPC,
};

struct hw_vendor_info {
	u16  valid;
	char oem_id[ACPI_OEM_ID_SIZE];
	char oem_table_id[ACPI_OEM_TABLE_ID_SIZE];
	int  oem_pwr_table;
};

/* Hardware vendor-specific info that has its own power management modes */
static struct hw_vendor_info vendor_info[] = {
	{1, "HP    ", "ProLiant", PSS},
	{1, "ORACLE", "X4-2    ", PPC},
	{1, "ORACLE", "X4-2L   ", PPC},
	{1, "ORACLE", "X4-2B   ", PPC},
	{1, "ORACLE", "X3-2    ", PPC},
	{1, "ORACLE", "X3-2L   ", PPC},
	{1, "ORACLE", "X3-2B   ", PPC},
	{1, "ORACLE", "X4470M2 ", PPC},
	{1, "ORACLE", "X4270M3 ", PPC},
	{1, "ORACLE", "X4270M2 ", PPC},
	{1, "ORACLE", "X4170M2 ", PPC},
	{1, "ORACLE", "X4170 M3", PPC},
	{1, "ORACLE", "X4275 M3", PPC},
	{1, "ORACLE", "X6-2    ", PPC},
	{1, "ORACLE", "Sudbury ", PPC},
	{0, "", ""},
};

static bool intel_pstate_platform_pwr_mgmt_exists(void)
{
	struct acpi_table_header hdr;
	struct hw_vendor_info *v_info;
	const struct x86_cpu_id *id;
	u64 misc_pwr;

	id = x86_match_cpu(intel_pstate_cpu_oob_ids);
	if (id) {
		rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr);
		if (misc_pwr & (1 << 8))
			return true;
	}

	if (acpi_disabled ||
	    ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr)))
		return false;

	for (v_info = vendor_info; v_info->valid; v_info++) {
		if (!strncmp(hdr.oem_id, v_info->oem_id, ACPI_OEM_ID_SIZE) &&
		    !strncmp(hdr.oem_table_id, v_info->oem_table_id,
						ACPI_OEM_TABLE_ID_SIZE))
			switch (v_info->oem_pwr_table) {
			case PSS:
				return intel_pstate_no_acpi_pss();
			case PPC:
				return intel_pstate_has_acpi_ppc() &&
					(!force_load);
			}
	}

	return false;
}
#else /* CONFIG_ACPI not enabled */
static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; }
static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
#endif /* CONFIG_ACPI */

static int __init intel_pstate_init(void)
{
	int cpu, rc = 0;
	const struct x86_cpu_id *id;
	struct cpu_defaults *cpu_def;

	if (no_load)
		return -ENODEV;

	id = x86_match_cpu(intel_pstate_cpu_ids);
	if (!id)
		return -ENODEV;

	/*
	 * The Intel pstate driver will be ignored if the platform
	 * firmware has its own power management modes.
	 */
	if (intel_pstate_platform_pwr_mgmt_exists())
		return -ENODEV;

	cpu_def = (struct cpu_defaults *)id->driver_data;

	copy_pid_params(&cpu_def->pid_policy);
	copy_cpu_funcs(&cpu_def->funcs);

	if (intel_pstate_msrs_not_valid())
		return -ENODEV;

	pr_info("Intel P-state driver initializing.\n");

	all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
	if (!all_cpu_data)
		return -ENOMEM;

	if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp)
		hwp_active++;

	if (!hwp_active && hwp_only)
		goto out;

	rc = cpufreq_register_driver(&intel_pstate_driver);
	if (rc)
		goto out;

	intel_pstate_debug_expose_params();
	intel_pstate_sysfs_expose_params();

	return rc;
out:
	get_online_cpus();
	for_each_online_cpu(cpu) {
		if (all_cpu_data[cpu]) {
			del_timer_sync(&all_cpu_data[cpu]->timer);
			kfree(all_cpu_data[cpu]);
		}
	}

	put_online_cpus();
	vfree(all_cpu_data);
	return -ENODEV;
}
device_initcall(intel_pstate_init);

static int __init intel_pstate_setup(char *str)
{
	if (!str)
		return -EINVAL;

	if (!strcmp(str, "disable"))
		no_load = 1;
	if (!strcmp(str, "no_hwp"))
		no_hwp = 1;
	if (!strcmp(str, "force"))
		force_load = 1;
	if (!strcmp(str, "hwp_only"))
		hwp_only = 1;
	return 0;
}
early_param("intel_pstate", intel_pstate_setup);

MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors");
MODULE_LICENSE("GPL");