/*
 * intel_pstate.c: Native P state management for Intel processors
 *
 * (C) Copyright 2012 Intel Corporation
 * Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/acpi.h>
#include <linux/vmalloc.h>
#include <trace/events/power.h>

#include <asm/div64.h>
#include <asm/msr.h>
#include <asm/cpu_device_id.h>
#include <asm/cpufeature.h>

#define BYT_RATIOS		0x66a
#define BYT_VIDS		0x66b
#define BYT_TURBO_RATIOS	0x66c
#define BYT_TURBO_VIDS		0x66d

#define FRAC_BITS 8
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)


static inline int32_t mul_fp(int32_t x, int32_t y)
{
        return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
}

static inline int32_t div_fp(s64 x, s64 y)
{
        return div64_s64((int64_t)x << FRAC_BITS, y);
}

static inline int ceiling_fp(int32_t x)
{
        int mask, ret;

        ret = fp_toint(x);
        mask = (1 << FRAC_BITS) - 1;
        if (x & mask)
                ret += 1;
        return ret;
}

struct sample {
        int32_t core_pct_busy;
        u64 aperf;
        u64 mperf;
        u64 tsc;
        int freq;
        ktime_t time;
};

struct pstate_data {
        int current_pstate;
        int min_pstate;
        int max_pstate;
        int scaling;
        int turbo_pstate;
};

struct vid_data {
        int min;
        int max;
        int turbo;
        int32_t ratio;
};

struct _pid {
        int setpoint;
        int32_t integral;
        int32_t p_gain;
        int32_t i_gain;
        int32_t d_gain;
        int deadband;
        int32_t last_err;
};

struct cpudata {
        int cpu;

        struct timer_list timer;

        struct pstate_data pstate;
        struct vid_data vid;
        struct _pid pid;

        ktime_t last_sample_time;
        u64 prev_aperf;
        u64 prev_mperf;
        u64 prev_tsc;
        struct sample sample;
};

static struct cpudata **all_cpu_data;

struct pstate_adjust_policy {
        int sample_rate_ms;
        int deadband;
        int setpoint;
        int p_gain_pct;
        int d_gain_pct;
        int i_gain_pct;
};

struct pstate_funcs {
        int (*get_max)(void);
        int (*get_min)(void);
        int (*get_turbo)(void);
        int (*get_scaling)(void);
        void (*set)(struct cpudata*, int pstate);
        void (*get_vid)(struct cpudata *);
};

struct cpu_defaults {
        struct pstate_adjust_policy pid_policy;
        struct pstate_funcs funcs;
};

static struct pstate_adjust_policy pid_params;
static struct pstate_funcs pstate_funcs;
static int hwp_active;

struct perf_limits {
        int no_turbo;
        int turbo_disabled;
        int max_perf_pct;
        int min_perf_pct;
        int32_t max_perf;
        int32_t min_perf;
        int max_policy_pct;
        int max_sysfs_pct;
        int min_policy_pct;
        int min_sysfs_pct;
};

static struct perf_limits limits = {
        .no_turbo = 0,
        .turbo_disabled = 0,
        .max_perf_pct = 100,
        .max_perf = int_tofp(1),
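        /* The min side and the policy/sysfs caps below default to the full 0..100% range. */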
        .min_perf_pct = 0,
        .min_perf = 0,
        .max_policy_pct = 100,
        .max_sysfs_pct = 100,
        .min_policy_pct = 0,
        .min_sysfs_pct = 0,
};

static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
                             int deadband, int integral) {
        pid->setpoint = setpoint;
        pid->deadband = deadband;
        pid->integral = int_tofp(integral);
        pid->last_err = int_tofp(setpoint) - int_tofp(busy);
}

static inline void pid_p_gain_set(struct _pid *pid, int percent)
{
        pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static inline void pid_i_gain_set(struct _pid *pid, int percent)
{
        pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static inline void pid_d_gain_set(struct _pid *pid, int percent)
{
        pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static signed int pid_calc(struct _pid *pid, int32_t busy)
{
        signed int result;
        int32_t pterm, dterm, fp_error;
        int32_t integral_limit;

        fp_error = int_tofp(pid->setpoint) - busy;

        if (abs(fp_error) <= int_tofp(pid->deadband))
                return 0;

        pterm = mul_fp(pid->p_gain, fp_error);

        pid->integral += fp_error;

        /*
         * We limit the integral here so that it will never
         * get higher than 30. This prevents it from becoming
         * too large an input over long periods of time and allows
         * it to get factored out sooner.
         *
         * The value of 30 was chosen through experimentation.
         */
        integral_limit = int_tofp(30);
        if (pid->integral > integral_limit)
                pid->integral = integral_limit;
        if (pid->integral < -integral_limit)
                pid->integral = -integral_limit;

        dterm = mul_fp(pid->d_gain, fp_error - pid->last_err);
        pid->last_err = fp_error;

        result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm;
        result = result + (1 << (FRAC_BITS-1));
        return (signed int)fp_toint(result);
}

static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu)
{
        pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct);
        pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct);
        pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct);

        pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0);
}

static inline void intel_pstate_reset_all_pid(void)
{
        unsigned int cpu;

        for_each_online_cpu(cpu) {
                if (all_cpu_data[cpu])
                        intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
        }
}

static inline void update_turbo_state(void)
{
        u64 misc_en;
        struct cpudata *cpu;

        cpu = all_cpu_data[0];
        rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
        limits.turbo_disabled =
                (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
                 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
}

static void intel_pstate_hwp_set(void)
{
        int min, hw_min, max, hw_max, cpu, range, adj_range;
        u64 value, cap;

        rdmsrl(MSR_HWP_CAPABILITIES, cap);
        hw_min = HWP_LOWEST_PERF(cap);
        hw_max = HWP_HIGHEST_PERF(cap);
        range = hw_max - hw_min;

        get_online_cpus();

        for_each_online_cpu(cpu) {
                rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
                adj_range = limits.min_perf_pct * range / 100;
                min = hw_min + adj_range;
                value &= ~HWP_MIN_PERF(~0L);
                value |= HWP_MIN_PERF(min);

                adj_range = limits.max_perf_pct * range / 100;
                max = hw_min + adj_range;
                if (limits.no_turbo) {
                        hw_max = HWP_GUARANTEED_PERF(cap);
                        if (hw_max < max)
                                max = hw_max;
                }

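                /* Fold the adjusted maximum into the request and program the MSR. */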
                value &= ~HWP_MAX_PERF(~0L);
                value |= HWP_MAX_PERF(max);
                wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
        }

        put_online_cpus();
}

/************************** debugfs begin ************************/
static int pid_param_set(void *data, u64 val)
{
        *(u32 *)data = val;
        intel_pstate_reset_all_pid();
        return 0;
}

static int pid_param_get(void *data, u64 *val)
{
        *val = *(u32 *)data;
        return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, pid_param_set, "%llu\n");

struct pid_param {
        char *name;
        void *value;
};

static struct pid_param pid_files[] = {
        {"sample_rate_ms", &pid_params.sample_rate_ms},
        {"d_gain_pct", &pid_params.d_gain_pct},
        {"i_gain_pct", &pid_params.i_gain_pct},
        {"deadband", &pid_params.deadband},
        {"setpoint", &pid_params.setpoint},
        {"p_gain_pct", &pid_params.p_gain_pct},
        {NULL, NULL}
};

static void __init intel_pstate_debug_expose_params(void)
{
        struct dentry *debugfs_parent;
        int i = 0;

        if (hwp_active)
                return;
        debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
        if (IS_ERR_OR_NULL(debugfs_parent))
                return;
        while (pid_files[i].name) {
                debugfs_create_file(pid_files[i].name, 0660,
                                    debugfs_parent, pid_files[i].value,
                                    &fops_pid_param);
                i++;
        }
}

/************************** debugfs end ************************/

/************************** sysfs begin ************************/
#define show_one(file_name, object)					\
	static ssize_t show_##file_name					\
	(struct kobject *kobj, struct attribute *attr, char *buf)	\
	{								\
		return sprintf(buf, "%u\n", limits.object);		\
	}

static ssize_t show_turbo_pct(struct kobject *kobj,
                              struct attribute *attr, char *buf)
{
        struct cpudata *cpu;
        int total, no_turbo, turbo_pct;
        uint32_t turbo_fp;

        cpu = all_cpu_data[0];

        total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
        no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
        turbo_fp = div_fp(int_tofp(no_turbo), int_tofp(total));
        turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));
        return sprintf(buf, "%u\n", turbo_pct);
}

static ssize_t show_num_pstates(struct kobject *kobj,
                                struct attribute *attr, char *buf)
{
        struct cpudata *cpu;
        int total;

        cpu = all_cpu_data[0];
        total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
        return sprintf(buf, "%u\n", total);
}

static ssize_t show_no_turbo(struct kobject *kobj,
                             struct attribute *attr, char *buf)
{
        ssize_t ret;

        update_turbo_state();
        if (limits.turbo_disabled)
                ret = sprintf(buf, "%u\n", limits.turbo_disabled);
        else
                ret = sprintf(buf, "%u\n", limits.no_turbo);

        return ret;
}

static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
                              const char *buf, size_t count)
{
        unsigned int input;
        int ret;

        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;

        update_turbo_state();
        if (limits.turbo_disabled) {
                pr_warn("intel_pstate: Turbo disabled by BIOS or unavailable on processor\n");
                return -EPERM;
        }

        limits.no_turbo = clamp_t(int, input, 0, 1);

        if (hwp_active)
                intel_pstate_hwp_set();

        return count;
}

static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
                                  const char *buf, size_t count)
{
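        /*
         * Update the sysfs cap on maximum performance; the value actually
         * applied is combined with the cpufreq policy limits below.
         */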
        unsigned int input;
        int ret;

        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;

        limits.max_sysfs_pct = clamp_t(int, input, 0, 100);
        limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct);
        limits.max_perf_pct = max(limits.min_policy_pct, limits.max_perf_pct);
        limits.max_perf_pct = max(limits.min_perf_pct, limits.max_perf_pct);
        limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));

        if (hwp_active)
                intel_pstate_hwp_set();
        return count;
}

static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
                                  const char *buf, size_t count)
{
        unsigned int input;
        int ret;

        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;

        limits.min_sysfs_pct = clamp_t(int, input, 0, 100);
        limits.min_perf_pct = max(limits.min_policy_pct, limits.min_sysfs_pct);
        limits.min_perf_pct = min(limits.max_policy_pct, limits.min_perf_pct);
        limits.min_perf_pct = min(limits.max_perf_pct, limits.min_perf_pct);
        limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));

        if (hwp_active)
                intel_pstate_hwp_set();
        return count;
}

show_one(max_perf_pct, max_perf_pct);
show_one(min_perf_pct, min_perf_pct);

define_one_global_rw(no_turbo);
define_one_global_rw(max_perf_pct);
define_one_global_rw(min_perf_pct);
define_one_global_ro(turbo_pct);
define_one_global_ro(num_pstates);

static struct attribute *intel_pstate_attributes[] = {
        &no_turbo.attr,
        &max_perf_pct.attr,
        &min_perf_pct.attr,
        &turbo_pct.attr,
        &num_pstates.attr,
        NULL
};

static struct attribute_group intel_pstate_attr_group = {
        .attrs = intel_pstate_attributes,
};

static void __init intel_pstate_sysfs_expose_params(void)
{
        struct kobject *intel_pstate_kobject;
        int rc;

        intel_pstate_kobject = kobject_create_and_add("intel_pstate",
                                                &cpu_subsys.dev_root->kobj);
        BUG_ON(!intel_pstate_kobject);
        rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group);
        BUG_ON(rc);
}
/************************** sysfs end ************************/

static void intel_pstate_hwp_enable(struct cpudata *cpudata)
{
        pr_info("intel_pstate: HWP enabled\n");

        wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
}

static int byt_get_min_pstate(void)
{
        u64 value;

        rdmsrl(BYT_RATIOS, value);
        return (value >> 8) & 0x7F;
}

static int byt_get_max_pstate(void)
{
        u64 value;

        rdmsrl(BYT_RATIOS, value);
        return (value >> 16) & 0x7F;
}

static int byt_get_turbo_pstate(void)
{
        u64 value;

        rdmsrl(BYT_TURBO_RATIOS, value);
        return value & 0x7F;
}

static void byt_set_pstate(struct cpudata *cpudata, int pstate)
{
        u64 val;
        int32_t vid_fp;
        u32 vid;

        val = (u64)pstate << 8;
        if (limits.no_turbo && !limits.turbo_disabled)
                val |= (u64)1 << 32;

        vid_fp = cpudata->vid.min + mul_fp(
                int_tofp(pstate - cpudata->pstate.min_pstate),
                cpudata->vid.ratio);

        vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
        vid = ceiling_fp(vid_fp);

        if (pstate > cpudata->pstate.max_pstate)
                vid = cpudata->vid.turbo;

        val |= vid;

        wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
}

#define BYT_BCLK_FREQS 5
static int byt_freq_table[BYT_BCLK_FREQS] = { 833,
                                              1000, 1333, 1167, 800};

static int byt_get_scaling(void)
{
        u64 value;
        int i;

        rdmsrl(MSR_FSB_FREQ, value);
        i = value & 0x3;

        BUG_ON(i > BYT_BCLK_FREQS);

        return byt_freq_table[i] * 100;
}

static void byt_get_vid(struct cpudata *cpudata)
{
        u64 value;

        rdmsrl(BYT_VIDS, value);
        cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
        cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
        cpudata->vid.ratio = div_fp(
                cpudata->vid.max - cpudata->vid.min,
                int_tofp(cpudata->pstate.max_pstate -
                         cpudata->pstate.min_pstate));

        rdmsrl(BYT_TURBO_VIDS, value);
        cpudata->vid.turbo = value & 0x7f;
}

static int core_get_min_pstate(void)
{
        u64 value;

        rdmsrl(MSR_PLATFORM_INFO, value);
        return (value >> 40) & 0xFF;
}

static int core_get_max_pstate(void)
{
        u64 value;

        rdmsrl(MSR_PLATFORM_INFO, value);
        return (value >> 8) & 0xFF;
}

static int core_get_turbo_pstate(void)
{
        u64 value;
        int nont, ret;

        rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
        nont = core_get_max_pstate();
        ret = (value) & 255;
        if (ret <= nont)
                ret = nont;
        return ret;
}

static inline int core_get_scaling(void)
{
        return 100000;
}

static void core_set_pstate(struct cpudata *cpudata, int pstate)
{
        u64 val;

        val = (u64)pstate << 8;
        if (limits.no_turbo && !limits.turbo_disabled)
                val |= (u64)1 << 32;

        wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
}

static int knl_get_turbo_pstate(void)
{
        u64 value;
        int nont, ret;

        rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
        nont = core_get_max_pstate();
        ret = (((value) >> 8) & 0xFF);
        if (ret <= nont)
                ret = nont;
        return ret;
}

static struct cpu_defaults core_params = {
        .pid_policy = {
                .sample_rate_ms = 10,
                .deadband = 0,
                .setpoint = 97,
                .p_gain_pct = 20,
                .d_gain_pct = 0,
                .i_gain_pct = 0,
        },
        .funcs = {
                .get_max = core_get_max_pstate,
                .get_min = core_get_min_pstate,
                .get_turbo = core_get_turbo_pstate,
                .get_scaling = core_get_scaling,
                .set = core_set_pstate,
        },
};

static struct cpu_defaults byt_params = {
        .pid_policy = {
                .sample_rate_ms = 10,
                .deadband = 0,
                .setpoint = 60,
                .p_gain_pct = 14,
                .d_gain_pct = 0,
                .i_gain_pct = 4,
        },
        .funcs = {
                .get_max = byt_get_max_pstate,
                .get_min = byt_get_min_pstate,
                .get_turbo = byt_get_turbo_pstate,
                .set = byt_set_pstate,
                .get_scaling = byt_get_scaling,
                .get_vid = byt_get_vid,
        },
};

static struct cpu_defaults knl_params = {
        .pid_policy = {
                .sample_rate_ms = 10,
                .deadband = 0,
                .setpoint = 97,
                .p_gain_pct = 20,
                .d_gain_pct = 0,
                .i_gain_pct = 0,
        },
        .funcs = {
                .get_max = core_get_max_pstate,
                .get_min = core_get_min_pstate,
                .get_turbo = knl_get_turbo_pstate,
                .get_scaling = core_get_scaling,
                .set = core_set_pstate,
        },
};

static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
{
        int max_perf = cpu->pstate.turbo_pstate;
        int max_perf_adj;
        int min_perf;

        if (limits.no_turbo || limits.turbo_disabled)
                max_perf = cpu->pstate.max_pstate;

        /*
         * performance can be limited by user through sysfs, by cpufreq
         * policy, or by cpu specific default values determined through
         * experimentation.
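         * The combined result of those inputs is carried in limits.min_perf
         * and limits.max_perf, which are applied as fractions below.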
         */
        max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf));
        *max = clamp_t(int, max_perf_adj,
                       cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);

        min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf));
        *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
}

static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force)
{
        int max_perf, min_perf;

        if (force) {
                update_turbo_state();

                intel_pstate_get_min_max(cpu, &min_perf, &max_perf);

                pstate = clamp_t(int, pstate, min_perf, max_perf);

                if (pstate == cpu->pstate.current_pstate)
                        return;
        }
        trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);

        cpu->pstate.current_pstate = pstate;

        pstate_funcs.set(cpu, pstate);
}

static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
{
        cpu->pstate.min_pstate = pstate_funcs.get_min();
        cpu->pstate.max_pstate = pstate_funcs.get_max();
        cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
        cpu->pstate.scaling = pstate_funcs.get_scaling();

        if (pstate_funcs.get_vid)
                pstate_funcs.get_vid(cpu);
        intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
}

static inline void intel_pstate_calc_busy(struct cpudata *cpu)
{
        struct sample *sample = &cpu->sample;
        int64_t core_pct;

        core_pct = int_tofp(sample->aperf) * int_tofp(100);
        core_pct = div64_u64(core_pct, int_tofp(sample->mperf));

        sample->freq = fp_toint(
                mul_fp(int_tofp(
                        cpu->pstate.max_pstate * cpu->pstate.scaling / 100),
                        core_pct));

        sample->core_pct_busy = (int32_t)core_pct;
}

static inline void intel_pstate_sample(struct cpudata *cpu)
{
        u64 aperf, mperf;
        unsigned long flags;
        u64 tsc;

        local_irq_save(flags);
        rdmsrl(MSR_IA32_APERF, aperf);
        rdmsrl(MSR_IA32_MPERF, mperf);
        if (cpu->prev_mperf == mperf) {
                local_irq_restore(flags);
                return;
        }

        tsc = rdtsc();
        local_irq_restore(flags);

        cpu->last_sample_time = cpu->sample.time;
        cpu->sample.time = ktime_get();
        cpu->sample.aperf = aperf;
        cpu->sample.mperf = mperf;
        cpu->sample.tsc = tsc;
        cpu->sample.aperf -= cpu->prev_aperf;
        cpu->sample.mperf -= cpu->prev_mperf;
        cpu->sample.tsc -= cpu->prev_tsc;

        intel_pstate_calc_busy(cpu);

        cpu->prev_aperf = aperf;
        cpu->prev_mperf = mperf;
        cpu->prev_tsc = tsc;
}

static inline void intel_hwp_set_sample_time(struct cpudata *cpu)
{
        int delay;

        delay = msecs_to_jiffies(50);
        mod_timer_pinned(&cpu->timer, jiffies + delay);
}

static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
{
        int delay;

        delay = msecs_to_jiffies(pid_params.sample_rate_ms);
        mod_timer_pinned(&cpu->timer, jiffies + delay);
}

static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
{
        int32_t core_busy, max_pstate, current_pstate, sample_ratio;
        s64 duration_us;
        u32 sample_time;

        /*
         * core_busy is the ratio of actual performance to max
         * max_pstate is the max non turbo pstate available
         * current_pstate was the pstate that was requested during
         *	the last sample period.
         *
         * We normalize core_busy, which was our actual percent
         * performance to what we requested during the last sample
         * period. The result will be a percentage of busy at a
         * specified pstate.
         */
        core_busy = cpu->sample.core_pct_busy;
        max_pstate = int_tofp(cpu->pstate.max_pstate);
        current_pstate = int_tofp(cpu->pstate.current_pstate);
        core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));

        /*
         * Since we have a deferred timer, it will not fire unless
         * we are in C0. So, determine if the actual elapsed time
         * is significantly greater (3x) than our sample interval. If it
         * is, then we were idle for a long enough period of time
         * to adjust our busyness.
         */
        sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC;
        duration_us = ktime_us_delta(cpu->sample.time,
                                     cpu->last_sample_time);
        if (duration_us > sample_time * 3) {
                sample_ratio = div_fp(int_tofp(sample_time),
                                      int_tofp(duration_us));
                core_busy = mul_fp(core_busy, sample_ratio);
        }

        return core_busy;
}

static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
{
        int32_t busy_scaled;
        struct _pid *pid;
        signed int ctl;
        int from;
        struct sample *sample;

        from = cpu->pstate.current_pstate;

        pid = &cpu->pid;
        busy_scaled = intel_pstate_get_scaled_busy(cpu);

        ctl = pid_calc(pid, busy_scaled);

        /* Negative values of ctl increase the pstate and vice versa */
        intel_pstate_set_pstate(cpu, cpu->pstate.current_pstate - ctl, true);

        sample = &cpu->sample;
        trace_pstate_sample(fp_toint(sample->core_pct_busy),
                            fp_toint(busy_scaled),
                            from,
                            cpu->pstate.current_pstate,
                            sample->mperf,
                            sample->aperf,
                            sample->tsc,
                            sample->freq);
}

static void intel_hwp_timer_func(unsigned long __data)
{
        struct cpudata *cpu = (struct cpudata *) __data;

        intel_pstate_sample(cpu);
        intel_hwp_set_sample_time(cpu);
}

static void intel_pstate_timer_func(unsigned long __data)
{
        struct cpudata *cpu = (struct cpudata *) __data;

        intel_pstate_sample(cpu);

        intel_pstate_adjust_busy_pstate(cpu);

        intel_pstate_set_sample_time(cpu);
}

#define ICPU(model, policy) \
	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
			(unsigned long)&policy }

static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
        ICPU(0x2a, core_params),
        ICPU(0x2d, core_params),
        ICPU(0x37, byt_params),
        ICPU(0x3a, core_params),
        ICPU(0x3c, core_params),
        ICPU(0x3d, core_params),
        ICPU(0x3e, core_params),
        ICPU(0x3f, core_params),
        ICPU(0x45, core_params),
        ICPU(0x46, core_params),
        ICPU(0x47, core_params),
        ICPU(0x4c, byt_params),
        ICPU(0x4e, core_params),
        ICPU(0x4f, core_params),
        ICPU(0x5e, core_params),
        ICPU(0x56, core_params),
        ICPU(0x57, knl_params),
        {}
};
MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);

static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] = {
        ICPU(0x56, core_params),
        {}
};

static int intel_pstate_init_cpu(unsigned int cpunum)
{
        struct cpudata *cpu;

        if (!all_cpu_data[cpunum])
                all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata),
                                               GFP_KERNEL);
        if (!all_cpu_data[cpunum])
                return -ENOMEM;

        cpu = all_cpu_data[cpunum];

        cpu->cpu = cpunum;

        if (hwp_active)
                intel_pstate_hwp_enable(cpu);

        intel_pstate_get_cpu_pstates(cpu);

        init_timer_deferrable(&cpu->timer);
        cpu->timer.data = (unsigned long)cpu;
        cpu->timer.expires = jiffies + HZ/100;

        if (!hwp_active)
                cpu->timer.function =
                        intel_pstate_timer_func;
        else
                cpu->timer.function = intel_hwp_timer_func;

        intel_pstate_busy_pid_reset(cpu);
        intel_pstate_sample(cpu);

        add_timer_on(&cpu->timer, cpunum);

        pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);

        return 0;
}

static unsigned int intel_pstate_get(unsigned int cpu_num)
{
        struct sample *sample;
        struct cpudata *cpu;

        cpu = all_cpu_data[cpu_num];
        if (!cpu)
                return 0;
        sample = &cpu->sample;
        return sample->freq;
}

static int intel_pstate_set_policy(struct cpufreq_policy *policy)
{
        if (!policy->cpuinfo.max_freq)
                return -ENODEV;

        if (policy->policy == CPUFREQ_POLICY_PERFORMANCE &&
            policy->max >= policy->cpuinfo.max_freq) {
                limits.min_policy_pct = 100;
                limits.min_perf_pct = 100;
                limits.min_perf = int_tofp(1);
                limits.max_policy_pct = 100;
                limits.max_perf_pct = 100;
                limits.max_perf = int_tofp(1);
                limits.no_turbo = 0;
                return 0;
        }

        limits.min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
        limits.min_policy_pct = clamp_t(int, limits.min_policy_pct, 0, 100);
        limits.max_policy_pct = (policy->max * 100) / policy->cpuinfo.max_freq;
        limits.max_policy_pct = clamp_t(int, limits.max_policy_pct, 0, 100);

        /* Normalize user input to [min_policy_pct, max_policy_pct] */
        limits.min_perf_pct = max(limits.min_policy_pct, limits.min_sysfs_pct);
        limits.min_perf_pct = min(limits.max_policy_pct, limits.min_perf_pct);
        limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct);
        limits.max_perf_pct = max(limits.min_policy_pct, limits.max_perf_pct);

        /* Make sure min_perf_pct <= max_perf_pct */
        limits.min_perf_pct = min(limits.max_perf_pct, limits.min_perf_pct);

        limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));
        limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));

        if (hwp_active)
                intel_pstate_hwp_set();

        return 0;
}

static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
{
        cpufreq_verify_within_cpu_limits(policy);

        if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
            policy->policy != CPUFREQ_POLICY_PERFORMANCE)
                return -EINVAL;

        return 0;
}

static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
{
        int cpu_num = policy->cpu;
        struct cpudata *cpu = all_cpu_data[cpu_num];

        pr_debug("intel_pstate: CPU %d exiting\n", cpu_num);

        del_timer_sync(&all_cpu_data[cpu_num]->timer);
        if (hwp_active)
                return;

        intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
}

static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
{
        struct cpudata *cpu;
        int rc;

        rc = intel_pstate_init_cpu(policy->cpu);
        if (rc)
                return rc;

        cpu = all_cpu_data[policy->cpu];

        if (limits.min_perf_pct == 100 && limits.max_perf_pct == 100)
                policy->policy = CPUFREQ_POLICY_PERFORMANCE;
        else
                policy->policy = CPUFREQ_POLICY_POWERSAVE;

        policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
        policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;

        /* cpuinfo and default policy values */
        policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
        policy->cpuinfo.max_freq =
                cpu->pstate.turbo_pstate * cpu->pstate.scaling;
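        /* Transition latency is not advertised; CPUFREQ_ETERNAL marks it as unspecified. */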
        policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
        cpumask_set_cpu(policy->cpu, policy->cpus);

        return 0;
}

static struct cpufreq_driver intel_pstate_driver = {
        .flags = CPUFREQ_CONST_LOOPS,
        .verify = intel_pstate_verify_policy,
        .setpolicy = intel_pstate_set_policy,
        .get = intel_pstate_get,
        .init = intel_pstate_cpu_init,
        .stop_cpu = intel_pstate_stop_cpu,
        .name = "intel_pstate",
};

static int __initdata no_load;
static int __initdata no_hwp;
static int __initdata hwp_only;
static unsigned int force_load;

static int intel_pstate_msrs_not_valid(void)
{
        if (!pstate_funcs.get_max() ||
            !pstate_funcs.get_min() ||
            !pstate_funcs.get_turbo())
                return -ENODEV;

        return 0;
}

static void copy_pid_params(struct pstate_adjust_policy *policy)
{
        pid_params.sample_rate_ms = policy->sample_rate_ms;
        pid_params.p_gain_pct = policy->p_gain_pct;
        pid_params.i_gain_pct = policy->i_gain_pct;
        pid_params.d_gain_pct = policy->d_gain_pct;
        pid_params.deadband = policy->deadband;
        pid_params.setpoint = policy->setpoint;
}

static void copy_cpu_funcs(struct pstate_funcs *funcs)
{
        pstate_funcs.get_max = funcs->get_max;
        pstate_funcs.get_min = funcs->get_min;
        pstate_funcs.get_turbo = funcs->get_turbo;
        pstate_funcs.get_scaling = funcs->get_scaling;
        pstate_funcs.set = funcs->set;
        pstate_funcs.get_vid = funcs->get_vid;
}

#if IS_ENABLED(CONFIG_ACPI)
#include <acpi/processor.h>

static bool intel_pstate_no_acpi_pss(void)
{
        int i;

        for_each_possible_cpu(i) {
                acpi_status status;
                union acpi_object *pss;
                struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
                struct acpi_processor *pr = per_cpu(processors, i);

                if (!pr)
                        continue;

                status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
                if (ACPI_FAILURE(status))
                        continue;

                pss = buffer.pointer;
                if (pss && pss->type == ACPI_TYPE_PACKAGE) {
                        kfree(pss);
                        return false;
                }

                kfree(pss);
        }

        return true;
}

static bool intel_pstate_has_acpi_ppc(void)
{
        int i;

        for_each_possible_cpu(i) {
                struct acpi_processor *pr = per_cpu(processors, i);

                if (!pr)
                        continue;
                if (acpi_has_method(pr->handle, "_PPC"))
                        return true;
        }
        return false;
}

enum {
        PSS,
        PPC,
};

struct hw_vendor_info {
        u16 valid;
        char oem_id[ACPI_OEM_ID_SIZE];
        char oem_table_id[ACPI_OEM_TABLE_ID_SIZE];
        int oem_pwr_table;
};

/* Hardware vendor-specific info that has its own power management modes */
static struct hw_vendor_info vendor_info[] = {
        {1, "HP    ", "ProLiant", PSS},
        {1, "ORACLE", "X4-2    ", PPC},
        {1, "ORACLE", "X4-2L   ", PPC},
        {1, "ORACLE", "X4-2B   ", PPC},
        {1, "ORACLE", "X3-2    ", PPC},
        {1, "ORACLE", "X3-2L   ", PPC},
        {1, "ORACLE", "X3-2B   ", PPC},
        {1, "ORACLE", "X4470M2 ", PPC},
        {1, "ORACLE", "X4270M3 ", PPC},
        {1, "ORACLE", "X4270M2 ", PPC},
        {1, "ORACLE", "X4170M2 ", PPC},
        {1, "ORACLE", "X4170 M3", PPC},
        {1, "ORACLE", "X4275 M3", PPC},
        {1, "ORACLE", "X6-2    ", PPC},
        {1, "ORACLE", "Sudbury ", PPC},
        {0, "", ""},
};

static bool intel_pstate_platform_pwr_mgmt_exists(void)
{
        struct acpi_table_header
                hdr;
        struct hw_vendor_info *v_info;
        const struct x86_cpu_id *id;
        u64 misc_pwr;

        id = x86_match_cpu(intel_pstate_cpu_oob_ids);
        if (id) {
                rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr);
                if (misc_pwr & (1 << 8))
                        return true;
        }

        if (acpi_disabled ||
            ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr)))
                return false;

        for (v_info = vendor_info; v_info->valid; v_info++) {
                if (!strncmp(hdr.oem_id, v_info->oem_id, ACPI_OEM_ID_SIZE) &&
                    !strncmp(hdr.oem_table_id, v_info->oem_table_id,
                             ACPI_OEM_TABLE_ID_SIZE))
                        switch (v_info->oem_pwr_table) {
                        case PSS:
                                return intel_pstate_no_acpi_pss();
                        case PPC:
                                return intel_pstate_has_acpi_ppc() &&
                                        (!force_load);
                        }
        }

        return false;
}
#else /* CONFIG_ACPI not enabled */
static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; }
static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
#endif /* CONFIG_ACPI */

static int __init intel_pstate_init(void)
{
        int cpu, rc = 0;
        const struct x86_cpu_id *id;
        struct cpu_defaults *cpu_def;

        if (no_load)
                return -ENODEV;

        id = x86_match_cpu(intel_pstate_cpu_ids);
        if (!id)
                return -ENODEV;

        /*
         * The Intel pstate driver will be ignored if the platform
         * firmware has its own power management modes.
         */
        if (intel_pstate_platform_pwr_mgmt_exists())
                return -ENODEV;

        cpu_def = (struct cpu_defaults *)id->driver_data;

        copy_pid_params(&cpu_def->pid_policy);
        copy_cpu_funcs(&cpu_def->funcs);

        if (intel_pstate_msrs_not_valid())
                return -ENODEV;

        pr_info("Intel P-state driver initializing.\n");

        all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
        if (!all_cpu_data)
                return -ENOMEM;

        if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp)
                hwp_active++;

        if (!hwp_active && hwp_only)
                goto out;

        rc = cpufreq_register_driver(&intel_pstate_driver);
        if (rc)
                goto out;

        intel_pstate_debug_expose_params();
        intel_pstate_sysfs_expose_params();

        return rc;
out:
        get_online_cpus();
        for_each_online_cpu(cpu) {
                if (all_cpu_data[cpu]) {
                        del_timer_sync(&all_cpu_data[cpu]->timer);
                        kfree(all_cpu_data[cpu]);
                }
        }

        put_online_cpus();
        vfree(all_cpu_data);
        return -ENODEV;
}
device_initcall(intel_pstate_init);

static int __init intel_pstate_setup(char *str)
{
        if (!str)
                return -EINVAL;

        if (!strcmp(str, "disable"))
                no_load = 1;
        if (!strcmp(str, "no_hwp"))
                no_hwp = 1;
        if (!strcmp(str, "force"))
                force_load = 1;
        if (!strcmp(str, "hwp_only"))
                hwp_only = 1;
        return 0;
}
early_param("intel_pstate", intel_pstate_setup);

MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
MODULE_DESCRIPTION("'intel_pstate' - P state driver for Intel Core processors");
MODULE_LICENSE("GPL");