/*
 * intel_pstate.c: Native P state management for Intel processors
 *
 * (C) Copyright 2012 Intel Corporation
 * Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/acpi.h>
#include <trace/events/power.h>

#include <asm/div64.h>
#include <asm/msr.h>
#include <asm/cpu_device_id.h>

#define SAMPLE_COUNT		3

#define BYT_RATIOS		0x66a
#define BYT_VIDS		0x66b
#define BYT_TURBO_RATIOS	0x66c

#define FRAC_BITS 6
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)
#define FP_ROUNDUP(X) ((X) += 1 << FRAC_BITS)

static inline int32_t mul_fp(int32_t x, int32_t y)
{
        return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
}

static inline int32_t div_fp(int32_t x, int32_t y)
{
        return div_s64((int64_t)x << FRAC_BITS, (int64_t)y);
}

struct sample {
        int32_t core_pct_busy;
        u64 aperf;
        u64 mperf;
        unsigned long long tsc;
        int freq;
};

struct pstate_data {
        int current_pstate;
        int min_pstate;
        int max_pstate;
        int turbo_pstate;
};

struct vid_data {
        int32_t min;
        int32_t max;
        int32_t ratio;
};

struct _pid {
        int setpoint;
        int32_t integral;
        int32_t p_gain;
        int32_t i_gain;
        int32_t d_gain;
        int deadband;
        int32_t last_err;
};

struct cpudata {
        int cpu;

        char name[64];

        struct timer_list timer;

        struct pstate_data pstate;
        struct vid_data vid;
        struct _pid pid;

        u64 prev_aperf;
        u64 prev_mperf;
        unsigned long long prev_tsc;
        int sample_ptr;
        struct sample samples[SAMPLE_COUNT];
};

static struct cpudata **all_cpu_data;

struct pstate_adjust_policy {
        int sample_rate_ms;
        int deadband;
        int setpoint;
        int p_gain_pct;
        int d_gain_pct;
        int i_gain_pct;
};

struct pstate_funcs {
        int (*get_max)(void);
        int (*get_min)(void);
        int (*get_turbo)(void);
        void (*set)(struct cpudata*, int pstate);
        void (*get_vid)(struct cpudata *);
};

struct cpu_defaults {
        struct pstate_adjust_policy pid_policy;
        struct pstate_funcs funcs;
};

static struct pstate_adjust_policy pid_params;
static struct pstate_funcs pstate_funcs;

struct perf_limits {
        int no_turbo;
        int max_perf_pct;
        int min_perf_pct;
        int32_t max_perf;
        int32_t min_perf;
        int max_policy_pct;
        int max_sysfs_pct;
};

static struct perf_limits limits = {
        .no_turbo = 0,
        .max_perf_pct = 100,
        .max_perf = int_tofp(1),
        .min_perf_pct = 0,
        .min_perf = 0,
        .max_policy_pct = 100,
        .max_sysfs_pct = 100,
};

static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
                             int deadband, int integral)
{
        pid->setpoint = setpoint;
        pid->deadband = deadband;
        pid->integral = int_tofp(integral);
        pid->last_err = setpoint - busy;
}

static inline void pid_p_gain_set(struct _pid *pid, int percent)
{
        pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static inline void pid_i_gain_set(struct _pid *pid, int percent)
{
        pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static inline void pid_d_gain_set(struct _pid *pid, int percent)
{
        pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static signed int pid_calc(struct _pid *pid, int32_t busy)
{
        signed int result;
        int32_t pterm, dterm, fp_error;
        int32_t integral_limit;

        fp_error = int_tofp(pid->setpoint) - busy;

        if (abs(fp_error) <= int_tofp(pid->deadband))
                return 0;

        pterm = mul_fp(pid->p_gain, fp_error);

        pid->integral += fp_error;

        /* limit the integral term */
        integral_limit = int_tofp(30);
        if (pid->integral > integral_limit)
                pid->integral = integral_limit;
        if (pid->integral < -integral_limit)
                pid->integral = -integral_limit;

        dterm = mul_fp(pid->d_gain, fp_error - pid->last_err);
        pid->last_err = fp_error;

        result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm;

        return (signed int)fp_toint(result);
}

static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu)
{
        pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct);
        pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct);
        pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct);

        pid_reset(&cpu->pid,
                pid_params.setpoint,
                100,
                pid_params.deadband,
                0);
}

static inline void intel_pstate_reset_all_pid(void)
{
        unsigned int cpu;
        for_each_online_cpu(cpu) {
                if (all_cpu_data[cpu])
                        intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
        }
}

/************************** debugfs begin ************************/
static int pid_param_set(void *data, u64 val)
{
        *(u32 *)data = val;
        intel_pstate_reset_all_pid();
        return 0;
}
static int pid_param_get(void *data, u64 *val)
{
        *val = *(u32 *)data;
        return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get,
                        pid_param_set, "%llu\n");

struct pid_param {
        char *name;
        void *value;
};

static struct pid_param pid_files[] = {
        {"sample_rate_ms", &pid_params.sample_rate_ms},
        {"d_gain_pct", &pid_params.d_gain_pct},
        {"i_gain_pct", &pid_params.i_gain_pct},
        {"deadband", &pid_params.deadband},
        {"setpoint", &pid_params.setpoint},
        {"p_gain_pct", &pid_params.p_gain_pct},
        {NULL, NULL}
};

static struct dentry *debugfs_parent;
static void intel_pstate_debug_expose_params(void)
{
        int i = 0;

        debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
        if (IS_ERR_OR_NULL(debugfs_parent))
                return;
        while (pid_files[i].name) {
                debugfs_create_file(pid_files[i].name, 0660,
                                debugfs_parent, pid_files[i].value,
                                &fops_pid_param);
                i++;
        }
}

/************************** debugfs end ************************/

/************************** sysfs begin ************************/
#define show_one(file_name, object) \
        static ssize_t show_##file_name \
        (struct kobject *kobj, struct attribute *attr, char *buf) \
        { \
                return sprintf(buf, "%u\n", limits.object); \
        }

static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
                                const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;
        limits.no_turbo = clamp_t(int, input, 0, 1);

        return count;
}

static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
                                const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;

        limits.max_sysfs_pct = clamp_t(int, input, 0, 100);
        limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct);
        limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));
        return count;
}

static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
                                const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;
        limits.min_perf_pct = clamp_t(int, input, 0, 100);
        limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));

        return count;
}

show_one(no_turbo, no_turbo);
show_one(max_perf_pct, max_perf_pct);
show_one(min_perf_pct, min_perf_pct);

define_one_global_rw(no_turbo);
define_one_global_rw(max_perf_pct);
define_one_global_rw(min_perf_pct);

static struct attribute *intel_pstate_attributes[] = {
        &no_turbo.attr,
        &max_perf_pct.attr,
        &min_perf_pct.attr,
        NULL
};

static struct attribute_group intel_pstate_attr_group = {
        .attrs = intel_pstate_attributes,
};
static struct kobject *intel_pstate_kobject;

static void intel_pstate_sysfs_expose_params(void)
{
        int rc;

        intel_pstate_kobject = kobject_create_and_add("intel_pstate",
                                                &cpu_subsys.dev_root->kobj);
        BUG_ON(!intel_pstate_kobject);
        rc = sysfs_create_group(intel_pstate_kobject,
                                &intel_pstate_attr_group);
        BUG_ON(rc);
}

/************************** sysfs end ************************/
static int byt_get_min_pstate(void)
{
        u64 value;
        rdmsrl(BYT_RATIOS, value);
        return (value >> 8) & 0xFF;
}

static int byt_get_max_pstate(void)
{
        u64 value;
        rdmsrl(BYT_RATIOS, value);
        return (value >> 16) & 0xFF;
}

static int byt_get_turbo_pstate(void)
{
        u64 value;
        rdmsrl(BYT_TURBO_RATIOS, value);
        return value & 0x3F;
}

static void byt_set_pstate(struct cpudata *cpudata, int pstate)
{
        u64 val;
        int32_t vid_fp;
        u32 vid;

        val = pstate << 8;
        if (limits.no_turbo)
                val |= (u64)1 << 32;

        vid_fp = cpudata->vid.min + mul_fp(
                int_tofp(pstate - cpudata->pstate.min_pstate),
                cpudata->vid.ratio);

        vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
        vid = fp_toint(vid_fp);

        val |= vid;

        wrmsrl(MSR_IA32_PERF_CTL, val);
}

static void byt_get_vid(struct cpudata *cpudata)
{
        u64 value;

        rdmsrl(BYT_VIDS, value);
        cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
        cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
        cpudata->vid.ratio = div_fp(
                cpudata->vid.max - cpudata->vid.min,
                int_tofp(cpudata->pstate.max_pstate -
                        cpudata->pstate.min_pstate));
}

static int core_get_min_pstate(void)
{
        u64 value;
        rdmsrl(MSR_PLATFORM_INFO, value);
        return (value >> 40) & 0xFF;
}

static int core_get_max_pstate(void)
{
        u64 value;
        rdmsrl(MSR_PLATFORM_INFO, value);
        return (value >> 8) & 0xFF;
}

static int core_get_turbo_pstate(void)
{
        u64 value;
        int nont, ret;
        rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
        nont = core_get_max_pstate();
        ret = ((value) & 255);
        if (ret <= nont)
                ret = nont;
        return ret;
}

static void core_set_pstate(struct cpudata *cpudata, int pstate)
{
        u64 val;

        val = pstate << 8;
        if (limits.no_turbo)
                val |= (u64)1 << 32;

        wrmsrl(MSR_IA32_PERF_CTL, val);
}

static struct cpu_defaults core_params = {
        .pid_policy = {
                .sample_rate_ms = 10,
                .deadband = 0,
                .setpoint = 97,
                .p_gain_pct = 20,
                .d_gain_pct = 0,
                .i_gain_pct = 0,
        },
        .funcs = {
                .get_max = core_get_max_pstate,
                .get_min = core_get_min_pstate,
                .get_turbo = core_get_turbo_pstate,
                .set = core_set_pstate,
        },
};

static struct cpu_defaults byt_params = {
        .pid_policy = {
                .sample_rate_ms = 10,
                .deadband = 0,
                .setpoint = 97,
                .p_gain_pct = 14,
                .d_gain_pct = 0,
                .i_gain_pct = 4,
        },
        .funcs = {
                .get_max = byt_get_max_pstate,
                .get_min = byt_get_min_pstate,
                .get_turbo = byt_get_turbo_pstate,
                .set = byt_set_pstate,
                .get_vid = byt_get_vid,
        },
};

static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
{
        int max_perf = cpu->pstate.turbo_pstate;
        int max_perf_adj;
        int min_perf;
        if (limits.no_turbo)
                max_perf = cpu->pstate.max_pstate;

        max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf));
        *max = clamp_t(int, max_perf_adj,
                        cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);

        min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf));
        *min = clamp_t(int, min_perf,
                        cpu->pstate.min_pstate, max_perf);
}

static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
{
        int max_perf, min_perf;

        intel_pstate_get_min_max(cpu, &min_perf, &max_perf);

        pstate = clamp_t(int, pstate, min_perf, max_perf);

        if (pstate == cpu->pstate.current_pstate)
                return;

        trace_cpu_frequency(pstate * 100000, cpu->cpu);

        cpu->pstate.current_pstate = pstate;

        pstate_funcs.set(cpu, pstate);
}

static inline void intel_pstate_pstate_increase(struct cpudata *cpu, int steps)
{
        int target;
        target = cpu->pstate.current_pstate + steps;

        intel_pstate_set_pstate(cpu, target);
}

static inline void intel_pstate_pstate_decrease(struct cpudata *cpu, int steps)
{
        int target;
        target = cpu->pstate.current_pstate - steps;
        intel_pstate_set_pstate(cpu, target);
}

static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
{
        sprintf(cpu->name, "Intel 2nd generation core");

        cpu->pstate.min_pstate = pstate_funcs.get_min();
        cpu->pstate.max_pstate = pstate_funcs.get_max();
        cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();

        if (pstate_funcs.get_vid)
                pstate_funcs.get_vid(cpu);

        /*
         * Go to the max P-state so we don't slow down boot when built in;
         * if we are a module, normal operation will take care of it.
         */
        intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate);
}

static inline void intel_pstate_calc_busy(struct cpudata *cpu,
                                        struct sample *sample)
{
        int32_t core_pct;
        int32_t c0_pct;

        core_pct = div_fp(int_tofp((sample->aperf)),
                        int_tofp((sample->mperf)));
        core_pct = mul_fp(core_pct, int_tofp(100));
        FP_ROUNDUP(core_pct);

        c0_pct = div_fp(int_tofp(sample->mperf), int_tofp(sample->tsc));

        sample->freq = fp_toint(
                mul_fp(int_tofp(cpu->pstate.max_pstate * 1000), core_pct));

        sample->core_pct_busy = mul_fp(core_pct, c0_pct);
}

static inline void intel_pstate_sample(struct cpudata *cpu)
{
        u64 aperf, mperf;
        unsigned long long tsc;

        rdmsrl(MSR_IA32_APERF, aperf);
        rdmsrl(MSR_IA32_MPERF, mperf);
        tsc = native_read_tsc();

        aperf = aperf >> FRAC_BITS;
        mperf = mperf >> FRAC_BITS;
        tsc = tsc >> FRAC_BITS;

        cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT;
        cpu->samples[cpu->sample_ptr].aperf = aperf;
        cpu->samples[cpu->sample_ptr].mperf = mperf;
        cpu->samples[cpu->sample_ptr].tsc = tsc;
        cpu->samples[cpu->sample_ptr].aperf -= cpu->prev_aperf;
        cpu->samples[cpu->sample_ptr].mperf -= cpu->prev_mperf;
        cpu->samples[cpu->sample_ptr].tsc -= cpu->prev_tsc;

        intel_pstate_calc_busy(cpu, &cpu->samples[cpu->sample_ptr]);

        cpu->prev_aperf = aperf;
        cpu->prev_mperf = mperf;
        cpu->prev_tsc = tsc;
}

static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
{
        int sample_time, delay;

        sample_time = pid_params.sample_rate_ms;
        delay = msecs_to_jiffies(sample_time);
        mod_timer_pinned(&cpu->timer, jiffies + delay);
}

static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
{
        int32_t core_busy, max_pstate, current_pstate;

        core_busy = cpu->samples[cpu->sample_ptr].core_pct_busy;
        max_pstate = int_tofp(cpu->pstate.max_pstate);
        current_pstate = int_tofp(cpu->pstate.current_pstate);
        core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
        return FP_ROUNDUP(core_busy);
}

static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
{
        int32_t busy_scaled;
        struct _pid *pid;
        signed int ctl = 0;
        int steps;

        pid = &cpu->pid;
        busy_scaled = intel_pstate_get_scaled_busy(cpu);

        ctl = pid_calc(pid, busy_scaled);

        steps = abs(ctl);

        if (ctl < 0)
                intel_pstate_pstate_increase(cpu, steps);
        else
                intel_pstate_pstate_decrease(cpu, steps);
}

static void intel_pstate_timer_func(unsigned long __data)
{
        struct cpudata *cpu = (struct cpudata *) __data;
        struct sample *sample;

        intel_pstate_sample(cpu);

        sample = &cpu->samples[cpu->sample_ptr];

        intel_pstate_adjust_busy_pstate(cpu);

        trace_pstate_sample(fp_toint(sample->core_pct_busy),
                        fp_toint(intel_pstate_get_scaled_busy(cpu)),
                        cpu->pstate.current_pstate,
                        sample->mperf,
                        sample->aperf,
                        sample->freq);

        intel_pstate_set_sample_time(cpu);
}

#define ICPU(model, policy) \
        { X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
                        (unsigned long)&policy }

static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
        ICPU(0x2a, core_params),
        ICPU(0x2d, core_params),
        ICPU(0x37, byt_params),
        ICPU(0x3a, core_params),
        ICPU(0x3c, core_params),
        ICPU(0x3e, core_params),
        ICPU(0x3f, core_params),
        ICPU(0x45, core_params),
        ICPU(0x46, core_params),
        {}
};
MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);

static int intel_pstate_init_cpu(unsigned int cpunum)
{
        const struct x86_cpu_id *id;
        struct cpudata *cpu;

        id = x86_match_cpu(intel_pstate_cpu_ids);
        if (!id)
                return -ENODEV;

        all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata), GFP_KERNEL);
        if (!all_cpu_data[cpunum])
                return -ENOMEM;

        cpu = all_cpu_data[cpunum];

        intel_pstate_get_cpu_pstates(cpu);
        if (!cpu->pstate.current_pstate) {
                all_cpu_data[cpunum] = NULL;
                kfree(cpu);
                return -ENODATA;
        }

        cpu->cpu = cpunum;

        init_timer_deferrable(&cpu->timer);
        cpu->timer.function = intel_pstate_timer_func;
        cpu->timer.data = (unsigned long)cpu;
        cpu->timer.expires = jiffies + HZ/100;
        intel_pstate_busy_pid_reset(cpu);
        intel_pstate_sample(cpu);
        intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate);

        add_timer_on(&cpu->timer, cpunum);

        pr_info("Intel pstate controlling: cpu %d\n", cpunum);

        return 0;
}

static unsigned int intel_pstate_get(unsigned int cpu_num)
{
        struct sample *sample;
        struct cpudata *cpu;

        cpu = all_cpu_data[cpu_num];
        if (!cpu)
                return 0;
        sample = &cpu->samples[cpu->sample_ptr];
        return sample->freq;
}

static int intel_pstate_set_policy(struct cpufreq_policy *policy)
{
        struct cpudata *cpu;

        cpu = all_cpu_data[policy->cpu];

        if (!policy->cpuinfo.max_freq)
                return -ENODEV;

        if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
                limits.min_perf_pct = 100;
                limits.min_perf = int_tofp(1);
                limits.max_perf_pct = 100;
                limits.max_perf = int_tofp(1);
                limits.no_turbo = 0;
                return 0;
        }
        limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
        limits.min_perf_pct = clamp_t(int, limits.min_perf_pct, 0, 100);
        limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));

        limits.max_policy_pct = policy->max * 100 / policy->cpuinfo.max_freq;
        limits.max_policy_pct = clamp_t(int, limits.max_policy_pct, 0, 100);
        limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct);
        limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));

        return 0;
}

static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
{
        cpufreq_verify_within_cpu_limits(policy);

        if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) &&
                (policy->policy != CPUFREQ_POLICY_PERFORMANCE))
                return -EINVAL;

        return 0;
}

static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
{
        int cpu = policy->cpu;

        del_timer(&all_cpu_data[cpu]->timer);
        kfree(all_cpu_data[cpu]);
        all_cpu_data[cpu] = NULL;
        return 0;
}

static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
{
        struct cpudata *cpu;
        int rc;

        rc = intel_pstate_init_cpu(policy->cpu);
        if (rc)
                return rc;

        cpu = all_cpu_data[policy->cpu];

        if (!limits.no_turbo &&
                limits.min_perf_pct == 100 && limits.max_perf_pct == 100)
                policy->policy = CPUFREQ_POLICY_PERFORMANCE;
        else
                policy->policy = CPUFREQ_POLICY_POWERSAVE;

        policy->min = cpu->pstate.min_pstate * 100000;
        policy->max = cpu->pstate.turbo_pstate * 100000;

        /* cpuinfo and default policy values */
        policy->cpuinfo.min_freq = cpu->pstate.min_pstate * 100000;
        policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * 100000;
        policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
        cpumask_set_cpu(policy->cpu, policy->cpus);

        return 0;
}

static struct cpufreq_driver intel_pstate_driver = {
        .flags = CPUFREQ_CONST_LOOPS,
        .verify = intel_pstate_verify_policy,
        .setpolicy = intel_pstate_set_policy,
        .get = intel_pstate_get,
        .init = intel_pstate_cpu_init,
        .exit = intel_pstate_cpu_exit,
        .name = "intel_pstate",
};

static int __initdata no_load;

static int intel_pstate_msrs_not_valid(void)
{
        /* Check that all the MSRs we are using are valid. */
        u64 aperf, mperf, tmp;

        rdmsrl(MSR_IA32_APERF, aperf);
        rdmsrl(MSR_IA32_MPERF, mperf);

        if (!pstate_funcs.get_max() ||
                !pstate_funcs.get_min() ||
                !pstate_funcs.get_turbo())
                return -ENODEV;

        rdmsrl(MSR_IA32_APERF, tmp);
        if (!(tmp - aperf))
                return -ENODEV;

        rdmsrl(MSR_IA32_MPERF, tmp);
        if (!(tmp - mperf))
                return -ENODEV;

        return 0;
}

static void copy_pid_params(struct pstate_adjust_policy *policy)
{
        pid_params.sample_rate_ms = policy->sample_rate_ms;
        pid_params.p_gain_pct = policy->p_gain_pct;
        pid_params.i_gain_pct = policy->i_gain_pct;
        pid_params.d_gain_pct = policy->d_gain_pct;
        pid_params.deadband = policy->deadband;
        pid_params.setpoint = policy->setpoint;
}

static void copy_cpu_funcs(struct pstate_funcs *funcs)
{
        pstate_funcs.get_max = funcs->get_max;
        pstate_funcs.get_min = funcs->get_min;
        pstate_funcs.get_turbo = funcs->get_turbo;
        pstate_funcs.set = funcs->set;
        pstate_funcs.get_vid = funcs->get_vid;
}

#if IS_ENABLED(CONFIG_ACPI)
#include <acpi/processor.h>

static bool intel_pstate_no_acpi_pss(void)
{
        int i;

        for_each_possible_cpu(i) {
                acpi_status status;
                union acpi_object *pss;
                struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
                struct acpi_processor *pr = per_cpu(processors, i);

                if (!pr)
                        continue;

                status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
                if (ACPI_FAILURE(status))
                        continue;

                pss = buffer.pointer;
                if (pss && pss->type == ACPI_TYPE_PACKAGE) {
                        kfree(pss);
                        return false;
                }

                kfree(pss);
        }

        return true;
}

struct hw_vendor_info {
        u16 valid;
        char oem_id[ACPI_OEM_ID_SIZE];
        char oem_table_id[ACPI_OEM_TABLE_ID_SIZE];
};

/* Hardware vendor-specific info that has its own power management modes */
static struct hw_vendor_info vendor_info[] = {
        {1, "HP    ", "ProLiant"},
        {0, "", ""},
};

static bool intel_pstate_platform_pwr_mgmt_exists(void)
{
        struct acpi_table_header hdr;
        struct hw_vendor_info *v_info;

        if (acpi_disabled
            || ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr)))
                return false;

        for (v_info = vendor_info; v_info->valid; v_info++) {
                if (!strncmp(hdr.oem_id, v_info->oem_id, ACPI_OEM_ID_SIZE)
                    && !strncmp(hdr.oem_table_id, v_info->oem_table_id, ACPI_OEM_TABLE_ID_SIZE)
                    && intel_pstate_no_acpi_pss())
                        return true;
        }

        return false;
}
#else /* CONFIG_ACPI not enabled */
static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; }
#endif /* CONFIG_ACPI */

static int __init intel_pstate_init(void)
{
        int cpu, rc = 0;
        const struct x86_cpu_id *id;
        struct cpu_defaults *cpu_info;

        if (no_load)
                return -ENODEV;

        id = x86_match_cpu(intel_pstate_cpu_ids);
        if (!id)
                return -ENODEV;

        /*
         * The Intel pstate driver will be ignored if the platform
         * firmware has its own power management modes.
         */
        if (intel_pstate_platform_pwr_mgmt_exists())
                return -ENODEV;

        cpu_info = (struct cpu_defaults *)id->driver_data;

        copy_pid_params(&cpu_info->pid_policy);
        copy_cpu_funcs(&cpu_info->funcs);

        if (intel_pstate_msrs_not_valid())
                return -ENODEV;

        pr_info("Intel P-state driver initializing.\n");

        all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
        if (!all_cpu_data)
                return -ENOMEM;

        rc = cpufreq_register_driver(&intel_pstate_driver);
        if (rc)
                goto out;

        intel_pstate_debug_expose_params();
        intel_pstate_sysfs_expose_params();

        return rc;
out:
        get_online_cpus();
        for_each_online_cpu(cpu) {
                if (all_cpu_data[cpu]) {
                        del_timer_sync(&all_cpu_data[cpu]->timer);
                        kfree(all_cpu_data[cpu]);
                }
        }

        put_online_cpus();
        vfree(all_cpu_data);
        return -ENODEV;
}
device_initcall(intel_pstate_init);

static int __init intel_pstate_setup(char *str)
{
        if (!str)
                return -EINVAL;

        if (!strcmp(str, "disable"))
                no_load = 1;
        return 0;
}
early_param("intel_pstate", intel_pstate_setup);

MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
MODULE_DESCRIPTION("'intel_pstate' - P state driver for Intel Core processors");
MODULE_LICENSE("GPL");