/*
 * intel_pstate.c: Native P state management for Intel processors
 *
 * (C) Copyright 2012 Intel Corporation
 * Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/acpi.h>
#include <trace/events/power.h>

#include <asm/div64.h>
#include <asm/msr.h>
#include <asm/cpu_device_id.h>

#define SAMPLE_COUNT		3

#define BYT_RATIOS		0x66a
#define BYT_VIDS		0x66b
#define BYT_TURBO_RATIOS	0x66c

#define FRAC_BITS 6
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)
#define FP_ROUNDUP(X) ((X) += 1 << FRAC_BITS)

static inline int32_t mul_fp(int32_t x, int32_t y)
{
	return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
}

static inline int32_t div_fp(int32_t x, int32_t y)
{
	return div_s64((int64_t)x << FRAC_BITS, (int64_t)y);
}

struct sample {
	int32_t core_pct_busy;
	u64 aperf;
	u64 mperf;
	unsigned long long tsc;
	int freq;
};

struct pstate_data {
	int	current_pstate;
	int	min_pstate;
	int	max_pstate;
	int	turbo_pstate;
};

struct vid_data {
	int32_t min;
	int32_t max;
	int32_t ratio;
};

struct _pid {
	int setpoint;
	int32_t integral;
	int32_t p_gain;
	int32_t i_gain;
	int32_t d_gain;
	int deadband;
	int32_t last_err;
};

struct cpudata {
	int cpu;

	char name[64];

	struct timer_list timer;

	struct pstate_data pstate;
	struct vid_data vid;
	struct _pid pid;

	u64	prev_aperf;
	u64	prev_mperf;
	unsigned long long prev_tsc;
	struct sample sample;
};

static struct cpudata **all_cpu_data;
struct pstate_adjust_policy {
	int sample_rate_ms;
	int deadband;
	int setpoint;
	int p_gain_pct;
	int d_gain_pct;
	int i_gain_pct;
};

struct pstate_funcs {
	int (*get_max)(void);
	int (*get_min)(void);
	int (*get_turbo)(void);
	void (*set)(struct cpudata*, int pstate);
	void (*get_vid)(struct cpudata *);
};

struct cpu_defaults {
	struct pstate_adjust_policy pid_policy;
	struct pstate_funcs funcs;
};

static struct pstate_adjust_policy pid_params;
static struct pstate_funcs pstate_funcs;

struct perf_limits {
	int no_turbo;
	int max_perf_pct;
	int min_perf_pct;
	int32_t max_perf;
	int32_t min_perf;
	int max_policy_pct;
	int max_sysfs_pct;
};

static struct perf_limits limits = {
	.no_turbo = 0,
	.max_perf_pct = 100,
	.max_perf = int_tofp(1),
	.min_perf_pct = 0,
	.min_perf = 0,
	.max_policy_pct = 100,
	.max_sysfs_pct = 100,
};

static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
			int deadband, int integral) {
	pid->setpoint = setpoint;
	pid->deadband = deadband;
	pid->integral = int_tofp(integral);
	pid->last_err = int_tofp(setpoint) - int_tofp(busy);
}
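
/*
 * PID controller used to decide how far to move from the current P state.
 * Gains are supplied as percentages and stored in FRAC_BITS fixed point;
 * pid_calc() applies the proportional, integral and derivative terms to the
 * error between the setpoint and the measured "busy" value.
 */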
static inline void pid_p_gain_set(struct _pid *pid, int percent)
{
	pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static inline void pid_i_gain_set(struct _pid *pid, int percent)
{
	pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static inline void pid_d_gain_set(struct _pid *pid, int percent)
{
	pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
}

static signed int pid_calc(struct _pid *pid, int32_t busy)
{
	signed int result;
	int32_t pterm, dterm, fp_error;
	int32_t integral_limit;

	fp_error = int_tofp(pid->setpoint) - busy;

	if (abs(fp_error) <= int_tofp(pid->deadband))
		return 0;

	pterm = mul_fp(pid->p_gain, fp_error);

	pid->integral += fp_error;

	/* limit the integral term */
	integral_limit = int_tofp(30);
	if (pid->integral > integral_limit)
		pid->integral = integral_limit;
	if (pid->integral < -integral_limit)
		pid->integral = -integral_limit;

	dterm = mul_fp(pid->d_gain, fp_error - pid->last_err);
	pid->last_err = fp_error;

	result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm;

	return (signed int)fp_toint(result);
}

static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu)
{
	pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct);
	pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct);
	pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct);

	pid_reset(&cpu->pid,
		pid_params.setpoint,
		100,
		pid_params.deadband,
		0);
}

static inline void intel_pstate_reset_all_pid(void)
{
	unsigned int cpu;
	for_each_online_cpu(cpu) {
		if (all_cpu_data[cpu])
			intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
	}
}

/************************** debugfs begin ************************/
static int pid_param_set(void *data, u64 val)
{
	*(u32 *)data = val;
	intel_pstate_reset_all_pid();
	return 0;
}
static int pid_param_get(void *data, u64 *val)
{
	*val = *(u32 *)data;
	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get,
			pid_param_set, "%llu\n");

struct pid_param {
	char *name;
	void *value;
};

static struct pid_param pid_files[] = {
	{"sample_rate_ms", &pid_params.sample_rate_ms},
	{"d_gain_pct", &pid_params.d_gain_pct},
	{"i_gain_pct", &pid_params.i_gain_pct},
	{"deadband", &pid_params.deadband},
	{"setpoint", &pid_params.setpoint},
	{"p_gain_pct", &pid_params.p_gain_pct},
	{NULL, NULL}
};

static struct dentry *debugfs_parent;
static void intel_pstate_debug_expose_params(void)
{
	int i = 0;

	debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
	if (IS_ERR_OR_NULL(debugfs_parent))
		return;
	while (pid_files[i].name) {
		debugfs_create_file(pid_files[i].name, 0660,
				debugfs_parent, pid_files[i].value,
				&fops_pid_param);
		i++;
	}
}

/************************** debugfs end ************************/
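
/*
 * Global performance limits exposed through sysfs under
 * /sys/devices/system/cpu/intel_pstate: no_turbo, max_perf_pct and
 * min_perf_pct.  The effective max_perf_pct is the smaller of the value
 * requested here and the limit derived from the cpufreq policy.
 */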
/************************** sysfs begin ************************/
#define show_one(file_name, object)					\
	static ssize_t show_##file_name					\
	(struct kobject *kobj, struct attribute *attr, char *buf)	\
	{								\
		return sprintf(buf, "%u\n", limits.object);		\
	}

static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
				const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;
	limits.no_turbo = clamp_t(int, input, 0, 1);

	return count;
}

static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
				const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	limits.max_sysfs_pct = clamp_t(int, input, 0, 100);
	limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct);
	limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));
	return count;
}

static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
				const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;
	limits.min_perf_pct = clamp_t(int, input, 0, 100);
	limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));

	return count;
}

show_one(no_turbo, no_turbo);
show_one(max_perf_pct, max_perf_pct);
show_one(min_perf_pct, min_perf_pct);

define_one_global_rw(no_turbo);
define_one_global_rw(max_perf_pct);
define_one_global_rw(min_perf_pct);

static struct attribute *intel_pstate_attributes[] = {
	&no_turbo.attr,
	&max_perf_pct.attr,
	&min_perf_pct.attr,
	NULL
};

static struct attribute_group intel_pstate_attr_group = {
	.attrs = intel_pstate_attributes,
};
static struct kobject *intel_pstate_kobject;

static void intel_pstate_sysfs_expose_params(void)
{
	int rc;

	intel_pstate_kobject = kobject_create_and_add("intel_pstate",
						&cpu_subsys.dev_root->kobj);
	BUG_ON(!intel_pstate_kobject);
	rc = sysfs_create_group(intel_pstate_kobject,
				&intel_pstate_attr_group);
	BUG_ON(rc);
}

/************************** sysfs end ************************/
static int byt_get_min_pstate(void)
{
	u64 value;
	rdmsrl(BYT_RATIOS, value);
	return (value >> 8) & 0xFF;
}

static int byt_get_max_pstate(void)
{
	u64 value;
	rdmsrl(BYT_RATIOS, value);
	return (value >> 16) & 0xFF;
}

static int byt_get_turbo_pstate(void)
{
	u64 value;
	rdmsrl(BYT_TURBO_RATIOS, value);
	return value & 0x3F;
}

static void byt_set_pstate(struct cpudata *cpudata, int pstate)
{
	u64 val;
	int32_t vid_fp;
	u32 vid;

	val = pstate << 8;
	if (limits.no_turbo)
		val |= (u64)1 << 32;

	vid_fp = cpudata->vid.min + mul_fp(
		int_tofp(pstate - cpudata->pstate.min_pstate),
		cpudata->vid.ratio);

	vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
	vid = fp_toint(vid_fp);

	val |= vid;

	wrmsrl(MSR_IA32_PERF_CTL, val);
}

static void byt_get_vid(struct cpudata *cpudata)
{
	u64 value;

	rdmsrl(BYT_VIDS, value);
	cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
	cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
	cpudata->vid.ratio = div_fp(
		cpudata->vid.max - cpudata->vid.min,
		int_tofp(cpudata->pstate.max_pstate -
				cpudata->pstate.min_pstate));
}
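
/*
 * P state discovery for Core processors: the minimum and maximum ratios
 * come from MSR_PLATFORM_INFO, the single-core turbo ratio from
 * MSR_NHM_TURBO_RATIO_LIMIT.
 */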
static int core_get_min_pstate(void)
{
	u64 value;
	rdmsrl(MSR_PLATFORM_INFO, value);
	return (value >> 40) & 0xFF;
}

static int core_get_max_pstate(void)
{
	u64 value;
	rdmsrl(MSR_PLATFORM_INFO, value);
	return (value >> 8) & 0xFF;
}

static int core_get_turbo_pstate(void)
{
	u64 value;
	int nont, ret;

	rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
	nont = core_get_max_pstate();
	ret = ((value) & 255);
	if (ret <= nont)
		ret = nont;
	return ret;
}

static void core_set_pstate(struct cpudata *cpudata, int pstate)
{
	u64 val;

	val = pstate << 8;
	if (limits.no_turbo)
		val |= (u64)1 << 32;

	wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
}

static struct cpu_defaults core_params = {
	.pid_policy = {
		.sample_rate_ms = 10,
		.deadband = 0,
		.setpoint = 97,
		.p_gain_pct = 20,
		.d_gain_pct = 0,
		.i_gain_pct = 0,
	},
	.funcs = {
		.get_max = core_get_max_pstate,
		.get_min = core_get_min_pstate,
		.get_turbo = core_get_turbo_pstate,
		.set = core_set_pstate,
	},
};

static struct cpu_defaults byt_params = {
	.pid_policy = {
		.sample_rate_ms = 10,
		.deadband = 0,
		.setpoint = 97,
		.p_gain_pct = 14,
		.d_gain_pct = 0,
		.i_gain_pct = 4,
	},
	.funcs = {
		.get_max = byt_get_max_pstate,
		.get_min = byt_get_min_pstate,
		.get_turbo = byt_get_turbo_pstate,
		.set = byt_set_pstate,
		.get_vid = byt_get_vid,
	},
};

static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
{
	int max_perf = cpu->pstate.turbo_pstate;
	int max_perf_adj;
	int min_perf;
	if (limits.no_turbo)
		max_perf = cpu->pstate.max_pstate;

	max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf));
	*max = clamp_t(int, max_perf_adj,
			cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);

	min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf));
	*min = clamp_t(int, min_perf,
			cpu->pstate.min_pstate, max_perf);
}

static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
{
	int max_perf, min_perf;

	intel_pstate_get_min_max(cpu, &min_perf, &max_perf);

	pstate = clamp_t(int, pstate, min_perf, max_perf);

	if (pstate == cpu->pstate.current_pstate)
		return;

	trace_cpu_frequency(pstate * 100000, cpu->cpu);

	cpu->pstate.current_pstate = pstate;

	pstate_funcs.set(cpu, pstate);
}

static inline void intel_pstate_pstate_increase(struct cpudata *cpu, int steps)
{
	int target;
	target = cpu->pstate.current_pstate + steps;

	intel_pstate_set_pstate(cpu, target);
}

static inline void intel_pstate_pstate_decrease(struct cpudata *cpu, int steps)
{
	int target;
	target = cpu->pstate.current_pstate - steps;
	intel_pstate_set_pstate(cpu, target);
}

static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
{
	sprintf(cpu->name, "Intel 2nd generation core");

	cpu->pstate.min_pstate = pstate_funcs.get_min();
	cpu->pstate.max_pstate = pstate_funcs.get_max();
	cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();

	if (pstate_funcs.get_vid)
		pstate_funcs.get_vid(cpu);

	/*
	 * Go to max pstate so we don't slow up boot if we are built-in.
	 * If we are a module we will take care of it during normal operation.
	 */
	intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate);
}
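
/*
 * Estimate how busy the core was over the last sample period.  The
 * APERF/MPERF ratio gives the average frequency as a percentage of the
 * guaranteed (max non-turbo) frequency, and MPERF/TSC gives the fraction
 * of the period spent in C0; core_pct_busy is the product of the two.
 */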
static inline void intel_pstate_calc_busy(struct cpudata *cpu,
					struct sample *sample)
{
	int32_t core_pct;
	int32_t c0_pct;

	core_pct = div_fp(int_tofp((sample->aperf)),
			int_tofp((sample->mperf)));
	core_pct = mul_fp(core_pct, int_tofp(100));
	FP_ROUNDUP(core_pct);

	c0_pct = div_fp(int_tofp(sample->mperf),
			int_tofp(sample->tsc));

	sample->freq = fp_toint(
		mul_fp(int_tofp(cpu->pstate.max_pstate * 1000), core_pct));

	sample->core_pct_busy = mul_fp(core_pct, c0_pct);
}

static inline void intel_pstate_sample(struct cpudata *cpu)
{
	u64 aperf, mperf;
	unsigned long long tsc;

	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);
	tsc = native_read_tsc();

	aperf = aperf >> FRAC_BITS;
	mperf = mperf >> FRAC_BITS;
	tsc = tsc >> FRAC_BITS;

	cpu->sample.aperf = aperf;
	cpu->sample.mperf = mperf;
	cpu->sample.tsc = tsc;
	cpu->sample.aperf -= cpu->prev_aperf;
	cpu->sample.mperf -= cpu->prev_mperf;
	cpu->sample.tsc -= cpu->prev_tsc;

	intel_pstate_calc_busy(cpu, &cpu->sample);

	cpu->prev_aperf = aperf;
	cpu->prev_mperf = mperf;
	cpu->prev_tsc = tsc;
}

static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
{
	int sample_time, delay;

	sample_time = pid_params.sample_rate_ms;
	delay = msecs_to_jiffies(sample_time);
	mod_timer_pinned(&cpu->timer, jiffies + delay);
}

static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
{
	int32_t core_busy, max_pstate, current_pstate;

	core_busy = cpu->sample.core_pct_busy;
	max_pstate = int_tofp(cpu->pstate.max_pstate);
	current_pstate = int_tofp(cpu->pstate.current_pstate);
	core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
	return FP_ROUNDUP(core_busy);
}

static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
{
	int32_t busy_scaled;
	struct _pid *pid;
	signed int ctl = 0;
	int steps;

	pid = &cpu->pid;
	busy_scaled = intel_pstate_get_scaled_busy(cpu);

	ctl = pid_calc(pid, busy_scaled);

	steps = abs(ctl);

	if (ctl < 0)
		intel_pstate_pstate_increase(cpu, steps);
	else
		intel_pstate_pstate_decrease(cpu, steps);
}

static void intel_pstate_timer_func(unsigned long __data)
{
	struct cpudata *cpu = (struct cpudata *) __data;
	struct sample *sample;

	intel_pstate_sample(cpu);

	sample = &cpu->sample;

	intel_pstate_adjust_busy_pstate(cpu);

	trace_pstate_sample(fp_toint(sample->core_pct_busy),
			fp_toint(intel_pstate_get_scaled_busy(cpu)),
			cpu->pstate.current_pstate,
			sample->mperf,
			sample->aperf,
			sample->freq);

	intel_pstate_set_sample_time(cpu);
}

#define ICPU(model, policy) \
	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
			(unsigned long)&policy }

static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
	ICPU(0x2a, core_params),
	ICPU(0x2d, core_params),
	ICPU(0x37, byt_params),
	ICPU(0x3a, core_params),
	ICPU(0x3c, core_params),
	ICPU(0x3e, core_params),
	ICPU(0x3f, core_params),
	ICPU(0x45, core_params),
	ICPU(0x46, core_params),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
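
/*
 * Per-CPU bring-up: allocate the cpudata, read the P state limits, reset
 * the PID controller, take an initial sample and arm a deferrable timer
 * pinned to the target CPU to drive the periodic re-evaluation.
 */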
static int intel_pstate_init_cpu(unsigned int cpunum)
{
	const struct x86_cpu_id *id;
	struct cpudata *cpu;

	id = x86_match_cpu(intel_pstate_cpu_ids);
	if (!id)
		return -ENODEV;

	all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata), GFP_KERNEL);
	if (!all_cpu_data[cpunum])
		return -ENOMEM;

	cpu = all_cpu_data[cpunum];

	intel_pstate_get_cpu_pstates(cpu);
	if (!cpu->pstate.current_pstate) {
		all_cpu_data[cpunum] = NULL;
		kfree(cpu);
		return -ENODATA;
	}

	cpu->cpu = cpunum;

	init_timer_deferrable(&cpu->timer);
	cpu->timer.function = intel_pstate_timer_func;
	cpu->timer.data =
		(unsigned long)cpu;
	cpu->timer.expires = jiffies + HZ/100;
	intel_pstate_busy_pid_reset(cpu);
	intel_pstate_sample(cpu);
	intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate);

	add_timer_on(&cpu->timer, cpunum);

	pr_info("Intel pstate controlling: cpu %d\n", cpunum);

	return 0;
}

static unsigned int intel_pstate_get(unsigned int cpu_num)
{
	struct sample *sample;
	struct cpudata *cpu;

	cpu = all_cpu_data[cpu_num];
	if (!cpu)
		return 0;
	sample = &cpu->sample;
	return sample->freq;
}

static int intel_pstate_set_policy(struct cpufreq_policy *policy)
{
	struct cpudata *cpu;

	cpu = all_cpu_data[policy->cpu];

	if (!policy->cpuinfo.max_freq)
		return -ENODEV;

	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
		limits.min_perf_pct = 100;
		limits.min_perf = int_tofp(1);
		limits.max_perf_pct = 100;
		limits.max_perf = int_tofp(1);
		limits.no_turbo = 0;
		return 0;
	}
	limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
	limits.min_perf_pct = clamp_t(int, limits.min_perf_pct, 0, 100);
	limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));

	limits.max_policy_pct = policy->max * 100 / policy->cpuinfo.max_freq;
	limits.max_policy_pct = clamp_t(int, limits.max_policy_pct, 0, 100);
	limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct);
	limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));

	return 0;
}

static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
{
	cpufreq_verify_within_cpu_limits(policy);

	if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) &&
		(policy->policy != CPUFREQ_POLICY_PERFORMANCE))
		return -EINVAL;

	return 0;
}

static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
{
	int cpu_num = policy->cpu;
	struct cpudata *cpu = all_cpu_data[cpu_num];

	pr_info("intel_pstate CPU %d exiting\n", cpu_num);

	del_timer_sync(&all_cpu_data[cpu_num]->timer);
	intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
	kfree(all_cpu_data[cpu_num]);
	all_cpu_data[cpu_num] = NULL;
}

static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
{
	struct cpudata *cpu;
	int rc;

	rc = intel_pstate_init_cpu(policy->cpu);
	if (rc)
		return rc;

	cpu = all_cpu_data[policy->cpu];

	if (!limits.no_turbo &&
		limits.min_perf_pct == 100 && limits.max_perf_pct == 100)
		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
	else
		policy->policy = CPUFREQ_POLICY_POWERSAVE;

	policy->min = cpu->pstate.min_pstate * 100000;
	policy->max = cpu->pstate.turbo_pstate * 100000;

	/* cpuinfo and default policy values */
	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * 100000;
	policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * 100000;
	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
	cpumask_set_cpu(policy->cpu, policy->cpus);

	return 0;
}

static struct cpufreq_driver intel_pstate_driver = {
	.flags		= CPUFREQ_CONST_LOOPS,
	.verify		= intel_pstate_verify_policy,
	.setpolicy	= intel_pstate_set_policy,
	.get		= intel_pstate_get,
	.init		= intel_pstate_cpu_init,
	.stop_cpu	= intel_pstate_stop_cpu,
	.name		= "intel_pstate",
};
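
/* Set to 1 by "intel_pstate=disable" on the kernel command line. */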
static int __initdata no_load;

static int intel_pstate_msrs_not_valid(void)
{
	/* Check that all the msr's we are using are valid. */
	u64 aperf, mperf, tmp;

	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);

	if (!pstate_funcs.get_max() ||
		!pstate_funcs.get_min() ||
		!pstate_funcs.get_turbo())
		return -ENODEV;

	rdmsrl(MSR_IA32_APERF, tmp);
	if (!(tmp - aperf))
		return -ENODEV;

	rdmsrl(MSR_IA32_MPERF, tmp);
	if (!(tmp - mperf))
		return -ENODEV;

	return 0;
}

static void copy_pid_params(struct pstate_adjust_policy *policy)
{
	pid_params.sample_rate_ms = policy->sample_rate_ms;
	pid_params.p_gain_pct = policy->p_gain_pct;
	pid_params.i_gain_pct = policy->i_gain_pct;
	pid_params.d_gain_pct = policy->d_gain_pct;
	pid_params.deadband = policy->deadband;
	pid_params.setpoint = policy->setpoint;
}

static void copy_cpu_funcs(struct pstate_funcs *funcs)
{
	pstate_funcs.get_max = funcs->get_max;
	pstate_funcs.get_min = funcs->get_min;
	pstate_funcs.get_turbo = funcs->get_turbo;
	pstate_funcs.set = funcs->set;
	pstate_funcs.get_vid = funcs->get_vid;
}

#if IS_ENABLED(CONFIG_ACPI)
#include <acpi/processor.h>

static bool intel_pstate_no_acpi_pss(void)
{
	int i;

	for_each_possible_cpu(i) {
		acpi_status status;
		union acpi_object *pss;
		struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
		struct acpi_processor *pr = per_cpu(processors, i);

		if (!pr)
			continue;

		status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
		if (ACPI_FAILURE(status))
			continue;

		pss = buffer.pointer;
		if (pss && pss->type == ACPI_TYPE_PACKAGE) {
			kfree(pss);
			return false;
		}

		kfree(pss);
	}

	return true;
}

struct hw_vendor_info {
	u16  valid;
	char oem_id[ACPI_OEM_ID_SIZE];
	char oem_table_id[ACPI_OEM_TABLE_ID_SIZE];
};

/* Hardware vendor-specific info that has its own power management modes */
static struct hw_vendor_info vendor_info[] = {
	{1, "HP    ", "ProLiant"},
	{0, "", ""},
};

static bool intel_pstate_platform_pwr_mgmt_exists(void)
{
	struct acpi_table_header hdr;
	struct hw_vendor_info *v_info;

	if (acpi_disabled
	    || ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr)))
		return false;

	for (v_info = vendor_info; v_info->valid; v_info++) {
		if (!strncmp(hdr.oem_id, v_info->oem_id, ACPI_OEM_ID_SIZE)
		    && !strncmp(hdr.oem_table_id, v_info->oem_table_id, ACPI_OEM_TABLE_ID_SIZE)
		    && intel_pstate_no_acpi_pss())
			return true;
	}

	return false;
}
#else /* CONFIG_ACPI not enabled */
static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; }
#endif /* CONFIG_ACPI */
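
/*
 * Driver init: match the CPU model, bail out if the platform firmware
 * provides its own power management, copy the model-specific PID tuning
 * and callbacks, verify that the APERF/MPERF MSRs are counting, then
 * register with cpufreq and expose the debugfs/sysfs knobs.
 */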
static int __init intel_pstate_init(void)
{
	int cpu, rc = 0;
	const struct x86_cpu_id *id;
	struct cpu_defaults *cpu_info;

	if (no_load)
		return -ENODEV;

	id = x86_match_cpu(intel_pstate_cpu_ids);
	if (!id)
		return -ENODEV;

	/*
	 * The Intel pstate driver will be ignored if the platform
	 * firmware has its own power management modes.
	 */
	if (intel_pstate_platform_pwr_mgmt_exists())
		return -ENODEV;

	cpu_info = (struct cpu_defaults *)id->driver_data;

	copy_pid_params(&cpu_info->pid_policy);
	copy_cpu_funcs(&cpu_info->funcs);

	if (intel_pstate_msrs_not_valid())
		return -ENODEV;

	pr_info("Intel P-state driver initializing.\n");

	all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
	if (!all_cpu_data)
		return -ENOMEM;

	rc = cpufreq_register_driver(&intel_pstate_driver);
	if (rc)
		goto out;

	intel_pstate_debug_expose_params();
	intel_pstate_sysfs_expose_params();

	return rc;
out:
	get_online_cpus();
	for_each_online_cpu(cpu) {
		if (all_cpu_data[cpu]) {
			del_timer_sync(&all_cpu_data[cpu]->timer);
			kfree(all_cpu_data[cpu]);
		}
	}

	put_online_cpus();
	vfree(all_cpu_data);
	return -ENODEV;
}
device_initcall(intel_pstate_init);

static int __init intel_pstate_setup(char *str)
{
	if (!str)
		return -EINVAL;

	if (!strcmp(str, "disable"))
		no_load = 1;
	return 0;
}
early_param("intel_pstate", intel_pstate_setup);

MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors");
MODULE_LICENSE("GPL");