// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_powerclamp.c - package c-state idle injection
 *
 * Copyright (c) 2012-2023, Intel Corporation.
 *
 * Authors:
 *	Arjan van de Ven <arjan@linux.intel.com>
 *	Jacob Pan <jacob.jun.pan@linux.intel.com>
 *
 * TODO:
 *	1. better handle wakeup from external interrupts, currently a fixed
 *	   compensation is added to clamping duration when excessive amount
 *	   of wakeups are observed during idle time. the reason is that in
 *	   case of external interrupts without need for ack, clamping down
 *	   cpu in non-irq context does not reduce irq. for majority of the
 *	   cases, clamping down cpu does help reduce irq as well, we should
 *	   be able to differentiate the two cases and give a quantitative
 *	   solution for the irqs that we can control. perhaps based on
 *	   get_cpu_iowait_time_us()
 *
 *	2. synchronization with other hw blocks
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/delay.h>
#include <linux/cpu.h>
#include <linux/thermal.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/idle_inject.h>

#include <asm/msr.h>
#include <asm/mwait.h>
#include <asm/cpu_device_id.h>

/* Upper bound (percent) of the injected idle ratio; also the cooling
 * device max state. */
#define MAX_TARGET_RATIO (100U)
/* For each undisturbed clamping period (no extra wake ups during idle time),
 * we increment the confidence counter for the given target ratio.
 * CONFIDENCE_OK defines the level where runtime calibration results are
 * valid.
 */
#define CONFIDENCE_OK (3)
/* Default idle injection duration, driver adjust sleep time to meet target
 * idle ratio. Similar to frequency modulation.
 */
#define DEFAULT_DURATION_JIFFIES (6)

/* Deepest MWAIT hint found by find_target_mwait(); informational only here */
static unsigned int target_mwait;
/* debugfs directory ("intel_powerclamp") holding the calibration dump */
static struct dentry *debug_dir;
/* Set only when all present CPUs are clamped on a single-package,
 * single-die system, where package C-state polling is meaningful */
static bool poll_pkg_cstate_enable;

/* Idle ratio observed using package C-state counters */
static unsigned int current_ratio;

/* Skip the idle injection till set to true */
static bool should_skip;

/* Runtime state of the clamping controller; fields are written under
 * powerclamp_lock (see get_run_time() and idle_inject_update()). */
struct powerclamp_data {
	unsigned int cpu;
	unsigned int count;		/* injection cycles since start */
	unsigned int guard;		/* tolerance above target ratio */
	unsigned int window_size_now;	/* snapshot of window_size in use */
	unsigned int target_ratio;	/* requested idle percentage */
	bool clamping;			/* true while injection is active */
};

static struct powerclamp_data powerclamp_data;

static struct thermal_cooling_device *cooling_dev;

/* Serializes powerclamp_data, duration, and start/stop of injection */
static DEFINE_MUTEX(powerclamp_lock);

/* This duration is in microseconds */
static unsigned int duration;
/* Package C-state residency ratio published by the 1 Hz polling work */
static unsigned int pkg_cstate_ratio_cur;
/* Sliding control window length in clamping cycles (module parameter) */
static unsigned int window_size;

/*
 * Module parameter callback for "duration": forced idle time per attempt.
 * Accepts milliseconds in the range 6-25 and stores microseconds in
 * "duration". Out-of-range input is rejected with -EINVAL.
 */
static int duration_set(const char *arg, const struct kernel_param *kp)
{
	int ret = 0;
	unsigned long new_duration;

	ret = kstrtoul(arg, 10, &new_duration);
	if (ret)
		goto exit;
	if (new_duration > 25 || new_duration < 6) {
		pr_err("Out of recommended range %lu, between 6-25ms\n",
			new_duration);
		ret = -EINVAL;
		goto exit;
	}

	mutex_lock(&powerclamp_lock);
	/* ms -> us; the clamp() is redundant after the range check above */
	duration = clamp(new_duration, 6ul, 25ul) * 1000;
	mutex_unlock(&powerclamp_lock);
exit:

	return ret;
}

/* Report "duration" back to user space in milliseconds */
static int duration_get(char *buf, const struct kernel_param *kp)
{
	int ret;

	mutex_lock(&powerclamp_lock);
	ret = sysfs_emit(buf, "%d\n", duration / 1000);
	mutex_unlock(&powerclamp_lock);

	return ret;
}

static const struct kernel_param_ops duration_ops = {
	.set = duration_set,
	.get = duration_get,
};

module_param_cb(duration, &duration_ops, NULL, 0644);
MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec.");

/* Default cap for injected idle percent, and the stricter cap that
 * applies when every present CPU is being clamped (see check_invalid()) */
#define DEFAULT_MAX_IDLE 50
#define MAX_ALL_CPU_IDLE 75

static u8 max_idle = DEFAULT_MAX_IDLE;

/* Mask of CPUs subject to idle injection; allocated on first use and
 * freed at module exit */
static
cpumask_var_t idle_injection_cpu_mask;

/*
 * Allocate idle_injection_cpu_mask on first use and copy @copy_mask into
 * it. Returns 0 on success, -ENOMEM if the one-time allocation fails.
 */
static int allocate_copy_idle_injection_mask(const struct cpumask *copy_mask)
{
	if (cpumask_available(idle_injection_cpu_mask))
		goto copy_mask;

	/* This mask is allocated only one time and freed during module exit */
	if (!alloc_cpumask_var(&idle_injection_cpu_mask, GFP_KERNEL))
		return -ENOMEM;

copy_mask:
	cpumask_copy(idle_injection_cpu_mask, copy_mask);

	return 0;
}

/* Return true if the cpumask and idle percent combination is invalid */
static bool check_invalid(cpumask_var_t mask, u8 idle)
{
	/* clamping every present CPU is only allowed up to MAX_ALL_CPU_IDLE */
	if (cpumask_equal(cpu_present_mask, mask) && idle > MAX_ALL_CPU_IDLE)
		return true;

	return false;
}

/*
 * Module parameter callback for "cpumask": selects which CPUs receive
 * idle injection. Rejected while clamping is active (-EAGAIN), for empty
 * masks, and for combinations refused by check_invalid().
 */
static int cpumask_set(const char *arg, const struct kernel_param *kp)
{
	cpumask_var_t new_mask;
	int ret;

	mutex_lock(&powerclamp_lock);

	/* Can't set mask when cooling device is in use */
	if (powerclamp_data.clamping) {
		ret = -EAGAIN;
		goto skip_cpumask_set;
	}

	/*
	 * NOTE(review): alloc_cpumask_var() returns bool, so on allocation
	 * failure ret is 0 (false) and this callback reports success while
	 * the new mask was never applied — should likely return -ENOMEM.
	 */
	ret = alloc_cpumask_var(&new_mask, GFP_KERNEL);
	if (!ret)
		goto skip_cpumask_set;

	ret = bitmap_parse(arg, strlen(arg), cpumask_bits(new_mask),
		nr_cpumask_bits);
	if (ret)
		goto free_cpumask_set;

	if (cpumask_empty(new_mask) || check_invalid(new_mask, max_idle)) {
		ret = -EINVAL;
		goto free_cpumask_set;
	}

	/*
	 * When module parameters are passed from kernel command line
	 * during insmod, the module parameter callback is called
	 * before powerclamp_init(), so we can't assume that some
	 * cpumask can be allocated and copied before here. Also
	 * in this case this cpumask is used as the default mask.
	 */
	ret = allocate_copy_idle_injection_mask(new_mask);

free_cpumask_set:
	free_cpumask_var(new_mask);
skip_cpumask_set:
	mutex_unlock(&powerclamp_lock);

	return ret;
}

/* Show the current idle injection cpumask, -ENODEV before allocation */
static int cpumask_get(char *buf, const struct kernel_param *kp)
{
	if (!cpumask_available(idle_injection_cpu_mask))
		return -ENODEV;

	return bitmap_print_to_pagebuf(false, buf, cpumask_bits(idle_injection_cpu_mask),
		nr_cpumask_bits);
}

static const struct kernel_param_ops cpumask_ops = {
	.set = cpumask_set,
	.get = cpumask_get,
};

module_param_cb(cpumask, &cpumask_ops, NULL, 0644);
MODULE_PARM_DESC(cpumask, "Mask of CPUs to use for idle injection.");

/*
 * Module parameter callback for "max_idle": upper bound for the idle
 * percentage a cooling device state may request. Rejected while clamping
 * is active, above MAX_TARGET_RATIO, or when the current cpumask does
 * not allow it (see check_invalid()).
 */
static int max_idle_set(const char *arg, const struct kernel_param *kp)
{
	u8 new_max_idle;
	int ret = 0;

	mutex_lock(&powerclamp_lock);

	/* Can't set mask when cooling device is in use */
	if (powerclamp_data.clamping) {
		ret = -EAGAIN;
		goto skip_limit_set;
	}

	ret = kstrtou8(arg, 10, &new_max_idle);
	if (ret)
		goto skip_limit_set;

	if (new_max_idle > MAX_TARGET_RATIO) {
		ret = -EINVAL;
		goto skip_limit_set;
	}

	/* may run before powerclamp_init(): allocate the default mask */
	if (!cpumask_available(idle_injection_cpu_mask)) {
		ret = allocate_copy_idle_injection_mask(cpu_present_mask);
		if (ret)
			goto skip_limit_set;
	}

	if (check_invalid(idle_injection_cpu_mask, new_max_idle)) {
		ret = -EINVAL;
		goto skip_limit_set;
	}

	max_idle = new_max_idle;

skip_limit_set:
	mutex_unlock(&powerclamp_lock);

	return ret;
}

/*
 * NOTE(review): max_idle is a u8 but the getter is param_get_int, which
 * reads sizeof(int) bytes from it — probably should be param_get_byte.
 */
static const struct kernel_param_ops max_idle_ops = {
	.set = max_idle_set,
	.get = param_get_int,
};

module_param_cb(max_idle, &max_idle_ops, &max_idle, 0644);
MODULE_PARM_DESC(max_idle, "maximum injected idle time to the total CPU time ratio in percent range:1-100");

/* Per-target-ratio runtime calibration state, one entry per idle percent */
struct powerclamp_calibration_data {
	unsigned long confidence; /* used
for calibration, basically a counter 267 * gets incremented each time a clamping 268 * period is completed without extra wakeups 269 * once that counter is reached given level, 270 * compensation is deemed usable. 271 */ 272 unsigned long steady_comp; /* steady state compensation used when 273 * no extra wakeups occurred. 274 */ 275 unsigned long dynamic_comp; /* compensate excessive wakeup from idle 276 * mostly from external interrupts. 277 */ 278 }; 279 280 static struct powerclamp_calibration_data cal_data[MAX_TARGET_RATIO]; 281 282 static int window_size_set(const char *arg, const struct kernel_param *kp) 283 { 284 int ret = 0; 285 unsigned long new_window_size; 286 287 ret = kstrtoul(arg, 10, &new_window_size); 288 if (ret) 289 goto exit_win; 290 if (new_window_size > 10 || new_window_size < 2) { 291 pr_err("Out of recommended window size %lu, between 2-10\n", 292 new_window_size); 293 ret = -EINVAL; 294 } 295 296 window_size = clamp(new_window_size, 2ul, 10ul); 297 smp_mb(); 298 299 exit_win: 300 301 return ret; 302 } 303 304 static const struct kernel_param_ops window_size_ops = { 305 .set = window_size_set, 306 .get = param_get_int, 307 }; 308 309 module_param_cb(window_size, &window_size_ops, &window_size, 0644); 310 MODULE_PARM_DESC(window_size, "sliding window in number of clamping cycles\n" 311 "\tpowerclamp controls idle ratio within this window. larger\n" 312 "\twindow size results in slower response time but more smooth\n" 313 "\tclamping results. 
default to 2."); 314 315 static void find_target_mwait(void) 316 { 317 unsigned int eax, ebx, ecx, edx; 318 unsigned int highest_cstate = 0; 319 unsigned int highest_subcstate = 0; 320 int i; 321 322 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) 323 return; 324 325 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); 326 327 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || 328 !(ecx & CPUID5_ECX_INTERRUPT_BREAK)) 329 return; 330 331 edx >>= MWAIT_SUBSTATE_SIZE; 332 for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { 333 if (edx & MWAIT_SUBSTATE_MASK) { 334 highest_cstate = i; 335 highest_subcstate = edx & MWAIT_SUBSTATE_MASK; 336 } 337 } 338 target_mwait = (highest_cstate << MWAIT_SUBSTATE_SIZE) | 339 (highest_subcstate - 1); 340 341 } 342 343 struct pkg_cstate_info { 344 bool skip; 345 int msr_index; 346 int cstate_id; 347 }; 348 349 #define PKG_CSTATE_INIT(id) { \ 350 .msr_index = MSR_PKG_C##id##_RESIDENCY, \ 351 .cstate_id = id \ 352 } 353 354 static struct pkg_cstate_info pkg_cstates[] = { 355 PKG_CSTATE_INIT(2), 356 PKG_CSTATE_INIT(3), 357 PKG_CSTATE_INIT(6), 358 PKG_CSTATE_INIT(7), 359 PKG_CSTATE_INIT(8), 360 PKG_CSTATE_INIT(9), 361 PKG_CSTATE_INIT(10), 362 {NULL}, 363 }; 364 365 static bool has_pkg_state_counter(void) 366 { 367 u64 val; 368 struct pkg_cstate_info *info = pkg_cstates; 369 370 /* check if any one of the counter msrs exists */ 371 while (info->msr_index) { 372 if (!rdmsrl_safe(info->msr_index, &val)) 373 return true; 374 info++; 375 } 376 377 return false; 378 } 379 380 static u64 pkg_state_counter(void) 381 { 382 u64 val; 383 u64 count = 0; 384 struct pkg_cstate_info *info = pkg_cstates; 385 386 while (info->msr_index) { 387 if (!info->skip) { 388 if (!rdmsrl_safe(info->msr_index, &val)) 389 count += val; 390 else 391 info->skip = true; 392 } 393 info++; 394 } 395 396 return count; 397 } 398 399 static unsigned int get_compensation(int ratio) 400 { 401 unsigned int comp = 0; 402 403 if (!poll_pkg_cstate_enable) 404 return 0; 405 406 
	/* we only use compensation if all adjacent ones are good */
	if (ratio == 1 &&
		cal_data[ratio].confidence >= CONFIDENCE_OK &&
		cal_data[ratio + 1].confidence >= CONFIDENCE_OK &&
		cal_data[ratio + 2].confidence >= CONFIDENCE_OK) {
		/* lowest usable ratio: average with the two neighbors above */
		comp = (cal_data[ratio].steady_comp +
			cal_data[ratio + 1].steady_comp +
			cal_data[ratio + 2].steady_comp) / 3;
	} else if (ratio == MAX_TARGET_RATIO - 1 &&
		cal_data[ratio].confidence >= CONFIDENCE_OK &&
		cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
		cal_data[ratio - 2].confidence >= CONFIDENCE_OK) {
		/* highest ratio: average with the two neighbors below */
		comp = (cal_data[ratio].steady_comp +
			cal_data[ratio - 1].steady_comp +
			cal_data[ratio - 2].steady_comp) / 3;
	} else if (cal_data[ratio].confidence >= CONFIDENCE_OK &&
		cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
		cal_data[ratio + 1].confidence >= CONFIDENCE_OK) {
		/* interior ratio: average with one neighbor on each side */
		comp = (cal_data[ratio].steady_comp +
			cal_data[ratio - 1].steady_comp +
			cal_data[ratio + 1].steady_comp) / 3;
	}

	/* do not exceed limit */
	if (comp + ratio >= MAX_TARGET_RATIO)
		comp = MAX_TARGET_RATIO - ratio - 1;

	return comp;
}

/*
 * Update the steady-state compensation for @target_ratio from the gap
 * between requested and observed idle ratio of the last window. Runs
 * only until the entry's confidence reaches CONFIDENCE_OK.
 */
static void adjust_compensation(int target_ratio, unsigned int win)
{
	int delta;
	struct powerclamp_calibration_data *d = &cal_data[target_ratio];

	/*
	 * adjust compensations if confidence level has not been reached.
	 */
	if (d->confidence >= CONFIDENCE_OK)
		return;

	delta = powerclamp_data.target_ratio - current_ratio;
	/* filter out bad data */
	if (delta >= 0 && delta <= (1+target_ratio/10)) {
		if (d->steady_comp)
			/* rounded running average of old and new delta */
			d->steady_comp =
				roundup(delta+d->steady_comp, 2)/2;
		else
			d->steady_comp = delta;
		d->confidence++;
	}
}

/*
 * Close the loop for the window that just ended: derive the achieved
 * package C-state ratio from the MSR and TSC deltas, feed the result to
 * the calibration, and return true if the system already idles at or
 * above target + guard (meaning injection can be skipped for a while).
 */
static bool powerclamp_adjust_controls(unsigned int target_ratio,
				unsigned int guard, unsigned int win)
{
	/* previous readings persist across calls to form deltas */
	static u64 msr_last, tsc_last;
	u64 msr_now, tsc_now;
	u64 val64;

	/* check result for the last window */
	msr_now = pkg_state_counter();
	tsc_now = rdtsc();

	/* calculate pkg cstate vs tsc ratio */
	if (!msr_last || !tsc_last)
		current_ratio = 1;
	else if (tsc_now-tsc_last) {
		val64 = 100*(msr_now-msr_last);
		do_div(val64, (tsc_now-tsc_last));
		current_ratio = val64;
	}

	/* update record */
	msr_last = msr_now;
	tsc_last = tsc_now;

	adjust_compensation(target_ratio, win);

	/* if we are above target+guard, skip */
	return powerclamp_data.target_ratio + guard <= current_ratio;
}

/*
 * This function calculates runtime from the current target ratio.
 * This function gets called under powerclamp_lock.
 */
static unsigned int get_run_time(void)
{
	unsigned int compensated_ratio;
	unsigned int runtime;

	/*
	 * make sure user selected ratio does not take effect until
	 * the next round. adjust target_ratio if user has changed
	 * target such that we can converge quickly.
	 */
	powerclamp_data.guard = 1 + powerclamp_data.target_ratio / 20;
	powerclamp_data.window_size_now = window_size;

	/*
	 * systems may have different ability to enter package level
	 * c-states, thus we need to compensate the injected idle ratio
	 * to achieve the actual target reported by the HW.
	 */
	compensated_ratio = powerclamp_data.target_ratio +
		get_compensation(powerclamp_data.target_ratio);
	/* NOTE(review): compensated_ratio is unsigned, so "<= 0" can only
	 * match 0; that still guards the division below. */
	if (compensated_ratio <= 0)
		compensated_ratio = 1;

	/* run/idle split that yields the compensated idle percentage */
	runtime = duration * 100 / compensated_ratio - duration;

	return runtime;
}

/*
 * 1 HZ polling while clamping is active, useful for userspace
 * to monitor actual idle ratio.
 */
static void poll_pkg_cstate(struct work_struct *dummy);
static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate);
static void poll_pkg_cstate(struct work_struct *dummy)
{
	/* previous samples persist across invocations to form deltas */
	static u64 msr_last;
	static u64 tsc_last;

	u64 msr_now;
	u64 tsc_now;
	u64 val64;

	msr_now = pkg_state_counter();
	tsc_now = rdtsc();

	/* calculate pkg cstate vs tsc ratio */
	if (!msr_last || !tsc_last)
		pkg_cstate_ratio_cur = 1;
	else {
		if (tsc_now - tsc_last) {
			val64 = 100 * (msr_now - msr_last);
			do_div(val64, (tsc_now - tsc_last));
			pkg_cstate_ratio_cur = val64;
		}
	}

	/* update record */
	msr_last = msr_now;
	tsc_last = tsc_now;

	/* re-arm only while clamping is still in progress */
	mutex_lock(&powerclamp_lock);
	if (powerclamp_data.clamping)
		schedule_delayed_work(&poll_pkg_cstate_work, HZ);
	mutex_unlock(&powerclamp_lock);
}

/* Handle into the powercap idle injection framework */
static struct idle_inject_device *ii_dev;

/*
 * This function is called from idle injection core on timer expiry
 * for the run duration. This allows powerclamp to readjust or skip
 * injecting idle for this cycle.
 */
static bool idle_inject_update(void)
{
	bool update = false;

	/* We can't sleep in this callback */
	if (!mutex_trylock(&powerclamp_lock))
		return true;

	/* once per window: evaluate the last window and recalibrate */
	if (!(powerclamp_data.count % powerclamp_data.window_size_now)) {

		should_skip = powerclamp_adjust_controls(powerclamp_data.target_ratio,
							 powerclamp_data.guard,
							 powerclamp_data.window_size_now);
		update = true;
	}

	if (update) {
		unsigned int runtime = get_run_time();

		idle_inject_set_duration(ii_dev, runtime, duration);
	}

	powerclamp_data.count++;

	mutex_unlock(&powerclamp_lock);

	/* returning false tells the core to skip injection this cycle */
	if (should_skip)
		return false;

	return true;
}

/* This function starts idle injection by calling idle_inject_start() */
static void trigger_idle_injection(void)
{
	unsigned int runtime = get_run_time();

	idle_inject_set_duration(ii_dev, runtime, duration);
	idle_inject_start(ii_dev);
	powerclamp_data.clamping = true;
}

/*
 * This function is called from start_power_clamp() to register
 * CPUS with powercap idle injection register and set default
 * idle duration and latency.
 */
static int powerclamp_idle_injection_register(void)
{
	poll_pkg_cstate_enable = false;
	if (cpumask_equal(cpu_present_mask, idle_injection_cpu_mask)) {
		/* full mask: register with the per-cycle update callback */
		ii_dev = idle_inject_register_full(idle_injection_cpu_mask, idle_inject_update);
		/* pkg C-state polling only meaningful on 1-pkg/1-die */
		if (topology_max_packages() == 1 && topology_max_die_per_package() == 1)
			poll_pkg_cstate_enable = true;
	} else {
		/* partial mask: no feedback callback */
		ii_dev = idle_inject_register(idle_injection_cpu_mask);
	}

	if (!ii_dev) {
		pr_err("powerclamp: idle_inject_register failed\n");
		return -EAGAIN;
	}

	idle_inject_set_duration(ii_dev, TICK_USEC, duration);
	idle_inject_set_latency(ii_dev, UINT_MAX);

	return 0;
}

/*
 * This function is called from end_power_clamp() to stop idle injection
 * and unregister CPUS from powercap idle injection core.
 */
static void remove_idle_injection(void)
{
	if (!powerclamp_data.clamping)
		return;

	powerclamp_data.clamping = false;
	idle_inject_stop(ii_dev);
}

/*
 * This function is called when user change the cooling device
 * state from zero to some other value.
 */
static int start_power_clamp(void)
{
	int ret;

	ret = powerclamp_idle_injection_register();
	if (!ret) {
		trigger_idle_injection();
		if (poll_pkg_cstate_enable)
			schedule_delayed_work(&poll_pkg_cstate_work, 0);
	}

	return ret;
}

/*
 * This function is called when user change the cooling device
 * state from non zero value zero.
 */
static void end_power_clamp(void)
{
	if (powerclamp_data.clamping) {
		remove_idle_injection();
		idle_inject_unregister(ii_dev);
	}
}

/* Cooling device callback: max state is the full ratio range */
static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
				 unsigned long *state)
{
	*state = MAX_TARGET_RATIO;

	return 0;
}

/* Cooling device callback: report the current target idle ratio */
static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
				 unsigned long *state)
{
	mutex_lock(&powerclamp_lock);
	*state = powerclamp_data.target_ratio;
	mutex_unlock(&powerclamp_lock);

	return 0;
}

/*
 * Cooling device callback: apply a new target idle ratio. A 0 -> nonzero
 * transition starts clamping, nonzero -> 0 stops it; otherwise the
 * running injection is retuned in place.
 */
static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev,
				 unsigned long new_target_ratio)
{
	int ret = 0;

	mutex_lock(&powerclamp_lock);

	/* cap the request below the configurable max_idle limit */
	new_target_ratio = clamp(new_target_ratio, 0UL,
				(unsigned long) (max_idle - 1));
	if (!powerclamp_data.target_ratio && new_target_ratio > 0) {
		pr_info("Start idle injection to reduce power\n");
		powerclamp_data.target_ratio = new_target_ratio;
		ret = start_power_clamp();
		if (ret)
			/* roll back so a later retry is seen as a start */
			powerclamp_data.target_ratio = 0;
		goto exit_set;
	} else if (powerclamp_data.target_ratio > 0 && new_target_ratio == 0) {
		pr_info("Stop forced idle injection\n");
		end_power_clamp();
		powerclamp_data.target_ratio = 0;
	} else	/* adjust currently running */ {
		unsigned int runtime;

		powerclamp_data.target_ratio = new_target_ratio;
		runtime = get_run_time();
		idle_inject_set_duration(ii_dev, runtime, duration);
	}

exit_set:
	mutex_unlock(&powerclamp_lock);

	return ret;
}

/* bind to generic thermal layer as cooling device*/
static const struct thermal_cooling_device_ops powerclamp_cooling_ops = {
	.get_max_state = powerclamp_get_max_state,
	.get_cur_state = powerclamp_get_cur_state,
	.set_cur_state = powerclamp_set_cur_state,
};

/* Supported hardware: any Intel CPU advertising MWAIT */
static const struct x86_cpu_id __initconst intel_powerclamp_ids[] = {
X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_MWAIT, NULL), 740 {} 741 }; 742 MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids); 743 744 static int __init powerclamp_probe(void) 745 { 746 747 if (!x86_match_cpu(intel_powerclamp_ids)) { 748 pr_err("CPU does not support MWAIT\n"); 749 return -ENODEV; 750 } 751 752 /* The goal for idle time alignment is to achieve package cstate. */ 753 if (!has_pkg_state_counter()) { 754 pr_info("No package C-state available\n"); 755 return -ENODEV; 756 } 757 758 /* find the deepest mwait value */ 759 find_target_mwait(); 760 761 return 0; 762 } 763 764 static int powerclamp_debug_show(struct seq_file *m, void *unused) 765 { 766 int i = 0; 767 768 seq_printf(m, "pct confidence steady dynamic (compensation)\n"); 769 for (i = 0; i < MAX_TARGET_RATIO; i++) { 770 seq_printf(m, "%d\t%lu\t%lu\t%lu\n", 771 i, 772 cal_data[i].confidence, 773 cal_data[i].steady_comp, 774 cal_data[i].dynamic_comp); 775 } 776 777 return 0; 778 } 779 780 DEFINE_SHOW_ATTRIBUTE(powerclamp_debug); 781 782 static inline void powerclamp_create_debug_files(void) 783 { 784 debug_dir = debugfs_create_dir("intel_powerclamp", NULL); 785 786 debugfs_create_file("powerclamp_calib", S_IRUGO, debug_dir, cal_data, 787 &powerclamp_debug_fops); 788 } 789 790 static int __init powerclamp_init(void) 791 { 792 int retval; 793 794 /* probe cpu features and ids here */ 795 retval = powerclamp_probe(); 796 if (retval) 797 return retval; 798 799 mutex_lock(&powerclamp_lock); 800 if (!cpumask_available(idle_injection_cpu_mask)) 801 retval = allocate_copy_idle_injection_mask(cpu_present_mask); 802 mutex_unlock(&powerclamp_lock); 803 804 if (retval) 805 return retval; 806 807 /* set default limit, maybe adjusted during runtime based on feedback */ 808 window_size = 2; 809 810 cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL, 811 &powerclamp_cooling_ops); 812 if (IS_ERR(cooling_dev)) 813 return -ENODEV; 814 815 if (!duration) 816 duration = 
jiffies_to_usecs(DEFAULT_DURATION_JIFFIES); 817 818 powerclamp_create_debug_files(); 819 820 return 0; 821 } 822 module_init(powerclamp_init); 823 824 static void __exit powerclamp_exit(void) 825 { 826 mutex_lock(&powerclamp_lock); 827 end_power_clamp(); 828 mutex_unlock(&powerclamp_lock); 829 830 thermal_cooling_device_unregister(cooling_dev); 831 832 cancel_delayed_work_sync(&poll_pkg_cstate_work); 833 debugfs_remove_recursive(debug_dir); 834 835 if (cpumask_available(idle_injection_cpu_mask)) 836 free_cpumask_var(idle_injection_cpu_mask); 837 } 838 module_exit(powerclamp_exit); 839 840 MODULE_IMPORT_NS(IDLE_INJECT); 841 842 MODULE_LICENSE("GPL"); 843 MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); 844 MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>"); 845 MODULE_DESCRIPTION("Package Level C-state Idle Injection for Intel CPUs"); 846