// SPDX-License-Identifier: GPL-2.0-only
/*
 * Common code for Intel Running Average Power Limit (RAPL) support.
 * Copyright (c) 2019, Intel Corporation.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/log2.h>
#include <linux/bitmap.h>
#include <linux/delay.h>
#include <linux/sysfs.h>
#include <linux/cpu.h>
#include <linux/powercap.h>
#include <linux/suspend.h>
#include <linux/intel_rapl.h>
#include <linux/processor.h>
#include <linux/platform_device.h>

#include <asm/iosf_mbi.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>

/* bitmasks for RAPL MSRs, used by primitive access functions */
#define ENERGY_STATUS_MASK      0xffffffff

/* power limit 1: limit value, enable and clamp bits (low half of the register) */
#define POWER_LIMIT1_MASK       0x7FFF
#define POWER_LIMIT1_ENABLE     BIT(15)
#define POWER_LIMIT1_CLAMP      BIT(16)

/* power limit 2: same fields, located in the high half of the register */
#define POWER_LIMIT2_MASK       (0x7FFFULL<<32)
#define POWER_LIMIT2_ENABLE     BIT_ULL(47)
#define POWER_LIMIT2_CLAMP      BIT_ULL(48)
/* lock bit used by rapl_read_data_raw() for domains with two power limits */
#define POWER_HIGH_LOCK         BIT_ULL(63)
/* lock bit used for the FW_LOCK primitive by default */
#define POWER_LOW_LOCK          BIT(31)

#define TIME_WINDOW1_MASK       (0x7FULL<<17)
#define TIME_WINDOW2_MASK       (0x7FULL<<49)

/*
 * Fields of the unit register, decoded by rapl_check_unit_core() and
 * rapl_check_unit_atom(): each value is masked and shifted by its offset.
 */
#define POWER_UNIT_OFFSET	0
#define POWER_UNIT_MASK		0x0F

#define ENERGY_UNIT_OFFSET	0x08
#define ENERGY_UNIT_MASK	0x1F00

#define TIME_UNIT_OFFSET	0x10
#define TIME_UNIT_MASK		0xF0000

/* fields of the per domain INFO register (see the rpi[] table) */
#define POWER_INFO_MAX_MASK     (0x7fffULL<<32)
#define POWER_INFO_MIN_MASK     (0x7fffULL<<16)
#define POWER_INFO_MAX_TIME_WIN_MASK     (0x3fULL<<48)
#define POWER_INFO_THERMAL_SPEC_MASK     0x7fff

#define PERF_STATUS_THROTTLE_TIME_MASK 0xffffffff
#define PP_POLICY_MASK         0x1F

/* Non HW constants */
#define RAPL_PRIMITIVE_DERIVED       BIT(1)	/* not from raw data */
#define RAPL_PRIMITIVE_DUMMY         BIT(2)	/* rejected by rapl_read_data_raw() */

#define TIME_WINDOW_MAX_MSEC 40000
#define TIME_WINDOW_MIN_MSEC 250
#define ENERGY_UNIT_SCALE    1000	/* scale from driver unit to powercap unit */

/* Unit class of a primitive; selects the conversion done by rapl_unit_xlate() */
enum unit_type {
	ARBITRARY_UNIT,		/* no translation */
	POWER_UNIT,		/* scaled by the package power_unit */
	ENERGY_UNIT,		/* scaled by the domain or package energy unit */
	TIME_UNIT,		/* converted by rapl_defaults->compute_time_window() */
};

/* per domain data, some are optional */
#define NR_RAW_PRIMITIVES (NR_RAPL_PRIMITIVES - 2)

/* bits of rapl_domain.state */
#define	DOMAIN_STATE_INACTIVE           BIT(0)
#define	DOMAIN_STATE_POWER_LIMIT_SET    BIT(1)
#define DOMAIN_STATE_BIOS_LOCKED        BIT(2)	/* writes are refused with -EACCES */

/* constraint names reported through get_constraint_name() */
static const char pl1_name[] = "long_term";
static const char pl2_name[] = "short_term";

/* map an embedded powercap zone back to the rapl_domain that contains it */
#define power_zone_to_rapl_domain(_zone) \
	container_of(_zone, struct rapl_domain, power_zone)

/*
 * Per CPU model behaviour, selected through the rapl_ids[] match table.
 *
 * @floor_freq_reg_addr: register address passed to iosf_mbi_read()/
 *	iosf_mbi_write() by set_floor_freq_atom(); 0 is treated as invalid there.
 * @check_unit: reads the unit register and fills the package energy, power
 *	and time units.
 * @set_floor_freq: optional hook (may be NULL) invoked by set_domain_enable().
 * @compute_time_window: converts a time window between raw and translated
 *	form; used by rapl_unit_xlate() for TIME_UNIT primitives.
 * @dram_domain_energy_unit: when non-zero, copied into the DRAM domain by
 *	rapl_init_domains() and used instead of the package energy unit.
 */
struct rapl_defaults {
	u8 floor_freq_reg_addr;
	int (*check_unit)(struct rapl_package *rp, int cpu);
	void (*set_floor_freq)(struct rapl_domain *rd, bool mode);
	u64 (*compute_time_window)(struct rapl_package *rp, u64 val,
				    bool to_raw);
	unsigned int dram_domain_energy_unit;
};
/* defaults in effect for the running CPU */
static struct rapl_defaults *rapl_defaults;

/* Sideband MBI registers */
#define IOSF_CPU_POWER_BUDGET_CTL_BYT (0x2)
#define IOSF_CPU_POWER_BUDGET_CTL_TNG (0xdf)

/* set in rapl_package.power_limit_irq once the PLN irq enable bit was saved */
#define PACKAGE_PLN_INT_SAVED   BIT(0)
#define MAX_PRIM_NAME (32)

/* per domain data. used to describe individual knobs such that access function
 * can be consolidated into one instead of many inline functions.
108 */ 109 struct rapl_primitive_info { 110 const char *name; 111 u64 mask; 112 int shift; 113 enum rapl_domain_reg_id id; 114 enum unit_type unit; 115 u32 flag; 116 }; 117 118 #define PRIMITIVE_INFO_INIT(p, m, s, i, u, f) { \ 119 .name = #p, \ 120 .mask = m, \ 121 .shift = s, \ 122 .id = i, \ 123 .unit = u, \ 124 .flag = f \ 125 } 126 127 static void rapl_init_domains(struct rapl_package *rp); 128 static int rapl_read_data_raw(struct rapl_domain *rd, 129 enum rapl_primitives prim, 130 bool xlate, u64 *data); 131 static int rapl_write_data_raw(struct rapl_domain *rd, 132 enum rapl_primitives prim, 133 unsigned long long value); 134 static u64 rapl_unit_xlate(struct rapl_domain *rd, 135 enum unit_type type, u64 value, int to_raw); 136 static void package_power_limit_irq_save(struct rapl_package *rp); 137 138 static LIST_HEAD(rapl_packages); /* guarded by CPU hotplug lock */ 139 140 static const char *const rapl_domain_names[] = { 141 "package", 142 "core", 143 "uncore", 144 "dram", 145 "psys", 146 }; 147 148 static int get_energy_counter(struct powercap_zone *power_zone, 149 u64 *energy_raw) 150 { 151 struct rapl_domain *rd; 152 u64 energy_now; 153 154 /* prevent CPU hotplug, make sure the RAPL domain does not go 155 * away while reading the counter. 
156 */ 157 get_online_cpus(); 158 rd = power_zone_to_rapl_domain(power_zone); 159 160 if (!rapl_read_data_raw(rd, ENERGY_COUNTER, true, &energy_now)) { 161 *energy_raw = energy_now; 162 put_online_cpus(); 163 164 return 0; 165 } 166 put_online_cpus(); 167 168 return -EIO; 169 } 170 171 static int get_max_energy_counter(struct powercap_zone *pcd_dev, u64 *energy) 172 { 173 struct rapl_domain *rd = power_zone_to_rapl_domain(pcd_dev); 174 175 *energy = rapl_unit_xlate(rd, ENERGY_UNIT, ENERGY_STATUS_MASK, 0); 176 return 0; 177 } 178 179 static int release_zone(struct powercap_zone *power_zone) 180 { 181 struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); 182 struct rapl_package *rp = rd->rp; 183 184 /* package zone is the last zone of a package, we can free 185 * memory here since all children has been unregistered. 186 */ 187 if (rd->id == RAPL_DOMAIN_PACKAGE) { 188 kfree(rd); 189 rp->domains = NULL; 190 } 191 192 return 0; 193 194 } 195 196 static int find_nr_power_limit(struct rapl_domain *rd) 197 { 198 int i, nr_pl = 0; 199 200 for (i = 0; i < NR_POWER_LIMITS; i++) { 201 if (rd->rpl[i].name) 202 nr_pl++; 203 } 204 205 return nr_pl; 206 } 207 208 static int set_domain_enable(struct powercap_zone *power_zone, bool mode) 209 { 210 struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); 211 212 if (rd->state & DOMAIN_STATE_BIOS_LOCKED) 213 return -EACCES; 214 215 get_online_cpus(); 216 rapl_write_data_raw(rd, PL1_ENABLE, mode); 217 if (rapl_defaults->set_floor_freq) 218 rapl_defaults->set_floor_freq(rd, mode); 219 put_online_cpus(); 220 221 return 0; 222 } 223 224 static int get_domain_enable(struct powercap_zone *power_zone, bool *mode) 225 { 226 struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); 227 u64 val; 228 229 if (rd->state & DOMAIN_STATE_BIOS_LOCKED) { 230 *mode = false; 231 return 0; 232 } 233 get_online_cpus(); 234 if (rapl_read_data_raw(rd, PL1_ENABLE, true, &val)) { 235 put_online_cpus(); 236 return -EIO; 237 } 238 *mode = 
val; 239 put_online_cpus(); 240 241 return 0; 242 } 243 244 /* per RAPL domain ops, in the order of rapl_domain_type */ 245 static const struct powercap_zone_ops zone_ops[] = { 246 /* RAPL_DOMAIN_PACKAGE */ 247 { 248 .get_energy_uj = get_energy_counter, 249 .get_max_energy_range_uj = get_max_energy_counter, 250 .release = release_zone, 251 .set_enable = set_domain_enable, 252 .get_enable = get_domain_enable, 253 }, 254 /* RAPL_DOMAIN_PP0 */ 255 { 256 .get_energy_uj = get_energy_counter, 257 .get_max_energy_range_uj = get_max_energy_counter, 258 .release = release_zone, 259 .set_enable = set_domain_enable, 260 .get_enable = get_domain_enable, 261 }, 262 /* RAPL_DOMAIN_PP1 */ 263 { 264 .get_energy_uj = get_energy_counter, 265 .get_max_energy_range_uj = get_max_energy_counter, 266 .release = release_zone, 267 .set_enable = set_domain_enable, 268 .get_enable = get_domain_enable, 269 }, 270 /* RAPL_DOMAIN_DRAM */ 271 { 272 .get_energy_uj = get_energy_counter, 273 .get_max_energy_range_uj = get_max_energy_counter, 274 .release = release_zone, 275 .set_enable = set_domain_enable, 276 .get_enable = get_domain_enable, 277 }, 278 /* RAPL_DOMAIN_PLATFORM */ 279 { 280 .get_energy_uj = get_energy_counter, 281 .get_max_energy_range_uj = get_max_energy_counter, 282 .release = release_zone, 283 .set_enable = set_domain_enable, 284 .get_enable = get_domain_enable, 285 }, 286 }; 287 288 /* 289 * Constraint index used by powercap can be different than power limit (PL) 290 * index in that some PLs maybe missing due to non-existent MSRs. So we 291 * need to convert here by finding the valid PLs only (name populated). 
292 */ 293 static int contraint_to_pl(struct rapl_domain *rd, int cid) 294 { 295 int i, j; 296 297 for (i = 0, j = 0; i < NR_POWER_LIMITS; i++) { 298 if ((rd->rpl[i].name) && j++ == cid) { 299 pr_debug("%s: index %d\n", __func__, i); 300 return i; 301 } 302 } 303 pr_err("Cannot find matching power limit for constraint %d\n", cid); 304 305 return -EINVAL; 306 } 307 308 static int set_power_limit(struct powercap_zone *power_zone, int cid, 309 u64 power_limit) 310 { 311 struct rapl_domain *rd; 312 struct rapl_package *rp; 313 int ret = 0; 314 int id; 315 316 get_online_cpus(); 317 rd = power_zone_to_rapl_domain(power_zone); 318 id = contraint_to_pl(rd, cid); 319 if (id < 0) { 320 ret = id; 321 goto set_exit; 322 } 323 324 rp = rd->rp; 325 326 if (rd->state & DOMAIN_STATE_BIOS_LOCKED) { 327 dev_warn(&power_zone->dev, 328 "%s locked by BIOS, monitoring only\n", rd->name); 329 ret = -EACCES; 330 goto set_exit; 331 } 332 333 switch (rd->rpl[id].prim_id) { 334 case PL1_ENABLE: 335 rapl_write_data_raw(rd, POWER_LIMIT1, power_limit); 336 break; 337 case PL2_ENABLE: 338 rapl_write_data_raw(rd, POWER_LIMIT2, power_limit); 339 break; 340 default: 341 ret = -EINVAL; 342 } 343 if (!ret) 344 package_power_limit_irq_save(rp); 345 set_exit: 346 put_online_cpus(); 347 return ret; 348 } 349 350 static int get_current_power_limit(struct powercap_zone *power_zone, int cid, 351 u64 *data) 352 { 353 struct rapl_domain *rd; 354 u64 val; 355 int prim; 356 int ret = 0; 357 int id; 358 359 get_online_cpus(); 360 rd = power_zone_to_rapl_domain(power_zone); 361 id = contraint_to_pl(rd, cid); 362 if (id < 0) { 363 ret = id; 364 goto get_exit; 365 } 366 367 switch (rd->rpl[id].prim_id) { 368 case PL1_ENABLE: 369 prim = POWER_LIMIT1; 370 break; 371 case PL2_ENABLE: 372 prim = POWER_LIMIT2; 373 break; 374 default: 375 put_online_cpus(); 376 return -EINVAL; 377 } 378 if (rapl_read_data_raw(rd, prim, true, &val)) 379 ret = -EIO; 380 else 381 *data = val; 382 383 get_exit: 384 put_online_cpus(); 385 
386 return ret; 387 } 388 389 static int set_time_window(struct powercap_zone *power_zone, int cid, 390 u64 window) 391 { 392 struct rapl_domain *rd; 393 int ret = 0; 394 int id; 395 396 get_online_cpus(); 397 rd = power_zone_to_rapl_domain(power_zone); 398 id = contraint_to_pl(rd, cid); 399 if (id < 0) { 400 ret = id; 401 goto set_time_exit; 402 } 403 404 switch (rd->rpl[id].prim_id) { 405 case PL1_ENABLE: 406 rapl_write_data_raw(rd, TIME_WINDOW1, window); 407 break; 408 case PL2_ENABLE: 409 rapl_write_data_raw(rd, TIME_WINDOW2, window); 410 break; 411 default: 412 ret = -EINVAL; 413 } 414 415 set_time_exit: 416 put_online_cpus(); 417 return ret; 418 } 419 420 static int get_time_window(struct powercap_zone *power_zone, int cid, 421 u64 *data) 422 { 423 struct rapl_domain *rd; 424 u64 val; 425 int ret = 0; 426 int id; 427 428 get_online_cpus(); 429 rd = power_zone_to_rapl_domain(power_zone); 430 id = contraint_to_pl(rd, cid); 431 if (id < 0) { 432 ret = id; 433 goto get_time_exit; 434 } 435 436 switch (rd->rpl[id].prim_id) { 437 case PL1_ENABLE: 438 ret = rapl_read_data_raw(rd, TIME_WINDOW1, true, &val); 439 break; 440 case PL2_ENABLE: 441 ret = rapl_read_data_raw(rd, TIME_WINDOW2, true, &val); 442 break; 443 default: 444 put_online_cpus(); 445 return -EINVAL; 446 } 447 if (!ret) 448 *data = val; 449 450 get_time_exit: 451 put_online_cpus(); 452 453 return ret; 454 } 455 456 static const char *get_constraint_name(struct powercap_zone *power_zone, 457 int cid) 458 { 459 struct rapl_domain *rd; 460 int id; 461 462 rd = power_zone_to_rapl_domain(power_zone); 463 id = contraint_to_pl(rd, cid); 464 if (id >= 0) 465 return rd->rpl[id].name; 466 467 return NULL; 468 } 469 470 static int get_max_power(struct powercap_zone *power_zone, int id, u64 *data) 471 { 472 struct rapl_domain *rd; 473 u64 val; 474 int prim; 475 int ret = 0; 476 477 get_online_cpus(); 478 rd = power_zone_to_rapl_domain(power_zone); 479 switch (rd->rpl[id].prim_id) { 480 case PL1_ENABLE: 481 prim = 
THERMAL_SPEC_POWER; 482 break; 483 case PL2_ENABLE: 484 prim = MAX_POWER; 485 break; 486 default: 487 put_online_cpus(); 488 return -EINVAL; 489 } 490 if (rapl_read_data_raw(rd, prim, true, &val)) 491 ret = -EIO; 492 else 493 *data = val; 494 495 put_online_cpus(); 496 497 return ret; 498 } 499 500 static const struct powercap_zone_constraint_ops constraint_ops = { 501 .set_power_limit_uw = set_power_limit, 502 .get_power_limit_uw = get_current_power_limit, 503 .set_time_window_us = set_time_window, 504 .get_time_window_us = get_time_window, 505 .get_max_power_uw = get_max_power, 506 .get_name = get_constraint_name, 507 }; 508 509 /* called after domain detection and package level data are set */ 510 static void rapl_init_domains(struct rapl_package *rp) 511 { 512 enum rapl_domain_type i; 513 enum rapl_domain_reg_id j; 514 struct rapl_domain *rd = rp->domains; 515 516 for (i = 0; i < RAPL_DOMAIN_MAX; i++) { 517 unsigned int mask = rp->domain_map & (1 << i); 518 519 if (!mask) 520 continue; 521 522 rd->rp = rp; 523 rd->name = rapl_domain_names[i]; 524 rd->id = i; 525 rd->rpl[0].prim_id = PL1_ENABLE; 526 rd->rpl[0].name = pl1_name; 527 /* some domain may support two power limits */ 528 if (rp->priv->limits[i] == 2) { 529 rd->rpl[1].prim_id = PL2_ENABLE; 530 rd->rpl[1].name = pl2_name; 531 } 532 533 for (j = 0; j < RAPL_DOMAIN_REG_MAX; j++) 534 rd->regs[j] = rp->priv->regs[i][j]; 535 536 if (i == RAPL_DOMAIN_DRAM) { 537 rd->domain_energy_unit = 538 rapl_defaults->dram_domain_energy_unit; 539 if (rd->domain_energy_unit) 540 pr_info("DRAM domain energy unit %dpj\n", 541 rd->domain_energy_unit); 542 } 543 rd++; 544 } 545 } 546 547 static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type, 548 u64 value, int to_raw) 549 { 550 u64 units = 1; 551 struct rapl_package *rp = rd->rp; 552 u64 scale = 1; 553 554 switch (type) { 555 case POWER_UNIT: 556 units = rp->power_unit; 557 break; 558 case ENERGY_UNIT: 559 scale = ENERGY_UNIT_SCALE; 560 /* per domain unit takes 
precedence */ 561 if (rd->domain_energy_unit) 562 units = rd->domain_energy_unit; 563 else 564 units = rp->energy_unit; 565 break; 566 case TIME_UNIT: 567 return rapl_defaults->compute_time_window(rp, value, to_raw); 568 case ARBITRARY_UNIT: 569 default: 570 return value; 571 }; 572 573 if (to_raw) 574 return div64_u64(value, units) * scale; 575 576 value *= units; 577 578 return div64_u64(value, scale); 579 } 580 581 /* in the order of enum rapl_primitives */ 582 static struct rapl_primitive_info rpi[] = { 583 /* name, mask, shift, msr index, unit divisor */ 584 PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0, 585 RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0), 586 PRIMITIVE_INFO_INIT(POWER_LIMIT1, POWER_LIMIT1_MASK, 0, 587 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), 588 PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32, 589 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), 590 PRIMITIVE_INFO_INIT(FW_LOCK, POWER_LOW_LOCK, 31, 591 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 592 PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15, 593 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 594 PRIMITIVE_INFO_INIT(PL1_CLAMP, POWER_LIMIT1_CLAMP, 16, 595 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 596 PRIMITIVE_INFO_INIT(PL2_ENABLE, POWER_LIMIT2_ENABLE, 47, 597 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 598 PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48, 599 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 600 PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17, 601 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), 602 PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49, 603 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), 604 PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, POWER_INFO_THERMAL_SPEC_MASK, 605 0, RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), 606 PRIMITIVE_INFO_INIT(MAX_POWER, POWER_INFO_MAX_MASK, 32, 607 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), 608 PRIMITIVE_INFO_INIT(MIN_POWER, POWER_INFO_MIN_MASK, 16, 609 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), 610 PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, 
POWER_INFO_MAX_TIME_WIN_MASK, 48, 611 RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0), 612 PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0, 613 RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0), 614 PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0, 615 RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0), 616 /* non-hardware */ 617 PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT, 618 RAPL_PRIMITIVE_DERIVED), 619 {NULL, 0, 0, 0}, 620 }; 621 622 /* Read primitive data based on its related struct rapl_primitive_info. 623 * if xlate flag is set, return translated data based on data units, i.e. 624 * time, energy, and power. 625 * RAPL MSRs are non-architectual and are laid out not consistently across 626 * domains. Here we use primitive info to allow writing consolidated access 627 * functions. 628 * For a given primitive, it is processed by MSR mask and shift. Unit conversion 629 * is pre-assigned based on RAPL unit MSRs read at init time. 630 * 63-------------------------- 31--------------------------- 0 631 * | xxxxx (mask) | 632 * | |<- shift ----------------| 633 * 63-------------------------- 31--------------------------- 0 634 */ 635 static int rapl_read_data_raw(struct rapl_domain *rd, 636 enum rapl_primitives prim, bool xlate, u64 *data) 637 { 638 u64 value; 639 struct rapl_primitive_info *rp = &rpi[prim]; 640 struct reg_action ra; 641 int cpu; 642 643 if (!rp->name || rp->flag & RAPL_PRIMITIVE_DUMMY) 644 return -EINVAL; 645 646 ra.reg = rd->regs[rp->id]; 647 if (!ra.reg) 648 return -EINVAL; 649 650 cpu = rd->rp->lead_cpu; 651 652 /* domain with 2 limits has different bit */ 653 if (prim == FW_LOCK && rd->rp->priv->limits[rd->id] == 2) { 654 rp->mask = POWER_HIGH_LOCK; 655 rp->shift = 63; 656 } 657 /* non-hardware data are collected by the polling thread */ 658 if (rp->flag & RAPL_PRIMITIVE_DERIVED) { 659 *data = rd->rdd.primitives[prim]; 660 return 0; 661 } 662 663 ra.mask = rp->mask; 664 665 if (rd->rp->priv->read_raw(cpu, &ra)) { 666 pr_debug("failed to read 
reg 0x%llx on cpu %d\n", ra.reg, cpu); 667 return -EIO; 668 } 669 670 value = ra.value >> rp->shift; 671 672 if (xlate) 673 *data = rapl_unit_xlate(rd, rp->unit, value, 0); 674 else 675 *data = value; 676 677 return 0; 678 } 679 680 /* Similar use of primitive info in the read counterpart */ 681 static int rapl_write_data_raw(struct rapl_domain *rd, 682 enum rapl_primitives prim, 683 unsigned long long value) 684 { 685 struct rapl_primitive_info *rp = &rpi[prim]; 686 int cpu; 687 u64 bits; 688 struct reg_action ra; 689 int ret; 690 691 cpu = rd->rp->lead_cpu; 692 bits = rapl_unit_xlate(rd, rp->unit, value, 1); 693 bits <<= rp->shift; 694 bits &= rp->mask; 695 696 memset(&ra, 0, sizeof(ra)); 697 698 ra.reg = rd->regs[rp->id]; 699 ra.mask = rp->mask; 700 ra.value = bits; 701 702 ret = rd->rp->priv->write_raw(cpu, &ra); 703 704 return ret; 705 } 706 707 /* 708 * Raw RAPL data stored in MSRs are in certain scales. We need to 709 * convert them into standard units based on the units reported in 710 * the RAPL unit MSRs. This is specific to CPUs as the method to 711 * calculate units differ on different CPUs. 712 * We convert the units to below format based on CPUs. 713 * i.e. 
714 * energy unit: picoJoules : Represented in picoJoules by default 715 * power unit : microWatts : Represented in milliWatts by default 716 * time unit : microseconds: Represented in seconds by default 717 */ 718 static int rapl_check_unit_core(struct rapl_package *rp, int cpu) 719 { 720 struct reg_action ra; 721 u32 value; 722 723 ra.reg = rp->priv->reg_unit; 724 ra.mask = ~0; 725 if (rp->priv->read_raw(cpu, &ra)) { 726 pr_err("Failed to read power unit REG 0x%llx on CPU %d, exit.\n", 727 rp->priv->reg_unit, cpu); 728 return -ENODEV; 729 } 730 731 value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; 732 rp->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value); 733 734 value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; 735 rp->power_unit = 1000000 / (1 << value); 736 737 value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; 738 rp->time_unit = 1000000 / (1 << value); 739 740 pr_debug("Core CPU %s energy=%dpJ, time=%dus, power=%duW\n", 741 rp->name, rp->energy_unit, rp->time_unit, rp->power_unit); 742 743 return 0; 744 } 745 746 static int rapl_check_unit_atom(struct rapl_package *rp, int cpu) 747 { 748 struct reg_action ra; 749 u32 value; 750 751 ra.reg = rp->priv->reg_unit; 752 ra.mask = ~0; 753 if (rp->priv->read_raw(cpu, &ra)) { 754 pr_err("Failed to read power unit REG 0x%llx on CPU %d, exit.\n", 755 rp->priv->reg_unit, cpu); 756 return -ENODEV; 757 } 758 759 value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; 760 rp->energy_unit = ENERGY_UNIT_SCALE * 1 << value; 761 762 value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; 763 rp->power_unit = (1 << value) * 1000; 764 765 value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; 766 rp->time_unit = 1000000 / (1 << value); 767 768 pr_debug("Atom %s energy=%dpJ, time=%dus, power=%duW\n", 769 rp->name, rp->energy_unit, rp->time_unit, rp->power_unit); 770 771 return 0; 772 } 773 774 static void power_limit_irq_save_cpu(void *info) 775 { 776 u32 l, h = 0; 777 struct 
rapl_package *rp = (struct rapl_package *)info; 778 779 /* save the state of PLN irq mask bit before disabling it */ 780 rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h); 781 if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED)) { 782 rp->power_limit_irq = l & PACKAGE_THERM_INT_PLN_ENABLE; 783 rp->power_limit_irq |= PACKAGE_PLN_INT_SAVED; 784 } 785 l &= ~PACKAGE_THERM_INT_PLN_ENABLE; 786 wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); 787 } 788 789 /* REVISIT: 790 * When package power limit is set artificially low by RAPL, LVT 791 * thermal interrupt for package power limit should be ignored 792 * since we are not really exceeding the real limit. The intention 793 * is to avoid excessive interrupts while we are trying to save power. 794 * A useful feature might be routing the package_power_limit interrupt 795 * to userspace via eventfd. once we have a usecase, this is simple 796 * to do by adding an atomic notifier. 797 */ 798 799 static void package_power_limit_irq_save(struct rapl_package *rp) 800 { 801 if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN)) 802 return; 803 804 smp_call_function_single(rp->lead_cpu, power_limit_irq_save_cpu, rp, 1); 805 } 806 807 /* 808 * Restore per package power limit interrupt enable state. Called from cpu 809 * hotplug code on package removal. 
810 */ 811 static void package_power_limit_irq_restore(struct rapl_package *rp) 812 { 813 u32 l, h; 814 815 if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN)) 816 return; 817 818 /* irq enable state not saved, nothing to restore */ 819 if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED)) 820 return; 821 822 rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h); 823 824 if (rp->power_limit_irq & PACKAGE_THERM_INT_PLN_ENABLE) 825 l |= PACKAGE_THERM_INT_PLN_ENABLE; 826 else 827 l &= ~PACKAGE_THERM_INT_PLN_ENABLE; 828 829 wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); 830 } 831 832 static void set_floor_freq_default(struct rapl_domain *rd, bool mode) 833 { 834 int nr_powerlimit = find_nr_power_limit(rd); 835 836 /* always enable clamp such that p-state can go below OS requested 837 * range. power capping priority over guranteed frequency. 838 */ 839 rapl_write_data_raw(rd, PL1_CLAMP, mode); 840 841 /* some domains have pl2 */ 842 if (nr_powerlimit > 1) { 843 rapl_write_data_raw(rd, PL2_ENABLE, mode); 844 rapl_write_data_raw(rd, PL2_CLAMP, mode); 845 } 846 } 847 848 static void set_floor_freq_atom(struct rapl_domain *rd, bool enable) 849 { 850 static u32 power_ctrl_orig_val; 851 u32 mdata; 852 853 if (!rapl_defaults->floor_freq_reg_addr) { 854 pr_err("Invalid floor frequency config register\n"); 855 return; 856 } 857 858 if (!power_ctrl_orig_val) 859 iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_CR_READ, 860 rapl_defaults->floor_freq_reg_addr, 861 &power_ctrl_orig_val); 862 mdata = power_ctrl_orig_val; 863 if (enable) { 864 mdata &= ~(0x7f << 8); 865 mdata |= 1 << 8; 866 } 867 iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_CR_WRITE, 868 rapl_defaults->floor_freq_reg_addr, mdata); 869 } 870 871 static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value, 872 bool to_raw) 873 { 874 u64 f, y; /* fraction and exp. 
used for time unit */ 875 876 /* 877 * Special processing based on 2^Y*(1+F/4), refer 878 * to Intel Software Developer's manual Vol.3B: CH 14.9.3. 879 */ 880 if (!to_raw) { 881 f = (value & 0x60) >> 5; 882 y = value & 0x1f; 883 value = (1 << y) * (4 + f) * rp->time_unit / 4; 884 } else { 885 do_div(value, rp->time_unit); 886 y = ilog2(value); 887 f = div64_u64(4 * (value - (1 << y)), 1 << y); 888 value = (y & 0x1f) | ((f & 0x3) << 5); 889 } 890 return value; 891 } 892 893 static u64 rapl_compute_time_window_atom(struct rapl_package *rp, u64 value, 894 bool to_raw) 895 { 896 /* 897 * Atom time unit encoding is straight forward val * time_unit, 898 * where time_unit is default to 1 sec. Never 0. 899 */ 900 if (!to_raw) 901 return (value) ? value *= rp->time_unit : rp->time_unit; 902 903 value = div64_u64(value, rp->time_unit); 904 905 return value; 906 } 907 908 static const struct rapl_defaults rapl_defaults_core = { 909 .floor_freq_reg_addr = 0, 910 .check_unit = rapl_check_unit_core, 911 .set_floor_freq = set_floor_freq_default, 912 .compute_time_window = rapl_compute_time_window_core, 913 }; 914 915 static const struct rapl_defaults rapl_defaults_hsw_server = { 916 .check_unit = rapl_check_unit_core, 917 .set_floor_freq = set_floor_freq_default, 918 .compute_time_window = rapl_compute_time_window_core, 919 .dram_domain_energy_unit = 15300, 920 }; 921 922 static const struct rapl_defaults rapl_defaults_byt = { 923 .floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_BYT, 924 .check_unit = rapl_check_unit_atom, 925 .set_floor_freq = set_floor_freq_atom, 926 .compute_time_window = rapl_compute_time_window_atom, 927 }; 928 929 static const struct rapl_defaults rapl_defaults_tng = { 930 .floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_TNG, 931 .check_unit = rapl_check_unit_atom, 932 .set_floor_freq = set_floor_freq_atom, 933 .compute_time_window = rapl_compute_time_window_atom, 934 }; 935 936 static const struct rapl_defaults rapl_defaults_ann = { 937 
.floor_freq_reg_addr = 0, 938 .check_unit = rapl_check_unit_atom, 939 .set_floor_freq = NULL, 940 .compute_time_window = rapl_compute_time_window_atom, 941 }; 942 943 static const struct rapl_defaults rapl_defaults_cht = { 944 .floor_freq_reg_addr = 0, 945 .check_unit = rapl_check_unit_atom, 946 .set_floor_freq = NULL, 947 .compute_time_window = rapl_compute_time_window_atom, 948 }; 949 950 static const struct x86_cpu_id rapl_ids[] __initconst = { 951 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &rapl_defaults_core), 952 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &rapl_defaults_core), 953 954 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &rapl_defaults_core), 955 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &rapl_defaults_core), 956 957 X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &rapl_defaults_core), 958 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &rapl_defaults_core), 959 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &rapl_defaults_core), 960 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &rapl_defaults_hsw_server), 961 962 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &rapl_defaults_core), 963 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &rapl_defaults_core), 964 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &rapl_defaults_core), 965 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &rapl_defaults_hsw_server), 966 967 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &rapl_defaults_core), 968 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &rapl_defaults_core), 969 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &rapl_defaults_hsw_server), 970 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &rapl_defaults_core), 971 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &rapl_defaults_core), 972 X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &rapl_defaults_core), 973 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &rapl_defaults_core), 974 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &rapl_defaults_core), 975 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI, &rapl_defaults_core), 976 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &rapl_defaults_hsw_server), 977 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &rapl_defaults_hsw_server), 978 
X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &rapl_defaults_core), 979 X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &rapl_defaults_core), 980 X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &rapl_defaults_core), 981 982 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &rapl_defaults_byt), 983 X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &rapl_defaults_cht), 984 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &rapl_defaults_tng), 985 X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT_MID, &rapl_defaults_ann), 986 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &rapl_defaults_core), 987 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &rapl_defaults_core), 988 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &rapl_defaults_core), 989 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, &rapl_defaults_core), 990 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &rapl_defaults_core), 991 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, &rapl_defaults_core), 992 993 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &rapl_defaults_hsw_server), 994 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &rapl_defaults_hsw_server), 995 {} 996 }; 997 MODULE_DEVICE_TABLE(x86cpu, rapl_ids); 998 999 /* Read once for all raw primitive data for domains */ 1000 static void rapl_update_domain_data(struct rapl_package *rp) 1001 { 1002 int dmn, prim; 1003 u64 val; 1004 1005 for (dmn = 0; dmn < rp->nr_domains; dmn++) { 1006 pr_debug("update %s domain %s data\n", rp->name, 1007 rp->domains[dmn].name); 1008 /* exclude non-raw primitives */ 1009 for (prim = 0; prim < NR_RAW_PRIMITIVES; prim++) { 1010 if (!rapl_read_data_raw(&rp->domains[dmn], prim, 1011 rpi[prim].unit, &val)) 1012 rp->domains[dmn].rdd.primitives[prim] = val; 1013 } 1014 } 1015 1016 } 1017 1018 static int rapl_package_register_powercap(struct rapl_package *rp) 1019 { 1020 struct rapl_domain *rd; 1021 struct powercap_zone *power_zone = NULL; 1022 int nr_pl, ret; 1023 1024 /* Update the domain data of the new package */ 1025 rapl_update_domain_data(rp); 1026 1027 /* first we register package domain as the parent zone */ 
1028 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { 1029 if (rd->id == RAPL_DOMAIN_PACKAGE) { 1030 nr_pl = find_nr_power_limit(rd); 1031 pr_debug("register package domain %s\n", rp->name); 1032 power_zone = powercap_register_zone(&rd->power_zone, 1033 rp->priv->control_type, rp->name, 1034 NULL, &zone_ops[rd->id], nr_pl, 1035 &constraint_ops); 1036 if (IS_ERR(power_zone)) { 1037 pr_debug("failed to register power zone %s\n", 1038 rp->name); 1039 return PTR_ERR(power_zone); 1040 } 1041 /* track parent zone in per package/socket data */ 1042 rp->power_zone = power_zone; 1043 /* done, only one package domain per socket */ 1044 break; 1045 } 1046 } 1047 if (!power_zone) { 1048 pr_err("no package domain found, unknown topology!\n"); 1049 return -ENODEV; 1050 } 1051 /* now register domains as children of the socket/package */ 1052 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { 1053 if (rd->id == RAPL_DOMAIN_PACKAGE) 1054 continue; 1055 /* number of power limits per domain varies */ 1056 nr_pl = find_nr_power_limit(rd); 1057 power_zone = powercap_register_zone(&rd->power_zone, 1058 rp->priv->control_type, 1059 rd->name, rp->power_zone, 1060 &zone_ops[rd->id], nr_pl, 1061 &constraint_ops); 1062 1063 if (IS_ERR(power_zone)) { 1064 pr_debug("failed to register power_zone, %s:%s\n", 1065 rp->name, rd->name); 1066 ret = PTR_ERR(power_zone); 1067 goto err_cleanup; 1068 } 1069 } 1070 return 0; 1071 1072 err_cleanup: 1073 /* 1074 * Clean up previously initialized domains within the package if we 1075 * failed after the first domain setup. 
1076 */ 1077 while (--rd >= rp->domains) { 1078 pr_debug("unregister %s domain %s\n", rp->name, rd->name); 1079 powercap_unregister_zone(rp->priv->control_type, 1080 &rd->power_zone); 1081 } 1082 1083 return ret; 1084 } 1085 1086 int rapl_add_platform_domain(struct rapl_if_priv *priv) 1087 { 1088 struct rapl_domain *rd; 1089 struct powercap_zone *power_zone; 1090 struct reg_action ra; 1091 int ret; 1092 1093 ra.reg = priv->regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_STATUS]; 1094 ra.mask = ~0; 1095 ret = priv->read_raw(0, &ra); 1096 if (ret || !ra.value) 1097 return -ENODEV; 1098 1099 ra.reg = priv->regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_LIMIT]; 1100 ra.mask = ~0; 1101 ret = priv->read_raw(0, &ra); 1102 if (ret || !ra.value) 1103 return -ENODEV; 1104 1105 rd = kzalloc(sizeof(*rd), GFP_KERNEL); 1106 if (!rd) 1107 return -ENOMEM; 1108 1109 rd->name = rapl_domain_names[RAPL_DOMAIN_PLATFORM]; 1110 rd->id = RAPL_DOMAIN_PLATFORM; 1111 rd->regs[RAPL_DOMAIN_REG_LIMIT] = 1112 priv->regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_LIMIT]; 1113 rd->regs[RAPL_DOMAIN_REG_STATUS] = 1114 priv->regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_STATUS]; 1115 rd->rpl[0].prim_id = PL1_ENABLE; 1116 rd->rpl[0].name = pl1_name; 1117 rd->rpl[1].prim_id = PL2_ENABLE; 1118 rd->rpl[1].name = pl2_name; 1119 rd->rp = rapl_find_package_domain(0, priv); 1120 1121 power_zone = powercap_register_zone(&rd->power_zone, priv->control_type, 1122 "psys", NULL, 1123 &zone_ops[RAPL_DOMAIN_PLATFORM], 1124 2, &constraint_ops); 1125 1126 if (IS_ERR(power_zone)) { 1127 kfree(rd); 1128 return PTR_ERR(power_zone); 1129 } 1130 1131 priv->platform_rapl_domain = rd; 1132 1133 return 0; 1134 } 1135 EXPORT_SYMBOL_GPL(rapl_add_platform_domain); 1136 1137 void rapl_remove_platform_domain(struct rapl_if_priv *priv) 1138 { 1139 if (priv->platform_rapl_domain) { 1140 powercap_unregister_zone(priv->control_type, 1141 &priv->platform_rapl_domain->power_zone); 1142 kfree(priv->platform_rapl_domain); 1143 } 1144 } 1145 
EXPORT_SYMBOL_GPL(rapl_remove_platform_domain); 1146 1147 static int rapl_check_domain(int cpu, int domain, struct rapl_package *rp) 1148 { 1149 struct reg_action ra; 1150 1151 switch (domain) { 1152 case RAPL_DOMAIN_PACKAGE: 1153 case RAPL_DOMAIN_PP0: 1154 case RAPL_DOMAIN_PP1: 1155 case RAPL_DOMAIN_DRAM: 1156 ra.reg = rp->priv->regs[domain][RAPL_DOMAIN_REG_STATUS]; 1157 break; 1158 case RAPL_DOMAIN_PLATFORM: 1159 /* PSYS(PLATFORM) is not a CPU domain, so avoid printng error */ 1160 return -EINVAL; 1161 default: 1162 pr_err("invalid domain id %d\n", domain); 1163 return -EINVAL; 1164 } 1165 /* make sure domain counters are available and contains non-zero 1166 * values, otherwise skip it. 1167 */ 1168 1169 ra.mask = ~0; 1170 if (rp->priv->read_raw(cpu, &ra) || !ra.value) 1171 return -ENODEV; 1172 1173 return 0; 1174 } 1175 1176 /* 1177 * Check if power limits are available. Two cases when they are not available: 1178 * 1. Locked by BIOS, in this case we still provide read-only access so that 1179 * users can see what limit is set by the BIOS. 1180 * 2. Some CPUs make some domains monitoring only which means PLx MSRs may not 1181 * exist at all. In this case, we do not show the constraints in powercap. 1182 * 1183 * Called after domains are detected and initialized. 
1184 */ 1185 static void rapl_detect_powerlimit(struct rapl_domain *rd) 1186 { 1187 u64 val64; 1188 int i; 1189 1190 /* check if the domain is locked by BIOS, ignore if MSR doesn't exist */ 1191 if (!rapl_read_data_raw(rd, FW_LOCK, false, &val64)) { 1192 if (val64) { 1193 pr_info("RAPL %s domain %s locked by BIOS\n", 1194 rd->rp->name, rd->name); 1195 rd->state |= DOMAIN_STATE_BIOS_LOCKED; 1196 } 1197 } 1198 /* check if power limit MSR exists, otherwise domain is monitoring only */ 1199 for (i = 0; i < NR_POWER_LIMITS; i++) { 1200 int prim = rd->rpl[i].prim_id; 1201 1202 if (rapl_read_data_raw(rd, prim, false, &val64)) 1203 rd->rpl[i].name = NULL; 1204 } 1205 } 1206 1207 /* Detect active and valid domains for the given CPU, caller must 1208 * ensure the CPU belongs to the targeted package and CPU hotlug is disabled. 1209 */ 1210 static int rapl_detect_domains(struct rapl_package *rp, int cpu) 1211 { 1212 struct rapl_domain *rd; 1213 int i; 1214 1215 for (i = 0; i < RAPL_DOMAIN_MAX; i++) { 1216 /* use physical package id to read counters */ 1217 if (!rapl_check_domain(cpu, i, rp)) { 1218 rp->domain_map |= 1 << i; 1219 pr_info("Found RAPL domain %s\n", rapl_domain_names[i]); 1220 } 1221 } 1222 rp->nr_domains = bitmap_weight(&rp->domain_map, RAPL_DOMAIN_MAX); 1223 if (!rp->nr_domains) { 1224 pr_debug("no valid rapl domains found in %s\n", rp->name); 1225 return -ENODEV; 1226 } 1227 pr_debug("found %d domains on %s\n", rp->nr_domains, rp->name); 1228 1229 rp->domains = kcalloc(rp->nr_domains + 1, sizeof(struct rapl_domain), 1230 GFP_KERNEL); 1231 if (!rp->domains) 1232 return -ENOMEM; 1233 1234 rapl_init_domains(rp); 1235 1236 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) 1237 rapl_detect_powerlimit(rd); 1238 1239 return 0; 1240 } 1241 1242 /* called from CPU hotplug notifier, hotplug lock held */ 1243 void rapl_remove_package(struct rapl_package *rp) 1244 { 1245 struct rapl_domain *rd, *rd_package = NULL; 1246 1247 
package_power_limit_irq_restore(rp); 1248 1249 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { 1250 rapl_write_data_raw(rd, PL1_ENABLE, 0); 1251 rapl_write_data_raw(rd, PL1_CLAMP, 0); 1252 if (find_nr_power_limit(rd) > 1) { 1253 rapl_write_data_raw(rd, PL2_ENABLE, 0); 1254 rapl_write_data_raw(rd, PL2_CLAMP, 0); 1255 } 1256 if (rd->id == RAPL_DOMAIN_PACKAGE) { 1257 rd_package = rd; 1258 continue; 1259 } 1260 pr_debug("remove package, undo power limit on %s: %s\n", 1261 rp->name, rd->name); 1262 powercap_unregister_zone(rp->priv->control_type, 1263 &rd->power_zone); 1264 } 1265 /* do parent zone last */ 1266 powercap_unregister_zone(rp->priv->control_type, 1267 &rd_package->power_zone); 1268 list_del(&rp->plist); 1269 kfree(rp); 1270 } 1271 EXPORT_SYMBOL_GPL(rapl_remove_package); 1272 1273 /* caller to ensure CPU hotplug lock is held */ 1274 struct rapl_package *rapl_find_package_domain(int cpu, struct rapl_if_priv *priv) 1275 { 1276 int id = topology_logical_die_id(cpu); 1277 struct rapl_package *rp; 1278 1279 list_for_each_entry(rp, &rapl_packages, plist) { 1280 if (rp->id == id 1281 && rp->priv->control_type == priv->control_type) 1282 return rp; 1283 } 1284 1285 return NULL; 1286 } 1287 EXPORT_SYMBOL_GPL(rapl_find_package_domain); 1288 1289 /* called from CPU hotplug notifier, hotplug lock held */ 1290 struct rapl_package *rapl_add_package(int cpu, struct rapl_if_priv *priv) 1291 { 1292 int id = topology_logical_die_id(cpu); 1293 struct rapl_package *rp; 1294 struct cpuinfo_x86 *c = &cpu_data(cpu); 1295 int ret; 1296 1297 if (!rapl_defaults) 1298 return ERR_PTR(-ENODEV); 1299 1300 rp = kzalloc(sizeof(struct rapl_package), GFP_KERNEL); 1301 if (!rp) 1302 return ERR_PTR(-ENOMEM); 1303 1304 /* add the new package to the list */ 1305 rp->id = id; 1306 rp->lead_cpu = cpu; 1307 rp->priv = priv; 1308 1309 if (topology_max_die_per_package() > 1) 1310 snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, 1311 "package-%d-die-%d", c->phys_proc_id, 
c->cpu_die_id); 1312 else 1313 snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d", 1314 c->phys_proc_id); 1315 1316 /* check if the package contains valid domains */ 1317 if (rapl_detect_domains(rp, cpu) || rapl_defaults->check_unit(rp, cpu)) { 1318 ret = -ENODEV; 1319 goto err_free_package; 1320 } 1321 ret = rapl_package_register_powercap(rp); 1322 if (!ret) { 1323 INIT_LIST_HEAD(&rp->plist); 1324 list_add(&rp->plist, &rapl_packages); 1325 return rp; 1326 } 1327 1328 err_free_package: 1329 kfree(rp->domains); 1330 kfree(rp); 1331 return ERR_PTR(ret); 1332 } 1333 EXPORT_SYMBOL_GPL(rapl_add_package); 1334 1335 static void power_limit_state_save(void) 1336 { 1337 struct rapl_package *rp; 1338 struct rapl_domain *rd; 1339 int nr_pl, ret, i; 1340 1341 get_online_cpus(); 1342 list_for_each_entry(rp, &rapl_packages, plist) { 1343 if (!rp->power_zone) 1344 continue; 1345 rd = power_zone_to_rapl_domain(rp->power_zone); 1346 nr_pl = find_nr_power_limit(rd); 1347 for (i = 0; i < nr_pl; i++) { 1348 switch (rd->rpl[i].prim_id) { 1349 case PL1_ENABLE: 1350 ret = rapl_read_data_raw(rd, 1351 POWER_LIMIT1, true, 1352 &rd->rpl[i].last_power_limit); 1353 if (ret) 1354 rd->rpl[i].last_power_limit = 0; 1355 break; 1356 case PL2_ENABLE: 1357 ret = rapl_read_data_raw(rd, 1358 POWER_LIMIT2, true, 1359 &rd->rpl[i].last_power_limit); 1360 if (ret) 1361 rd->rpl[i].last_power_limit = 0; 1362 break; 1363 } 1364 } 1365 } 1366 put_online_cpus(); 1367 } 1368 1369 static void power_limit_state_restore(void) 1370 { 1371 struct rapl_package *rp; 1372 struct rapl_domain *rd; 1373 int nr_pl, i; 1374 1375 get_online_cpus(); 1376 list_for_each_entry(rp, &rapl_packages, plist) { 1377 if (!rp->power_zone) 1378 continue; 1379 rd = power_zone_to_rapl_domain(rp->power_zone); 1380 nr_pl = find_nr_power_limit(rd); 1381 for (i = 0; i < nr_pl; i++) { 1382 switch (rd->rpl[i].prim_id) { 1383 case PL1_ENABLE: 1384 if (rd->rpl[i].last_power_limit) 1385 rapl_write_data_raw(rd, POWER_LIMIT1, 1386 
rd->rpl[i].last_power_limit); 1387 break; 1388 case PL2_ENABLE: 1389 if (rd->rpl[i].last_power_limit) 1390 rapl_write_data_raw(rd, POWER_LIMIT2, 1391 rd->rpl[i].last_power_limit); 1392 break; 1393 } 1394 } 1395 } 1396 put_online_cpus(); 1397 } 1398 1399 static int rapl_pm_callback(struct notifier_block *nb, 1400 unsigned long mode, void *_unused) 1401 { 1402 switch (mode) { 1403 case PM_SUSPEND_PREPARE: 1404 power_limit_state_save(); 1405 break; 1406 case PM_POST_SUSPEND: 1407 power_limit_state_restore(); 1408 break; 1409 } 1410 return NOTIFY_OK; 1411 } 1412 1413 static struct notifier_block rapl_pm_notifier = { 1414 .notifier_call = rapl_pm_callback, 1415 }; 1416 1417 static struct platform_device *rapl_msr_platdev; 1418 1419 static int __init rapl_init(void) 1420 { 1421 const struct x86_cpu_id *id; 1422 int ret; 1423 1424 id = x86_match_cpu(rapl_ids); 1425 if (!id) { 1426 pr_err("driver does not support CPU family %d model %d\n", 1427 boot_cpu_data.x86, boot_cpu_data.x86_model); 1428 1429 return -ENODEV; 1430 } 1431 1432 rapl_defaults = (struct rapl_defaults *)id->driver_data; 1433 1434 ret = register_pm_notifier(&rapl_pm_notifier); 1435 if (ret) 1436 return ret; 1437 1438 rapl_msr_platdev = platform_device_alloc("intel_rapl_msr", 0); 1439 if (!rapl_msr_platdev) { 1440 ret = -ENOMEM; 1441 goto end; 1442 } 1443 1444 ret = platform_device_add(rapl_msr_platdev); 1445 if (ret) 1446 platform_device_put(rapl_msr_platdev); 1447 1448 end: 1449 if (ret) 1450 unregister_pm_notifier(&rapl_pm_notifier); 1451 1452 return ret; 1453 } 1454 1455 static void __exit rapl_exit(void) 1456 { 1457 platform_device_unregister(rapl_msr_platdev); 1458 unregister_pm_notifier(&rapl_pm_notifier); 1459 } 1460 1461 fs_initcall(rapl_init); 1462 module_exit(rapl_exit); 1463 1464 MODULE_DESCRIPTION("Intel Runtime Average Power Limit (RAPL) common code"); 1465 MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@intel.com>"); 1466 MODULE_LICENSE("GPL v2"); 1467