// SPDX-License-Identifier: GPL-2.0-only
/*
 * Common code for Intel Running Average Power Limit (RAPL) support.
 * Copyright (c) 2019, Intel Corporation.
 */
/* Prefix every pr_*() message emitted from this file with the module name. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/log2.h>
#include <linux/bitmap.h>
#include <linux/delay.h>
#include <linux/sysfs.h>
#include <linux/cpu.h>
#include <linux/powercap.h>
#include <linux/suspend.h>
#include <linux/intel_rapl.h>
#include <linux/processor.h>
#include <linux/platform_device.h>

#include <asm/iosf_mbi.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>

/* bitmasks for RAPL MSRs, used by primitive access functions */
#define ENERGY_STATUS_MASK      0xffffffff

/* Power limit 1: value in bits 14:0, enable in bit 15, clamp in bit 16 */
#define POWER_LIMIT1_MASK       0x7FFF
#define POWER_LIMIT1_ENABLE     BIT(15)
#define POWER_LIMIT1_CLAMP      BIT(16)

/* Power limit 2: value in bits 46:32, enable in bit 47, clamp in bit 48 */
#define POWER_LIMIT2_MASK       (0x7FFFULL<<32)
#define POWER_LIMIT2_ENABLE     BIT_ULL(47)
#define POWER_LIMIT2_CLAMP      BIT_ULL(48)
/* Lock bit position depends on whether the register holds one or two limits */
#define POWER_HIGH_LOCK         BIT_ULL(63)
#define POWER_LOW_LOCK          BIT(31)

#define POWER_LIMIT4_MASK		0x1FFF

/* 7-bit time window fields that accompany power limit 1 and 2 */
#define TIME_WINDOW1_MASK       (0x7FULL<<17)
#define TIME_WINDOW2_MASK       (0x7FULL<<49)

/* Fields of the unit register decoded by the rapl_check_unit_*() helpers */
#define POWER_UNIT_OFFSET	0
#define POWER_UNIT_MASK		0x0F

#define ENERGY_UNIT_OFFSET	0x08
#define ENERGY_UNIT_MASK	0x1F00

#define TIME_UNIT_OFFSET	0x10
#define TIME_UNIT_MASK		0xF0000

/* Fields of the power info register (RAPL_DOMAIN_REG_INFO primitives) */
#define POWER_INFO_MAX_MASK     (0x7fffULL<<32)
#define POWER_INFO_MIN_MASK     (0x7fffULL<<16)
#define POWER_INFO_MAX_TIME_WIN_MASK     (0x3fULL<<48)
#define POWER_INFO_THERMAL_SPEC_MASK     0x7fff

#define PERF_STATUS_THROTTLE_TIME_MASK 0xffffffff
#define PP_POLICY_MASK         0x1F

/*
 * SPR has different layout for Psys Domain PowerLimit registers.
 * There are 17 bits of PL1 and PL2 instead of 15 bits.
67 * The Enable bits and TimeWindow bits are also shifted as a result. 68 */ 69 #define PSYS_POWER_LIMIT1_MASK 0x1FFFF 70 #define PSYS_POWER_LIMIT1_ENABLE BIT(17) 71 72 #define PSYS_POWER_LIMIT2_MASK (0x1FFFFULL<<32) 73 #define PSYS_POWER_LIMIT2_ENABLE BIT_ULL(49) 74 75 #define PSYS_TIME_WINDOW1_MASK (0x7FULL<<19) 76 #define PSYS_TIME_WINDOW2_MASK (0x7FULL<<51) 77 78 /* bitmasks for RAPL TPMI, used by primitive access functions */ 79 #define TPMI_POWER_LIMIT_MASK 0x3FFFF 80 #define TPMI_POWER_LIMIT_ENABLE BIT_ULL(62) 81 #define TPMI_TIME_WINDOW_MASK (0x7FULL<<18) 82 #define TPMI_INFO_SPEC_MASK 0x3FFFF 83 #define TPMI_INFO_MIN_MASK (0x3FFFFULL << 18) 84 #define TPMI_INFO_MAX_MASK (0x3FFFFULL << 36) 85 #define TPMI_INFO_MAX_TIME_WIN_MASK (0x7FULL << 54) 86 87 /* Non HW constants */ 88 #define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */ 89 #define RAPL_PRIMITIVE_DUMMY BIT(2) 90 91 #define TIME_WINDOW_MAX_MSEC 40000 92 #define TIME_WINDOW_MIN_MSEC 250 93 #define ENERGY_UNIT_SCALE 1000 /* scale from driver unit to powercap unit */ 94 enum unit_type { 95 ARBITRARY_UNIT, /* no translation */ 96 POWER_UNIT, 97 ENERGY_UNIT, 98 TIME_UNIT, 99 }; 100 101 /* per domain data, some are optional */ 102 #define NR_RAW_PRIMITIVES (NR_RAPL_PRIMITIVES - 2) 103 104 #define DOMAIN_STATE_INACTIVE BIT(0) 105 #define DOMAIN_STATE_POWER_LIMIT_SET BIT(1) 106 107 static const char *pl_names[NR_POWER_LIMITS] = { 108 [POWER_LIMIT1] = "long_term", 109 [POWER_LIMIT2] = "short_term", 110 [POWER_LIMIT4] = "peak_power", 111 }; 112 113 enum pl_prims { 114 PL_ENABLE, 115 PL_CLAMP, 116 PL_LIMIT, 117 PL_TIME_WINDOW, 118 PL_MAX_POWER, 119 PL_LOCK, 120 }; 121 122 static bool is_pl_valid(struct rapl_domain *rd, int pl) 123 { 124 if (pl < POWER_LIMIT1 || pl > POWER_LIMIT4) 125 return false; 126 return rd->rpl[pl].name ? 
true : false; 127 } 128 129 static int get_pl_lock_prim(struct rapl_domain *rd, int pl) 130 { 131 if (rd->rp->priv->type == RAPL_IF_TPMI) { 132 if (pl == POWER_LIMIT1) 133 return PL1_LOCK; 134 if (pl == POWER_LIMIT2) 135 return PL2_LOCK; 136 if (pl == POWER_LIMIT4) 137 return PL4_LOCK; 138 } 139 140 /* MSR/MMIO Interface doesn't have Lock bit for PL4 */ 141 if (pl == POWER_LIMIT4) 142 return -EINVAL; 143 144 /* 145 * Power Limit register that supports two power limits has a different 146 * bit position for the Lock bit. 147 */ 148 if (rd->rp->priv->limits[rd->id] & BIT(POWER_LIMIT2)) 149 return FW_HIGH_LOCK; 150 return FW_LOCK; 151 } 152 153 static int get_pl_prim(struct rapl_domain *rd, int pl, enum pl_prims prim) 154 { 155 switch (pl) { 156 case POWER_LIMIT1: 157 if (prim == PL_ENABLE) 158 return PL1_ENABLE; 159 if (prim == PL_CLAMP && rd->rp->priv->type != RAPL_IF_TPMI) 160 return PL1_CLAMP; 161 if (prim == PL_LIMIT) 162 return POWER_LIMIT1; 163 if (prim == PL_TIME_WINDOW) 164 return TIME_WINDOW1; 165 if (prim == PL_MAX_POWER) 166 return THERMAL_SPEC_POWER; 167 if (prim == PL_LOCK) 168 return get_pl_lock_prim(rd, pl); 169 return -EINVAL; 170 case POWER_LIMIT2: 171 if (prim == PL_ENABLE) 172 return PL2_ENABLE; 173 if (prim == PL_CLAMP && rd->rp->priv->type != RAPL_IF_TPMI) 174 return PL2_CLAMP; 175 if (prim == PL_LIMIT) 176 return POWER_LIMIT2; 177 if (prim == PL_TIME_WINDOW) 178 return TIME_WINDOW2; 179 if (prim == PL_MAX_POWER) 180 return MAX_POWER; 181 if (prim == PL_LOCK) 182 return get_pl_lock_prim(rd, pl); 183 return -EINVAL; 184 case POWER_LIMIT4: 185 if (prim == PL_LIMIT) 186 return POWER_LIMIT4; 187 if (prim == PL_ENABLE) 188 return PL4_ENABLE; 189 /* PL4 would be around two times PL2, use same prim as PL2. 
*/ 190 if (prim == PL_MAX_POWER) 191 return MAX_POWER; 192 if (prim == PL_LOCK) 193 return get_pl_lock_prim(rd, pl); 194 return -EINVAL; 195 default: 196 return -EINVAL; 197 } 198 } 199 200 #define power_zone_to_rapl_domain(_zone) \ 201 container_of(_zone, struct rapl_domain, power_zone) 202 203 struct rapl_defaults { 204 u8 floor_freq_reg_addr; 205 int (*check_unit)(struct rapl_domain *rd); 206 void (*set_floor_freq)(struct rapl_domain *rd, bool mode); 207 u64 (*compute_time_window)(struct rapl_domain *rd, u64 val, 208 bool to_raw); 209 unsigned int dram_domain_energy_unit; 210 unsigned int psys_domain_energy_unit; 211 bool spr_psys_bits; 212 }; 213 static struct rapl_defaults *defaults_msr; 214 static const struct rapl_defaults defaults_tpmi; 215 216 static struct rapl_defaults *get_defaults(struct rapl_package *rp) 217 { 218 return rp->priv->defaults; 219 } 220 221 /* Sideband MBI registers */ 222 #define IOSF_CPU_POWER_BUDGET_CTL_BYT (0x2) 223 #define IOSF_CPU_POWER_BUDGET_CTL_TNG (0xdf) 224 225 #define PACKAGE_PLN_INT_SAVED BIT(0) 226 #define MAX_PRIM_NAME (32) 227 228 /* per domain data. used to describe individual knobs such that access function 229 * can be consolidated into one instead of many inline functions. 
230 */ 231 struct rapl_primitive_info { 232 const char *name; 233 u64 mask; 234 int shift; 235 enum rapl_domain_reg_id id; 236 enum unit_type unit; 237 u32 flag; 238 }; 239 240 #define PRIMITIVE_INFO_INIT(p, m, s, i, u, f) { \ 241 .name = #p, \ 242 .mask = m, \ 243 .shift = s, \ 244 .id = i, \ 245 .unit = u, \ 246 .flag = f \ 247 } 248 249 static void rapl_init_domains(struct rapl_package *rp); 250 static int rapl_read_data_raw(struct rapl_domain *rd, 251 enum rapl_primitives prim, 252 bool xlate, u64 *data); 253 static int rapl_write_data_raw(struct rapl_domain *rd, 254 enum rapl_primitives prim, 255 unsigned long long value); 256 static int rapl_read_pl_data(struct rapl_domain *rd, int pl, 257 enum pl_prims pl_prim, 258 bool xlate, u64 *data); 259 static int rapl_write_pl_data(struct rapl_domain *rd, int pl, 260 enum pl_prims pl_prim, 261 unsigned long long value); 262 static u64 rapl_unit_xlate(struct rapl_domain *rd, 263 enum unit_type type, u64 value, int to_raw); 264 static void package_power_limit_irq_save(struct rapl_package *rp); 265 266 static LIST_HEAD(rapl_packages); /* guarded by CPU hotplug lock */ 267 268 static const char *const rapl_domain_names[] = { 269 "package", 270 "core", 271 "uncore", 272 "dram", 273 "psys", 274 }; 275 276 static int get_energy_counter(struct powercap_zone *power_zone, 277 u64 *energy_raw) 278 { 279 struct rapl_domain *rd; 280 u64 energy_now; 281 282 /* prevent CPU hotplug, make sure the RAPL domain does not go 283 * away while reading the counter. 
284 */ 285 cpus_read_lock(); 286 rd = power_zone_to_rapl_domain(power_zone); 287 288 if (!rapl_read_data_raw(rd, ENERGY_COUNTER, true, &energy_now)) { 289 *energy_raw = energy_now; 290 cpus_read_unlock(); 291 292 return 0; 293 } 294 cpus_read_unlock(); 295 296 return -EIO; 297 } 298 299 static int get_max_energy_counter(struct powercap_zone *pcd_dev, u64 *energy) 300 { 301 struct rapl_domain *rd = power_zone_to_rapl_domain(pcd_dev); 302 303 *energy = rapl_unit_xlate(rd, ENERGY_UNIT, ENERGY_STATUS_MASK, 0); 304 return 0; 305 } 306 307 static int release_zone(struct powercap_zone *power_zone) 308 { 309 struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); 310 struct rapl_package *rp = rd->rp; 311 312 /* package zone is the last zone of a package, we can free 313 * memory here since all children has been unregistered. 314 */ 315 if (rd->id == RAPL_DOMAIN_PACKAGE) { 316 kfree(rd); 317 rp->domains = NULL; 318 } 319 320 return 0; 321 322 } 323 324 static int find_nr_power_limit(struct rapl_domain *rd) 325 { 326 int i, nr_pl = 0; 327 328 for (i = 0; i < NR_POWER_LIMITS; i++) { 329 if (is_pl_valid(rd, i)) 330 nr_pl++; 331 } 332 333 return nr_pl; 334 } 335 336 static int set_domain_enable(struct powercap_zone *power_zone, bool mode) 337 { 338 struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); 339 struct rapl_defaults *defaults = get_defaults(rd->rp); 340 int ret; 341 342 cpus_read_lock(); 343 ret = rapl_write_pl_data(rd, POWER_LIMIT1, PL_ENABLE, mode); 344 if (!ret && defaults->set_floor_freq) 345 defaults->set_floor_freq(rd, mode); 346 cpus_read_unlock(); 347 348 return ret; 349 } 350 351 static int get_domain_enable(struct powercap_zone *power_zone, bool *mode) 352 { 353 struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); 354 u64 val; 355 int ret; 356 357 if (rd->rpl[POWER_LIMIT1].locked) { 358 *mode = false; 359 return 0; 360 } 361 cpus_read_lock(); 362 ret = rapl_read_pl_data(rd, POWER_LIMIT1, PL_ENABLE, true, &val); 363 if (!ret) 364 
*mode = val; 365 cpus_read_unlock(); 366 367 return ret; 368 } 369 370 /* per RAPL domain ops, in the order of rapl_domain_type */ 371 static const struct powercap_zone_ops zone_ops[] = { 372 /* RAPL_DOMAIN_PACKAGE */ 373 { 374 .get_energy_uj = get_energy_counter, 375 .get_max_energy_range_uj = get_max_energy_counter, 376 .release = release_zone, 377 .set_enable = set_domain_enable, 378 .get_enable = get_domain_enable, 379 }, 380 /* RAPL_DOMAIN_PP0 */ 381 { 382 .get_energy_uj = get_energy_counter, 383 .get_max_energy_range_uj = get_max_energy_counter, 384 .release = release_zone, 385 .set_enable = set_domain_enable, 386 .get_enable = get_domain_enable, 387 }, 388 /* RAPL_DOMAIN_PP1 */ 389 { 390 .get_energy_uj = get_energy_counter, 391 .get_max_energy_range_uj = get_max_energy_counter, 392 .release = release_zone, 393 .set_enable = set_domain_enable, 394 .get_enable = get_domain_enable, 395 }, 396 /* RAPL_DOMAIN_DRAM */ 397 { 398 .get_energy_uj = get_energy_counter, 399 .get_max_energy_range_uj = get_max_energy_counter, 400 .release = release_zone, 401 .set_enable = set_domain_enable, 402 .get_enable = get_domain_enable, 403 }, 404 /* RAPL_DOMAIN_PLATFORM */ 405 { 406 .get_energy_uj = get_energy_counter, 407 .get_max_energy_range_uj = get_max_energy_counter, 408 .release = release_zone, 409 .set_enable = set_domain_enable, 410 .get_enable = get_domain_enable, 411 }, 412 }; 413 414 /* 415 * Constraint index used by powercap can be different than power limit (PL) 416 * index in that some PLs maybe missing due to non-existent MSRs. So we 417 * need to convert here by finding the valid PLs only (name populated). 
418 */ 419 static int contraint_to_pl(struct rapl_domain *rd, int cid) 420 { 421 int i, j; 422 423 for (i = POWER_LIMIT1, j = 0; i < NR_POWER_LIMITS; i++) { 424 if (is_pl_valid(rd, i) && j++ == cid) { 425 pr_debug("%s: index %d\n", __func__, i); 426 return i; 427 } 428 } 429 pr_err("Cannot find matching power limit for constraint %d\n", cid); 430 431 return -EINVAL; 432 } 433 434 static int set_power_limit(struct powercap_zone *power_zone, int cid, 435 u64 power_limit) 436 { 437 struct rapl_domain *rd; 438 struct rapl_package *rp; 439 int ret = 0; 440 int id; 441 442 cpus_read_lock(); 443 rd = power_zone_to_rapl_domain(power_zone); 444 id = contraint_to_pl(rd, cid); 445 rp = rd->rp; 446 447 ret = rapl_write_pl_data(rd, id, PL_LIMIT, power_limit); 448 if (!ret) 449 package_power_limit_irq_save(rp); 450 cpus_read_unlock(); 451 return ret; 452 } 453 454 static int get_current_power_limit(struct powercap_zone *power_zone, int cid, 455 u64 *data) 456 { 457 struct rapl_domain *rd; 458 u64 val; 459 int ret = 0; 460 int id; 461 462 cpus_read_lock(); 463 rd = power_zone_to_rapl_domain(power_zone); 464 id = contraint_to_pl(rd, cid); 465 466 ret = rapl_read_pl_data(rd, id, PL_LIMIT, true, &val); 467 if (!ret) 468 *data = val; 469 470 cpus_read_unlock(); 471 472 return ret; 473 } 474 475 static int set_time_window(struct powercap_zone *power_zone, int cid, 476 u64 window) 477 { 478 struct rapl_domain *rd; 479 int ret = 0; 480 int id; 481 482 cpus_read_lock(); 483 rd = power_zone_to_rapl_domain(power_zone); 484 id = contraint_to_pl(rd, cid); 485 486 ret = rapl_write_pl_data(rd, id, PL_TIME_WINDOW, window); 487 488 cpus_read_unlock(); 489 return ret; 490 } 491 492 static int get_time_window(struct powercap_zone *power_zone, int cid, 493 u64 *data) 494 { 495 struct rapl_domain *rd; 496 u64 val; 497 int ret = 0; 498 int id; 499 500 cpus_read_lock(); 501 rd = power_zone_to_rapl_domain(power_zone); 502 id = contraint_to_pl(rd, cid); 503 504 ret = rapl_read_pl_data(rd, id, 
PL_TIME_WINDOW, true, &val); 505 if (!ret) 506 *data = val; 507 508 cpus_read_unlock(); 509 510 return ret; 511 } 512 513 static const char *get_constraint_name(struct powercap_zone *power_zone, 514 int cid) 515 { 516 struct rapl_domain *rd; 517 int id; 518 519 rd = power_zone_to_rapl_domain(power_zone); 520 id = contraint_to_pl(rd, cid); 521 if (id >= 0) 522 return rd->rpl[id].name; 523 524 return NULL; 525 } 526 527 static int get_max_power(struct powercap_zone *power_zone, int cid, u64 *data) 528 { 529 struct rapl_domain *rd; 530 u64 val; 531 int ret = 0; 532 int id; 533 534 cpus_read_lock(); 535 rd = power_zone_to_rapl_domain(power_zone); 536 id = contraint_to_pl(rd, cid); 537 538 ret = rapl_read_pl_data(rd, id, PL_MAX_POWER, true, &val); 539 if (!ret) 540 *data = val; 541 542 /* As a generalization rule, PL4 would be around two times PL2. */ 543 if (id == POWER_LIMIT4) 544 *data = *data * 2; 545 546 cpus_read_unlock(); 547 548 return ret; 549 } 550 551 static const struct powercap_zone_constraint_ops constraint_ops = { 552 .set_power_limit_uw = set_power_limit, 553 .get_power_limit_uw = get_current_power_limit, 554 .set_time_window_us = set_time_window, 555 .get_time_window_us = get_time_window, 556 .get_max_power_uw = get_max_power, 557 .get_name = get_constraint_name, 558 }; 559 560 /* Return the id used for read_raw/write_raw callback */ 561 static int get_rid(struct rapl_package *rp) 562 { 563 return rp->lead_cpu >= 0 ? 
rp->lead_cpu : rp->id; 564 } 565 566 /* called after domain detection and package level data are set */ 567 static void rapl_init_domains(struct rapl_package *rp) 568 { 569 enum rapl_domain_type i; 570 enum rapl_domain_reg_id j; 571 struct rapl_domain *rd = rp->domains; 572 573 for (i = 0; i < RAPL_DOMAIN_MAX; i++) { 574 unsigned int mask = rp->domain_map & (1 << i); 575 int t; 576 577 if (!mask) 578 continue; 579 580 rd->rp = rp; 581 582 if (i == RAPL_DOMAIN_PLATFORM && rp->id > 0) { 583 snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "psys-%d", 584 rp->lead_cpu >= 0 ? topology_physical_package_id(rp->lead_cpu) : 585 rp->id); 586 } else { 587 snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "%s", 588 rapl_domain_names[i]); 589 } 590 591 rd->id = i; 592 593 /* PL1 is supported by default */ 594 rp->priv->limits[i] |= BIT(POWER_LIMIT1); 595 596 for (t = POWER_LIMIT1; t < NR_POWER_LIMITS; t++) { 597 if (rp->priv->limits[i] & BIT(t)) 598 rd->rpl[t].name = pl_names[t]; 599 } 600 601 for (j = 0; j < RAPL_DOMAIN_REG_MAX; j++) 602 rd->regs[j] = rp->priv->regs[i][j]; 603 604 rd++; 605 } 606 } 607 608 static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type, 609 u64 value, int to_raw) 610 { 611 u64 units = 1; 612 struct rapl_defaults *defaults = get_defaults(rd->rp); 613 u64 scale = 1; 614 615 switch (type) { 616 case POWER_UNIT: 617 units = rd->power_unit; 618 break; 619 case ENERGY_UNIT: 620 scale = ENERGY_UNIT_SCALE; 621 units = rd->energy_unit; 622 break; 623 case TIME_UNIT: 624 return defaults->compute_time_window(rd, value, to_raw); 625 case ARBITRARY_UNIT: 626 default: 627 return value; 628 } 629 630 if (to_raw) 631 return div64_u64(value, units) * scale; 632 633 value *= units; 634 635 return div64_u64(value, scale); 636 } 637 638 /* RAPL primitives for MSR and MMIO I/F */ 639 static struct rapl_primitive_info rpi_msr[NR_RAPL_PRIMITIVES] = { 640 /* name, mask, shift, msr index, unit divisor */ 641 [POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, 
POWER_LIMIT1_MASK, 0, 642 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), 643 [POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32, 644 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), 645 [POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, POWER_LIMIT4_MASK, 0, 646 RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0), 647 [ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0, 648 RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0), 649 [FW_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_LOW_LOCK, 31, 650 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 651 [FW_HIGH_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_HIGH_LOCK, 63, 652 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 653 [PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15, 654 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 655 [PL1_CLAMP] = PRIMITIVE_INFO_INIT(PL1_CLAMP, POWER_LIMIT1_CLAMP, 16, 656 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 657 [PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, POWER_LIMIT2_ENABLE, 47, 658 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 659 [PL2_CLAMP] = PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48, 660 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 661 [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17, 662 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), 663 [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49, 664 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), 665 [THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, POWER_INFO_THERMAL_SPEC_MASK, 666 0, RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), 667 [MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, POWER_INFO_MAX_MASK, 32, 668 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), 669 [MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, POWER_INFO_MIN_MASK, 16, 670 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), 671 [MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, POWER_INFO_MAX_TIME_WIN_MASK, 48, 672 RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0), 673 [THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0, 674 
RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0), 675 [PRIORITY_LEVEL] = PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0, 676 RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0), 677 [PSYS_POWER_LIMIT1] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0, 678 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), 679 [PSYS_POWER_LIMIT2] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK, 32, 680 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), 681 [PSYS_PL1_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE, 17, 682 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 683 [PSYS_PL2_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE, 49, 684 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 685 [PSYS_TIME_WINDOW1] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK, 19, 686 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), 687 [PSYS_TIME_WINDOW2] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK, 51, 688 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), 689 /* non-hardware */ 690 [AVERAGE_POWER] = PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT, 691 RAPL_PRIMITIVE_DERIVED), 692 }; 693 694 /* RAPL primitives for TPMI I/F */ 695 static struct rapl_primitive_info rpi_tpmi[NR_RAPL_PRIMITIVES] = { 696 /* name, mask, shift, msr index, unit divisor */ 697 [POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, TPMI_POWER_LIMIT_MASK, 0, 698 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), 699 [POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, TPMI_POWER_LIMIT_MASK, 0, 700 RAPL_DOMAIN_REG_PL2, POWER_UNIT, 0), 701 [POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, TPMI_POWER_LIMIT_MASK, 0, 702 RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0), 703 [ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0, 704 RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0), 705 [PL1_LOCK] = PRIMITIVE_INFO_INIT(PL1_LOCK, POWER_HIGH_LOCK, 63, 706 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 707 [PL2_LOCK] = PRIMITIVE_INFO_INIT(PL2_LOCK, POWER_HIGH_LOCK, 63, 708 RAPL_DOMAIN_REG_PL2, 
ARBITRARY_UNIT, 0), 709 [PL4_LOCK] = PRIMITIVE_INFO_INIT(PL4_LOCK, POWER_HIGH_LOCK, 63, 710 RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0), 711 [PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62, 712 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 713 [PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62, 714 RAPL_DOMAIN_REG_PL2, ARBITRARY_UNIT, 0), 715 [PL4_ENABLE] = PRIMITIVE_INFO_INIT(PL4_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62, 716 RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0), 717 [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TPMI_TIME_WINDOW_MASK, 18, 718 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), 719 [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TPMI_TIME_WINDOW_MASK, 18, 720 RAPL_DOMAIN_REG_PL2, TIME_UNIT, 0), 721 [THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, TPMI_INFO_SPEC_MASK, 0, 722 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), 723 [MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, TPMI_INFO_MAX_MASK, 36, 724 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), 725 [MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, TPMI_INFO_MIN_MASK, 18, 726 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), 727 [MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, TPMI_INFO_MAX_TIME_WIN_MASK, 54, 728 RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0), 729 [THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0, 730 RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0), 731 /* non-hardware */ 732 [AVERAGE_POWER] = PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, 733 POWER_UNIT, RAPL_PRIMITIVE_DERIVED), 734 }; 735 736 static struct rapl_primitive_info *get_rpi(struct rapl_package *rp, int prim) 737 { 738 struct rapl_primitive_info *rpi = rp->priv->rpi; 739 740 if (prim < 0 || prim > NR_RAPL_PRIMITIVES || !rpi) 741 return NULL; 742 743 return &rpi[prim]; 744 } 745 746 static int rapl_config(struct rapl_package *rp) 747 { 748 switch (rp->priv->type) { 749 /* MMIO I/F shares the same register layout as MSR registers */ 750 case RAPL_IF_MMIO: 751 case RAPL_IF_MSR: 752 
rp->priv->defaults = (void *)defaults_msr; 753 rp->priv->rpi = (void *)rpi_msr; 754 break; 755 case RAPL_IF_TPMI: 756 rp->priv->defaults = (void *)&defaults_tpmi; 757 rp->priv->rpi = (void *)rpi_tpmi; 758 break; 759 default: 760 return -EINVAL; 761 } 762 return 0; 763 } 764 765 static enum rapl_primitives 766 prim_fixups(struct rapl_domain *rd, enum rapl_primitives prim) 767 { 768 struct rapl_defaults *defaults = get_defaults(rd->rp); 769 770 if (!defaults->spr_psys_bits) 771 return prim; 772 773 if (rd->id != RAPL_DOMAIN_PLATFORM) 774 return prim; 775 776 switch (prim) { 777 case POWER_LIMIT1: 778 return PSYS_POWER_LIMIT1; 779 case POWER_LIMIT2: 780 return PSYS_POWER_LIMIT2; 781 case PL1_ENABLE: 782 return PSYS_PL1_ENABLE; 783 case PL2_ENABLE: 784 return PSYS_PL2_ENABLE; 785 case TIME_WINDOW1: 786 return PSYS_TIME_WINDOW1; 787 case TIME_WINDOW2: 788 return PSYS_TIME_WINDOW2; 789 default: 790 return prim; 791 } 792 } 793 794 /* Read primitive data based on its related struct rapl_primitive_info. 795 * if xlate flag is set, return translated data based on data units, i.e. 796 * time, energy, and power. 797 * RAPL MSRs are non-architectual and are laid out not consistently across 798 * domains. Here we use primitive info to allow writing consolidated access 799 * functions. 800 * For a given primitive, it is processed by MSR mask and shift. Unit conversion 801 * is pre-assigned based on RAPL unit MSRs read at init time. 
802 * 63-------------------------- 31--------------------------- 0 803 * | xxxxx (mask) | 804 * | |<- shift ----------------| 805 * 63-------------------------- 31--------------------------- 0 806 */ 807 static int rapl_read_data_raw(struct rapl_domain *rd, 808 enum rapl_primitives prim, bool xlate, u64 *data) 809 { 810 u64 value; 811 enum rapl_primitives prim_fixed = prim_fixups(rd, prim); 812 struct rapl_primitive_info *rpi = get_rpi(rd->rp, prim_fixed); 813 struct reg_action ra; 814 815 if (!rpi || !rpi->name || rpi->flag & RAPL_PRIMITIVE_DUMMY) 816 return -EINVAL; 817 818 ra.reg = rd->regs[rpi->id]; 819 if (!ra.reg.val) 820 return -EINVAL; 821 822 /* non-hardware data are collected by the polling thread */ 823 if (rpi->flag & RAPL_PRIMITIVE_DERIVED) { 824 *data = rd->rdd.primitives[prim]; 825 return 0; 826 } 827 828 ra.mask = rpi->mask; 829 830 if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) { 831 pr_debug("failed to read reg 0x%llx for %s:%s\n", ra.reg.val, rd->rp->name, rd->name); 832 return -EIO; 833 } 834 835 value = ra.value >> rpi->shift; 836 837 if (xlate) 838 *data = rapl_unit_xlate(rd, rpi->unit, value, 0); 839 else 840 *data = value; 841 842 return 0; 843 } 844 845 /* Similar use of primitive info in the read counterpart */ 846 static int rapl_write_data_raw(struct rapl_domain *rd, 847 enum rapl_primitives prim, 848 unsigned long long value) 849 { 850 enum rapl_primitives prim_fixed = prim_fixups(rd, prim); 851 struct rapl_primitive_info *rpi = get_rpi(rd->rp, prim_fixed); 852 u64 bits; 853 struct reg_action ra; 854 int ret; 855 856 if (!rpi || !rpi->name || rpi->flag & RAPL_PRIMITIVE_DUMMY) 857 return -EINVAL; 858 859 bits = rapl_unit_xlate(rd, rpi->unit, value, 1); 860 bits <<= rpi->shift; 861 bits &= rpi->mask; 862 863 memset(&ra, 0, sizeof(ra)); 864 865 ra.reg = rd->regs[rpi->id]; 866 ra.mask = rpi->mask; 867 ra.value = bits; 868 869 ret = rd->rp->priv->write_raw(get_rid(rd->rp), &ra); 870 871 return ret; 872 } 873 874 static int 
rapl_read_pl_data(struct rapl_domain *rd, int pl, 875 enum pl_prims pl_prim, bool xlate, u64 *data) 876 { 877 enum rapl_primitives prim = get_pl_prim(rd, pl, pl_prim); 878 879 if (!is_pl_valid(rd, pl)) 880 return -EINVAL; 881 882 return rapl_read_data_raw(rd, prim, xlate, data); 883 } 884 885 static int rapl_write_pl_data(struct rapl_domain *rd, int pl, 886 enum pl_prims pl_prim, 887 unsigned long long value) 888 { 889 enum rapl_primitives prim = get_pl_prim(rd, pl, pl_prim); 890 891 if (!is_pl_valid(rd, pl)) 892 return -EINVAL; 893 894 if (rd->rpl[pl].locked) { 895 pr_debug("%s:%s:%s locked by BIOS\n", rd->rp->name, rd->name, pl_names[pl]); 896 return -EACCES; 897 } 898 899 return rapl_write_data_raw(rd, prim, value); 900 } 901 /* 902 * Raw RAPL data stored in MSRs are in certain scales. We need to 903 * convert them into standard units based on the units reported in 904 * the RAPL unit MSRs. This is specific to CPUs as the method to 905 * calculate units differ on different CPUs. 906 * We convert the units to below format based on CPUs. 907 * i.e. 
908 * energy unit: picoJoules : Represented in picoJoules by default 909 * power unit : microWatts : Represented in milliWatts by default 910 * time unit : microseconds: Represented in seconds by default 911 */ 912 static int rapl_check_unit_core(struct rapl_domain *rd) 913 { 914 struct reg_action ra; 915 u32 value; 916 917 ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT]; 918 ra.mask = ~0; 919 if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) { 920 pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n", 921 ra.reg.val, rd->rp->name, rd->name); 922 return -ENODEV; 923 } 924 925 value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; 926 rd->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value); 927 928 value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; 929 rd->power_unit = 1000000 / (1 << value); 930 931 value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; 932 rd->time_unit = 1000000 / (1 << value); 933 934 pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n", 935 rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit); 936 937 return 0; 938 } 939 940 static int rapl_check_unit_atom(struct rapl_domain *rd) 941 { 942 struct reg_action ra; 943 u32 value; 944 945 ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT]; 946 ra.mask = ~0; 947 if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) { 948 pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n", 949 ra.reg.val, rd->rp->name, rd->name); 950 return -ENODEV; 951 } 952 953 value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; 954 rd->energy_unit = ENERGY_UNIT_SCALE * 1 << value; 955 956 value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; 957 rd->power_unit = (1 << value) * 1000; 958 959 value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; 960 rd->time_unit = 1000000 / (1 << value); 961 962 pr_debug("Atom %s:%s energy=%dpJ, time=%dus, power=%duW\n", 963 rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit); 964 965 return 0; 966 } 967 968 
static void power_limit_irq_save_cpu(void *info) 969 { 970 u32 l, h = 0; 971 struct rapl_package *rp = (struct rapl_package *)info; 972 973 /* save the state of PLN irq mask bit before disabling it */ 974 rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h); 975 if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED)) { 976 rp->power_limit_irq = l & PACKAGE_THERM_INT_PLN_ENABLE; 977 rp->power_limit_irq |= PACKAGE_PLN_INT_SAVED; 978 } 979 l &= ~PACKAGE_THERM_INT_PLN_ENABLE; 980 wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); 981 } 982 983 /* REVISIT: 984 * When package power limit is set artificially low by RAPL, LVT 985 * thermal interrupt for package power limit should be ignored 986 * since we are not really exceeding the real limit. The intention 987 * is to avoid excessive interrupts while we are trying to save power. 988 * A useful feature might be routing the package_power_limit interrupt 989 * to userspace via eventfd. once we have a usecase, this is simple 990 * to do by adding an atomic notifier. 991 */ 992 993 static void package_power_limit_irq_save(struct rapl_package *rp) 994 { 995 if (rp->lead_cpu < 0) 996 return; 997 998 if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN)) 999 return; 1000 1001 smp_call_function_single(rp->lead_cpu, power_limit_irq_save_cpu, rp, 1); 1002 } 1003 1004 /* 1005 * Restore per package power limit interrupt enable state. Called from cpu 1006 * hotplug code on package removal. 
1007 */ 1008 static void package_power_limit_irq_restore(struct rapl_package *rp) 1009 { 1010 u32 l, h; 1011 1012 if (rp->lead_cpu < 0) 1013 return; 1014 1015 if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN)) 1016 return; 1017 1018 /* irq enable state not saved, nothing to restore */ 1019 if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED)) 1020 return; 1021 1022 rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h); 1023 1024 if (rp->power_limit_irq & PACKAGE_THERM_INT_PLN_ENABLE) 1025 l |= PACKAGE_THERM_INT_PLN_ENABLE; 1026 else 1027 l &= ~PACKAGE_THERM_INT_PLN_ENABLE; 1028 1029 wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); 1030 } 1031 1032 static void set_floor_freq_default(struct rapl_domain *rd, bool mode) 1033 { 1034 int i; 1035 1036 /* always enable clamp such that p-state can go below OS requested 1037 * range. power capping priority over guranteed frequency. 1038 */ 1039 rapl_write_pl_data(rd, POWER_LIMIT1, PL_CLAMP, mode); 1040 1041 for (i = POWER_LIMIT2; i < NR_POWER_LIMITS; i++) { 1042 rapl_write_pl_data(rd, i, PL_ENABLE, mode); 1043 rapl_write_pl_data(rd, i, PL_CLAMP, mode); 1044 } 1045 } 1046 1047 static void set_floor_freq_atom(struct rapl_domain *rd, bool enable) 1048 { 1049 static u32 power_ctrl_orig_val; 1050 struct rapl_defaults *defaults = get_defaults(rd->rp); 1051 u32 mdata; 1052 1053 if (!defaults->floor_freq_reg_addr) { 1054 pr_err("Invalid floor frequency config register\n"); 1055 return; 1056 } 1057 1058 if (!power_ctrl_orig_val) 1059 iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_CR_READ, 1060 defaults->floor_freq_reg_addr, 1061 &power_ctrl_orig_val); 1062 mdata = power_ctrl_orig_val; 1063 if (enable) { 1064 mdata &= ~(0x7f << 8); 1065 mdata |= 1 << 8; 1066 } 1067 iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_CR_WRITE, 1068 defaults->floor_freq_reg_addr, mdata); 1069 } 1070 1071 static u64 rapl_compute_time_window_core(struct rapl_domain *rd, u64 value, 1072 bool to_raw) 1073 { 1074 u64 f, y; /* fraction and exp. 
used for time unit */ 1075 1076 /* 1077 * Special processing based on 2^Y*(1+F/4), refer 1078 * to Intel Software Developer's manual Vol.3B: CH 14.9.3. 1079 */ 1080 if (!to_raw) { 1081 f = (value & 0x60) >> 5; 1082 y = value & 0x1f; 1083 value = (1 << y) * (4 + f) * rd->time_unit / 4; 1084 } else { 1085 if (value < rd->time_unit) 1086 return 0; 1087 1088 do_div(value, rd->time_unit); 1089 y = ilog2(value); 1090 1091 /* 1092 * The target hardware field is 7 bits wide, so return all ones 1093 * if the exponent is too large. 1094 */ 1095 if (y > 0x1f) 1096 return 0x7f; 1097 1098 f = div64_u64(4 * (value - (1ULL << y)), 1ULL << y); 1099 value = (y & 0x1f) | ((f & 0x3) << 5); 1100 } 1101 return value; 1102 } 1103 1104 static u64 rapl_compute_time_window_atom(struct rapl_domain *rd, u64 value, 1105 bool to_raw) 1106 { 1107 /* 1108 * Atom time unit encoding is straight forward val * time_unit, 1109 * where time_unit is default to 1 sec. Never 0. 1110 */ 1111 if (!to_raw) 1112 return (value) ? value * rd->time_unit : rd->time_unit; 1113 1114 value = div64_u64(value, rd->time_unit); 1115 1116 return value; 1117 } 1118 1119 /* TPMI Unit register has different layout */ 1120 #define TPMI_POWER_UNIT_OFFSET POWER_UNIT_OFFSET 1121 #define TPMI_POWER_UNIT_MASK POWER_UNIT_MASK 1122 #define TPMI_ENERGY_UNIT_OFFSET 0x06 1123 #define TPMI_ENERGY_UNIT_MASK 0x7C0 1124 #define TPMI_TIME_UNIT_OFFSET 0x0C 1125 #define TPMI_TIME_UNIT_MASK 0xF000 1126 1127 static int rapl_check_unit_tpmi(struct rapl_domain *rd) 1128 { 1129 struct reg_action ra; 1130 u32 value; 1131 1132 ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT]; 1133 ra.mask = ~0; 1134 if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) { 1135 pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n", 1136 ra.reg.val, rd->rp->name, rd->name); 1137 return -ENODEV; 1138 } 1139 1140 value = (ra.value & TPMI_ENERGY_UNIT_MASK) >> TPMI_ENERGY_UNIT_OFFSET; 1141 rd->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value); 1142 1143 value = 
(ra.value & TPMI_POWER_UNIT_MASK) >> TPMI_POWER_UNIT_OFFSET; 1144 rd->power_unit = 1000000 / (1 << value); 1145 1146 value = (ra.value & TPMI_TIME_UNIT_MASK) >> TPMI_TIME_UNIT_OFFSET; 1147 rd->time_unit = 1000000 / (1 << value); 1148 1149 pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n", 1150 rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit); 1151 1152 return 0; 1153 } 1154 1155 static const struct rapl_defaults defaults_tpmi = { 1156 .check_unit = rapl_check_unit_tpmi, 1157 /* Reuse existing logic, ignore the PL_CLAMP failures and enable all Power Limits */ 1158 .set_floor_freq = set_floor_freq_default, 1159 .compute_time_window = rapl_compute_time_window_core, 1160 }; 1161 1162 static const struct rapl_defaults rapl_defaults_core = { 1163 .floor_freq_reg_addr = 0, 1164 .check_unit = rapl_check_unit_core, 1165 .set_floor_freq = set_floor_freq_default, 1166 .compute_time_window = rapl_compute_time_window_core, 1167 }; 1168 1169 static const struct rapl_defaults rapl_defaults_hsw_server = { 1170 .check_unit = rapl_check_unit_core, 1171 .set_floor_freq = set_floor_freq_default, 1172 .compute_time_window = rapl_compute_time_window_core, 1173 .dram_domain_energy_unit = 15300, 1174 }; 1175 1176 static const struct rapl_defaults rapl_defaults_spr_server = { 1177 .check_unit = rapl_check_unit_core, 1178 .set_floor_freq = set_floor_freq_default, 1179 .compute_time_window = rapl_compute_time_window_core, 1180 .psys_domain_energy_unit = 1000000000, 1181 .spr_psys_bits = true, 1182 }; 1183 1184 static const struct rapl_defaults rapl_defaults_byt = { 1185 .floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_BYT, 1186 .check_unit = rapl_check_unit_atom, 1187 .set_floor_freq = set_floor_freq_atom, 1188 .compute_time_window = rapl_compute_time_window_atom, 1189 }; 1190 1191 static const struct rapl_defaults rapl_defaults_tng = { 1192 .floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_TNG, 1193 .check_unit = rapl_check_unit_atom, 1194 .set_floor_freq 
= set_floor_freq_atom, 1195 .compute_time_window = rapl_compute_time_window_atom, 1196 }; 1197 1198 static const struct rapl_defaults rapl_defaults_ann = { 1199 .floor_freq_reg_addr = 0, 1200 .check_unit = rapl_check_unit_atom, 1201 .set_floor_freq = NULL, 1202 .compute_time_window = rapl_compute_time_window_atom, 1203 }; 1204 1205 static const struct rapl_defaults rapl_defaults_cht = { 1206 .floor_freq_reg_addr = 0, 1207 .check_unit = rapl_check_unit_atom, 1208 .set_floor_freq = NULL, 1209 .compute_time_window = rapl_compute_time_window_atom, 1210 }; 1211 1212 static const struct rapl_defaults rapl_defaults_amd = { 1213 .check_unit = rapl_check_unit_core, 1214 }; 1215 1216 static const struct x86_cpu_id rapl_ids[] __initconst = { 1217 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &rapl_defaults_core), 1218 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &rapl_defaults_core), 1219 1220 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &rapl_defaults_core), 1221 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &rapl_defaults_core), 1222 1223 X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &rapl_defaults_core), 1224 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &rapl_defaults_core), 1225 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &rapl_defaults_core), 1226 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &rapl_defaults_hsw_server), 1227 1228 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &rapl_defaults_core), 1229 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &rapl_defaults_core), 1230 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &rapl_defaults_core), 1231 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &rapl_defaults_hsw_server), 1232 1233 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &rapl_defaults_core), 1234 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &rapl_defaults_core), 1235 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &rapl_defaults_hsw_server), 1236 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &rapl_defaults_core), 1237 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &rapl_defaults_core), 1238 X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &rapl_defaults_core), 1239 
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &rapl_defaults_core), 1240 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &rapl_defaults_core), 1241 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI, &rapl_defaults_core), 1242 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &rapl_defaults_hsw_server), 1243 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &rapl_defaults_hsw_server), 1244 X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &rapl_defaults_core), 1245 X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &rapl_defaults_core), 1246 X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &rapl_defaults_core), 1247 X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &rapl_defaults_core), 1248 X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rapl_defaults_core), 1249 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &rapl_defaults_core), 1250 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &rapl_defaults_core), 1251 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &rapl_defaults_core), 1252 X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &rapl_defaults_core), 1253 X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &rapl_defaults_core), 1254 X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &rapl_defaults_core), 1255 X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &rapl_defaults_core), 1256 X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &rapl_defaults_core), 1257 X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &rapl_defaults_spr_server), 1258 X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &rapl_defaults_spr_server), 1259 X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD, &rapl_defaults_core), 1260 1261 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &rapl_defaults_byt), 1262 X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &rapl_defaults_cht), 1263 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &rapl_defaults_tng), 1264 X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT_MID, &rapl_defaults_ann), 1265 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &rapl_defaults_core), 1266 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &rapl_defaults_core), 1267 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &rapl_defaults_core), 1268 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, &rapl_defaults_core), 1269 
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &rapl_defaults_core), 1270 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, &rapl_defaults_core), 1271 1272 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &rapl_defaults_hsw_server), 1273 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &rapl_defaults_hsw_server), 1274 1275 X86_MATCH_VENDOR_FAM(AMD, 0x17, &rapl_defaults_amd), 1276 X86_MATCH_VENDOR_FAM(AMD, 0x19, &rapl_defaults_amd), 1277 X86_MATCH_VENDOR_FAM(HYGON, 0x18, &rapl_defaults_amd), 1278 {} 1279 }; 1280 MODULE_DEVICE_TABLE(x86cpu, rapl_ids); 1281 1282 /* Read once for all raw primitive data for domains */ 1283 static void rapl_update_domain_data(struct rapl_package *rp) 1284 { 1285 int dmn, prim; 1286 u64 val; 1287 1288 for (dmn = 0; dmn < rp->nr_domains; dmn++) { 1289 pr_debug("update %s domain %s data\n", rp->name, 1290 rp->domains[dmn].name); 1291 /* exclude non-raw primitives */ 1292 for (prim = 0; prim < NR_RAW_PRIMITIVES; prim++) { 1293 struct rapl_primitive_info *rpi = get_rpi(rp, prim); 1294 1295 if (!rapl_read_data_raw(&rp->domains[dmn], prim, 1296 rpi->unit, &val)) 1297 rp->domains[dmn].rdd.primitives[prim] = val; 1298 } 1299 } 1300 1301 } 1302 1303 static int rapl_package_register_powercap(struct rapl_package *rp) 1304 { 1305 struct rapl_domain *rd; 1306 struct powercap_zone *power_zone = NULL; 1307 int nr_pl, ret; 1308 1309 /* Update the domain data of the new package */ 1310 rapl_update_domain_data(rp); 1311 1312 /* first we register package domain as the parent zone */ 1313 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { 1314 if (rd->id == RAPL_DOMAIN_PACKAGE) { 1315 nr_pl = find_nr_power_limit(rd); 1316 pr_debug("register package domain %s\n", rp->name); 1317 power_zone = powercap_register_zone(&rd->power_zone, 1318 rp->priv->control_type, rp->name, 1319 NULL, &zone_ops[rd->id], nr_pl, 1320 &constraint_ops); 1321 if (IS_ERR(power_zone)) { 1322 pr_debug("failed to register power zone %s\n", 1323 rp->name); 1324 return PTR_ERR(power_zone); 1325 } 1326 
/* track parent zone in per package/socket data */ 1327 rp->power_zone = power_zone; 1328 /* done, only one package domain per socket */ 1329 break; 1330 } 1331 } 1332 if (!power_zone) { 1333 pr_err("no package domain found, unknown topology!\n"); 1334 return -ENODEV; 1335 } 1336 /* now register domains as children of the socket/package */ 1337 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { 1338 struct powercap_zone *parent = rp->power_zone; 1339 1340 if (rd->id == RAPL_DOMAIN_PACKAGE) 1341 continue; 1342 if (rd->id == RAPL_DOMAIN_PLATFORM) 1343 parent = NULL; 1344 /* number of power limits per domain varies */ 1345 nr_pl = find_nr_power_limit(rd); 1346 power_zone = powercap_register_zone(&rd->power_zone, 1347 rp->priv->control_type, 1348 rd->name, parent, 1349 &zone_ops[rd->id], nr_pl, 1350 &constraint_ops); 1351 1352 if (IS_ERR(power_zone)) { 1353 pr_debug("failed to register power_zone, %s:%s\n", 1354 rp->name, rd->name); 1355 ret = PTR_ERR(power_zone); 1356 goto err_cleanup; 1357 } 1358 } 1359 return 0; 1360 1361 err_cleanup: 1362 /* 1363 * Clean up previously initialized domains within the package if we 1364 * failed after the first domain setup. 1365 */ 1366 while (--rd >= rp->domains) { 1367 pr_debug("unregister %s domain %s\n", rp->name, rd->name); 1368 powercap_unregister_zone(rp->priv->control_type, 1369 &rd->power_zone); 1370 } 1371 1372 return ret; 1373 } 1374 1375 static int rapl_check_domain(int domain, struct rapl_package *rp) 1376 { 1377 struct reg_action ra; 1378 1379 switch (domain) { 1380 case RAPL_DOMAIN_PACKAGE: 1381 case RAPL_DOMAIN_PP0: 1382 case RAPL_DOMAIN_PP1: 1383 case RAPL_DOMAIN_DRAM: 1384 case RAPL_DOMAIN_PLATFORM: 1385 ra.reg = rp->priv->regs[domain][RAPL_DOMAIN_REG_STATUS]; 1386 break; 1387 default: 1388 pr_err("invalid domain id %d\n", domain); 1389 return -EINVAL; 1390 } 1391 /* make sure domain counters are available and contains non-zero 1392 * values, otherwise skip it. 
1393 */ 1394 1395 ra.mask = ENERGY_STATUS_MASK; 1396 if (rp->priv->read_raw(get_rid(rp), &ra) || !ra.value) 1397 return -ENODEV; 1398 1399 return 0; 1400 } 1401 1402 /* 1403 * Get per domain energy/power/time unit. 1404 * RAPL Interfaces without per domain unit register will use the package 1405 * scope unit register to set per domain units. 1406 */ 1407 static int rapl_get_domain_unit(struct rapl_domain *rd) 1408 { 1409 struct rapl_defaults *defaults = get_defaults(rd->rp); 1410 int ret; 1411 1412 if (!rd->regs[RAPL_DOMAIN_REG_UNIT].val) { 1413 if (!rd->rp->priv->reg_unit.val) { 1414 pr_err("No valid Unit register found\n"); 1415 return -ENODEV; 1416 } 1417 rd->regs[RAPL_DOMAIN_REG_UNIT] = rd->rp->priv->reg_unit; 1418 } 1419 1420 if (!defaults->check_unit) { 1421 pr_err("missing .check_unit() callback\n"); 1422 return -ENODEV; 1423 } 1424 1425 ret = defaults->check_unit(rd); 1426 if (ret) 1427 return ret; 1428 1429 if (rd->id == RAPL_DOMAIN_DRAM && defaults->dram_domain_energy_unit) 1430 rd->energy_unit = defaults->dram_domain_energy_unit; 1431 if (rd->id == RAPL_DOMAIN_PLATFORM && defaults->psys_domain_energy_unit) 1432 rd->energy_unit = defaults->psys_domain_energy_unit; 1433 return 0; 1434 } 1435 1436 /* 1437 * Check if power limits are available. Two cases when they are not available: 1438 * 1. Locked by BIOS, in this case we still provide read-only access so that 1439 * users can see what limit is set by the BIOS. 1440 * 2. Some CPUs make some domains monitoring only which means PLx MSRs may not 1441 * exist at all. In this case, we do not show the constraints in powercap. 1442 * 1443 * Called after domains are detected and initialized. 
1444 */ 1445 static void rapl_detect_powerlimit(struct rapl_domain *rd) 1446 { 1447 u64 val64; 1448 int i; 1449 1450 for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) { 1451 if (!rapl_read_pl_data(rd, i, PL_LOCK, false, &val64)) { 1452 if (val64) { 1453 rd->rpl[i].locked = true; 1454 pr_info("%s:%s:%s locked by BIOS\n", 1455 rd->rp->name, rd->name, pl_names[i]); 1456 } 1457 } 1458 1459 if (rapl_read_pl_data(rd, i, PL_LIMIT, false, &val64)) 1460 rd->rpl[i].name = NULL; 1461 } 1462 } 1463 1464 /* Detect active and valid domains for the given CPU, caller must 1465 * ensure the CPU belongs to the targeted package and CPU hotlug is disabled. 1466 */ 1467 static int rapl_detect_domains(struct rapl_package *rp) 1468 { 1469 struct rapl_domain *rd; 1470 int i; 1471 1472 for (i = 0; i < RAPL_DOMAIN_MAX; i++) { 1473 /* use physical package id to read counters */ 1474 if (!rapl_check_domain(i, rp)) { 1475 rp->domain_map |= 1 << i; 1476 pr_info("Found RAPL domain %s\n", rapl_domain_names[i]); 1477 } 1478 } 1479 rp->nr_domains = bitmap_weight(&rp->domain_map, RAPL_DOMAIN_MAX); 1480 if (!rp->nr_domains) { 1481 pr_debug("no valid rapl domains found in %s\n", rp->name); 1482 return -ENODEV; 1483 } 1484 pr_debug("found %d domains on %s\n", rp->nr_domains, rp->name); 1485 1486 rp->domains = kcalloc(rp->nr_domains, sizeof(struct rapl_domain), 1487 GFP_KERNEL); 1488 if (!rp->domains) 1489 return -ENOMEM; 1490 1491 rapl_init_domains(rp); 1492 1493 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { 1494 rapl_get_domain_unit(rd); 1495 rapl_detect_powerlimit(rd); 1496 } 1497 1498 return 0; 1499 } 1500 1501 /* called from CPU hotplug notifier, hotplug lock held */ 1502 void rapl_remove_package(struct rapl_package *rp) 1503 { 1504 struct rapl_domain *rd, *rd_package = NULL; 1505 1506 package_power_limit_irq_restore(rp); 1507 1508 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { 1509 int i; 1510 1511 for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) { 1512 
rapl_write_pl_data(rd, i, PL_ENABLE, 0); 1513 rapl_write_pl_data(rd, i, PL_CLAMP, 0); 1514 } 1515 1516 if (rd->id == RAPL_DOMAIN_PACKAGE) { 1517 rd_package = rd; 1518 continue; 1519 } 1520 pr_debug("remove package, undo power limit on %s: %s\n", 1521 rp->name, rd->name); 1522 powercap_unregister_zone(rp->priv->control_type, 1523 &rd->power_zone); 1524 } 1525 /* do parent zone last */ 1526 powercap_unregister_zone(rp->priv->control_type, 1527 &rd_package->power_zone); 1528 list_del(&rp->plist); 1529 kfree(rp); 1530 } 1531 EXPORT_SYMBOL_GPL(rapl_remove_package); 1532 1533 /* caller to ensure CPU hotplug lock is held */ 1534 struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu) 1535 { 1536 struct rapl_package *rp; 1537 int uid; 1538 1539 if (id_is_cpu) 1540 uid = topology_logical_die_id(id); 1541 else 1542 uid = id; 1543 1544 list_for_each_entry(rp, &rapl_packages, plist) { 1545 if (rp->id == uid 1546 && rp->priv->control_type == priv->control_type) 1547 return rp; 1548 } 1549 1550 return NULL; 1551 } 1552 EXPORT_SYMBOL_GPL(rapl_find_package_domain); 1553 1554 /* called from CPU hotplug notifier, hotplug lock held */ 1555 struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu) 1556 { 1557 struct rapl_package *rp; 1558 int ret; 1559 1560 rp = kzalloc(sizeof(struct rapl_package), GFP_KERNEL); 1561 if (!rp) 1562 return ERR_PTR(-ENOMEM); 1563 1564 if (id_is_cpu) { 1565 rp->id = topology_logical_die_id(id); 1566 rp->lead_cpu = id; 1567 if (topology_max_die_per_package() > 1) 1568 snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d-die-%d", 1569 topology_physical_package_id(id), topology_die_id(id)); 1570 else 1571 snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d", 1572 topology_physical_package_id(id)); 1573 } else { 1574 rp->id = id; 1575 rp->lead_cpu = -1; 1576 snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d", id); 1577 } 1578 1579 rp->priv = priv; 1580 ret = 
rapl_config(rp); 1581 if (ret) 1582 goto err_free_package; 1583 1584 /* check if the package contains valid domains */ 1585 if (rapl_detect_domains(rp)) { 1586 ret = -ENODEV; 1587 goto err_free_package; 1588 } 1589 ret = rapl_package_register_powercap(rp); 1590 if (!ret) { 1591 INIT_LIST_HEAD(&rp->plist); 1592 list_add(&rp->plist, &rapl_packages); 1593 return rp; 1594 } 1595 1596 err_free_package: 1597 kfree(rp->domains); 1598 kfree(rp); 1599 return ERR_PTR(ret); 1600 } 1601 EXPORT_SYMBOL_GPL(rapl_add_package); 1602 1603 static void power_limit_state_save(void) 1604 { 1605 struct rapl_package *rp; 1606 struct rapl_domain *rd; 1607 int ret, i; 1608 1609 cpus_read_lock(); 1610 list_for_each_entry(rp, &rapl_packages, plist) { 1611 if (!rp->power_zone) 1612 continue; 1613 rd = power_zone_to_rapl_domain(rp->power_zone); 1614 for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) { 1615 ret = rapl_read_pl_data(rd, i, PL_LIMIT, true, 1616 &rd->rpl[i].last_power_limit); 1617 if (ret) 1618 rd->rpl[i].last_power_limit = 0; 1619 } 1620 } 1621 cpus_read_unlock(); 1622 } 1623 1624 static void power_limit_state_restore(void) 1625 { 1626 struct rapl_package *rp; 1627 struct rapl_domain *rd; 1628 int i; 1629 1630 cpus_read_lock(); 1631 list_for_each_entry(rp, &rapl_packages, plist) { 1632 if (!rp->power_zone) 1633 continue; 1634 rd = power_zone_to_rapl_domain(rp->power_zone); 1635 for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) 1636 if (rd->rpl[i].last_power_limit) 1637 rapl_write_pl_data(rd, i, PL_LIMIT, 1638 rd->rpl[i].last_power_limit); 1639 } 1640 cpus_read_unlock(); 1641 } 1642 1643 static int rapl_pm_callback(struct notifier_block *nb, 1644 unsigned long mode, void *_unused) 1645 { 1646 switch (mode) { 1647 case PM_SUSPEND_PREPARE: 1648 power_limit_state_save(); 1649 break; 1650 case PM_POST_SUSPEND: 1651 power_limit_state_restore(); 1652 break; 1653 } 1654 return NOTIFY_OK; 1655 } 1656 1657 static struct notifier_block rapl_pm_notifier = { 1658 .notifier_call = rapl_pm_callback, 
1659 }; 1660 1661 static struct platform_device *rapl_msr_platdev; 1662 1663 static int __init rapl_init(void) 1664 { 1665 const struct x86_cpu_id *id; 1666 int ret; 1667 1668 id = x86_match_cpu(rapl_ids); 1669 if (id) { 1670 defaults_msr = (struct rapl_defaults *)id->driver_data; 1671 1672 rapl_msr_platdev = platform_device_alloc("intel_rapl_msr", 0); 1673 if (!rapl_msr_platdev) 1674 return -ENOMEM; 1675 1676 ret = platform_device_add(rapl_msr_platdev); 1677 if (ret) { 1678 platform_device_put(rapl_msr_platdev); 1679 return ret; 1680 } 1681 } 1682 1683 ret = register_pm_notifier(&rapl_pm_notifier); 1684 if (ret && rapl_msr_platdev) { 1685 platform_device_del(rapl_msr_platdev); 1686 platform_device_put(rapl_msr_platdev); 1687 } 1688 1689 return ret; 1690 } 1691 1692 static void __exit rapl_exit(void) 1693 { 1694 platform_device_unregister(rapl_msr_platdev); 1695 unregister_pm_notifier(&rapl_pm_notifier); 1696 } 1697 1698 fs_initcall(rapl_init); 1699 module_exit(rapl_exit); 1700 1701 MODULE_DESCRIPTION("Intel Runtime Average Power Limit (RAPL) common code"); 1702 MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@intel.com>"); 1703 MODULE_LICENSE("GPL v2"); 1704