1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Common code for Intel Running Average Power Limit (RAPL) support.
4 * Copyright (c) 2019, Intel Corporation.
5 */
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7
8 #include <linux/cleanup.h>
9 #include <linux/kernel.h>
10 #include <linux/module.h>
11 #include <linux/list.h>
12 #include <linux/types.h>
13 #include <linux/device.h>
14 #include <linux/slab.h>
15 #include <linux/log2.h>
16 #include <linux/bitmap.h>
17 #include <linux/delay.h>
18 #include <linux/sysfs.h>
19 #include <linux/cpu.h>
20 #include <linux/powercap.h>
21 #include <linux/suspend.h>
22 #include <linux/intel_rapl.h>
23 #include <linux/processor.h>
24 #include <linux/platform_device.h>
25
26 #include <asm/iosf_mbi.h>
27 #include <asm/cpu_device_id.h>
28 #include <asm/intel-family.h>
29
/* Bit-field layouts of the RAPL MSRs, consumed by the primitive accessors */
#define ENERGY_STATUS_MASK		0xffffffff

/* First power limit: value, enable and clamp bits */
#define POWER_LIMIT1_MASK		0x7FFF
#define POWER_LIMIT1_ENABLE		BIT(15)
#define POWER_LIMIT1_CLAMP		BIT(16)

/* Second power limit, plus the two possible lock bit positions */
#define POWER_LIMIT2_MASK		(0x7FFFULL << 32)
#define POWER_LIMIT2_ENABLE		BIT_ULL(47)
#define POWER_LIMIT2_CLAMP		BIT_ULL(48)
#define POWER_HIGH_LOCK			BIT_ULL(63)
#define POWER_LOW_LOCK			BIT(31)

#define POWER_LIMIT4_MASK		0x1FFF

#define TIME_WINDOW1_MASK		(0x7FULL << 17)
#define TIME_WINDOW2_MASK		(0x7FULL << 49)

/* Fields of the unit register: each is a (mask, right-shift) pair */
#define POWER_UNIT_OFFSET		0
#define POWER_UNIT_MASK			0x0F

#define ENERGY_UNIT_OFFSET		8
#define ENERGY_UNIT_MASK		0x1F00

#define TIME_UNIT_OFFSET		16
#define TIME_UNIT_MASK			0xF0000

/* Fields of the power info register */
#define POWER_INFO_MAX_MASK		(0x7fffULL << 32)
#define POWER_INFO_MIN_MASK		(0x7fffULL << 16)
#define POWER_INFO_MAX_TIME_WIN_MASK	(0x3fULL << 48)
#define POWER_INFO_THERMAL_SPEC_MASK	0x7fff

#define PERF_STATUS_THROTTLE_TIME_MASK	0xffffffff
#define PP_POLICY_MASK			0x1F

/*
 * SPR lays out the Psys domain PowerLimit register differently: PL1 and
 * PL2 are 17 bits wide instead of 15, which moves the Enable and
 * TimeWindow bits up accordingly.
 */
#define PSYS_POWER_LIMIT1_MASK		0x1FFFF
#define PSYS_POWER_LIMIT1_ENABLE	BIT(17)

#define PSYS_POWER_LIMIT2_MASK		(0x1FFFFULL << 32)
#define PSYS_POWER_LIMIT2_ENABLE	BIT_ULL(49)

#define PSYS_TIME_WINDOW1_MASK		(0x7FULL << 19)
#define PSYS_TIME_WINDOW2_MASK		(0x7FULL << 51)

/* Bit-field layouts of the RAPL TPMI registers, same consumers as above */
#define TPMI_POWER_LIMIT_MASK		0x3FFFF
#define TPMI_POWER_LIMIT_ENABLE		BIT_ULL(62)
#define TPMI_TIME_WINDOW_MASK		(0x7FULL << 18)
#define TPMI_INFO_SPEC_MASK		0x3FFFF
#define TPMI_INFO_MIN_MASK		(0x3FFFFULL << 18)
#define TPMI_INFO_MAX_MASK		(0x3FFFFULL << 36)
#define TPMI_INFO_MAX_TIME_WIN_MASK	(0x7FULL << 54)

/* Software-only constants (not hardware bit fields) */
#define RAPL_PRIMITIVE_DERIVED		BIT(1)	/* not from raw data */
#define RAPL_PRIMITIVE_DUMMY		BIT(2)

#define TIME_WINDOW_MAX_MSEC		40000
#define TIME_WINDOW_MIN_MSEC		250
#define ENERGY_UNIT_SCALE		1000	/* scale from driver unit to powercap unit */
/* Selects which unit conversion rapl_unit_xlate() applies to a value */
enum unit_type {
	ARBITRARY_UNIT = 0,	/* no translation */
	POWER_UNIT,
	ENERGY_UNIT,
	TIME_UNIT,
};
101
/* per domain data, some are optional */
#define NR_RAW_PRIMITIVES		(NR_RAPL_PRIMITIVES - 2)

/* Domain state flag bits */
#define DOMAIN_STATE_INACTIVE		BIT(0)
#define DOMAIN_STATE_POWER_LIMIT_SET	BIT(1)
107
108 static const char *pl_names[NR_POWER_LIMITS] = {
109 [POWER_LIMIT1] = "long_term",
110 [POWER_LIMIT2] = "short_term",
111 [POWER_LIMIT4] = "peak_power",
112 };
113
/*
 * Per-power-limit attributes. get_pl_prim() maps a (power limit, attribute)
 * pair onto the matching RAPL primitive.
 */
enum pl_prims {
	PL_ENABLE = 0,
	PL_CLAMP,
	PL_LIMIT,
	PL_TIME_WINDOW,
	PL_MAX_POWER,
	PL_LOCK,
};
122
is_pl_valid(struct rapl_domain * rd,int pl)123 static bool is_pl_valid(struct rapl_domain *rd, int pl)
124 {
125 if (pl < POWER_LIMIT1 || pl > POWER_LIMIT4)
126 return false;
127 return rd->rpl[pl].name ? true : false;
128 }
129
get_pl_lock_prim(struct rapl_domain * rd,int pl)130 static int get_pl_lock_prim(struct rapl_domain *rd, int pl)
131 {
132 if (rd->rp->priv->type == RAPL_IF_TPMI) {
133 if (pl == POWER_LIMIT1)
134 return PL1_LOCK;
135 if (pl == POWER_LIMIT2)
136 return PL2_LOCK;
137 if (pl == POWER_LIMIT4)
138 return PL4_LOCK;
139 }
140
141 /* MSR/MMIO Interface doesn't have Lock bit for PL4 */
142 if (pl == POWER_LIMIT4)
143 return -EINVAL;
144
145 /*
146 * Power Limit register that supports two power limits has a different
147 * bit position for the Lock bit.
148 */
149 if (rd->rp->priv->limits[rd->id] & BIT(POWER_LIMIT2))
150 return FW_HIGH_LOCK;
151 return FW_LOCK;
152 }
153
get_pl_prim(struct rapl_domain * rd,int pl,enum pl_prims prim)154 static int get_pl_prim(struct rapl_domain *rd, int pl, enum pl_prims prim)
155 {
156 switch (pl) {
157 case POWER_LIMIT1:
158 if (prim == PL_ENABLE)
159 return PL1_ENABLE;
160 if (prim == PL_CLAMP && rd->rp->priv->type != RAPL_IF_TPMI)
161 return PL1_CLAMP;
162 if (prim == PL_LIMIT)
163 return POWER_LIMIT1;
164 if (prim == PL_TIME_WINDOW)
165 return TIME_WINDOW1;
166 if (prim == PL_MAX_POWER)
167 return THERMAL_SPEC_POWER;
168 if (prim == PL_LOCK)
169 return get_pl_lock_prim(rd, pl);
170 return -EINVAL;
171 case POWER_LIMIT2:
172 if (prim == PL_ENABLE)
173 return PL2_ENABLE;
174 if (prim == PL_CLAMP && rd->rp->priv->type != RAPL_IF_TPMI)
175 return PL2_CLAMP;
176 if (prim == PL_LIMIT)
177 return POWER_LIMIT2;
178 if (prim == PL_TIME_WINDOW)
179 return TIME_WINDOW2;
180 if (prim == PL_MAX_POWER)
181 return MAX_POWER;
182 if (prim == PL_LOCK)
183 return get_pl_lock_prim(rd, pl);
184 return -EINVAL;
185 case POWER_LIMIT4:
186 if (prim == PL_LIMIT)
187 return POWER_LIMIT4;
188 if (prim == PL_ENABLE)
189 return PL4_ENABLE;
190 /* PL4 would be around two times PL2, use same prim as PL2. */
191 if (prim == PL_MAX_POWER)
192 return MAX_POWER;
193 if (prim == PL_LOCK)
194 return get_pl_lock_prim(rd, pl);
195 return -EINVAL;
196 default:
197 return -EINVAL;
198 }
199 }
200
/* Recover the rapl_domain that embeds the given powercap zone */
#define power_zone_to_rapl_domain(pz) \
	container_of(pz, struct rapl_domain, power_zone)
203
204 struct rapl_defaults {
205 u8 floor_freq_reg_addr;
206 int (*check_unit)(struct rapl_domain *rd);
207 void (*set_floor_freq)(struct rapl_domain *rd, bool mode);
208 u64 (*compute_time_window)(struct rapl_domain *rd, u64 val,
209 bool to_raw);
210 unsigned int dram_domain_energy_unit;
211 unsigned int psys_domain_energy_unit;
212 bool spr_psys_bits;
213 };
214 static struct rapl_defaults *defaults_msr;
215 static const struct rapl_defaults defaults_tpmi;
216
get_defaults(struct rapl_package * rp)217 static struct rapl_defaults *get_defaults(struct rapl_package *rp)
218 {
219 return rp->priv->defaults;
220 }
221
/* Sideband MBI registers */
#define IOSF_CPU_POWER_BUDGET_CTL_BYT	0x2
#define IOSF_CPU_POWER_BUDGET_CTL_TNG	0xdf

/* Flag in rp->power_limit_irq: the PLN interrupt enable state was saved */
#define PACKAGE_PLN_INT_SAVED		BIT(0)
#define MAX_PRIM_NAME			32
228
229 /* per domain data. used to describe individual knobs such that access function
230 * can be consolidated into one instead of many inline functions.
231 */
232 struct rapl_primitive_info {
233 const char *name;
234 u64 mask;
235 int shift;
236 enum rapl_domain_reg_id id;
237 enum unit_type unit;
238 u32 flag;
239 };
240
/*
 * Initializer for one struct rapl_primitive_info entry. The first argument
 * is stringified to become the entry's name.
 */
#define PRIMITIVE_INFO_INIT(_prim, _mask, _shift, _id, _unit, _flag)	\
	{								\
		.name = #_prim,						\
		.mask = _mask,						\
		.shift = _shift,					\
		.id = _id,						\
		.unit = _unit,						\
		.flag = _flag						\
	}
249
250 static void rapl_init_domains(struct rapl_package *rp);
251 static int rapl_read_data_raw(struct rapl_domain *rd,
252 enum rapl_primitives prim,
253 bool xlate, u64 *data);
254 static int rapl_write_data_raw(struct rapl_domain *rd,
255 enum rapl_primitives prim,
256 unsigned long long value);
257 static int rapl_read_pl_data(struct rapl_domain *rd, int pl,
258 enum pl_prims pl_prim,
259 bool xlate, u64 *data);
260 static int rapl_write_pl_data(struct rapl_domain *rd, int pl,
261 enum pl_prims pl_prim,
262 unsigned long long value);
263 static u64 rapl_unit_xlate(struct rapl_domain *rd,
264 enum unit_type type, u64 value, int to_raw);
265 static void package_power_limit_irq_save(struct rapl_package *rp);
266
/* Head of the rapl_packages list; guarded by CPU hotplug lock */
static LIST_HEAD(rapl_packages);
268
/* Domain names; rapl_init_domains() indexes this by domain type */
static const char *const rapl_domain_names[] = {
	"package", "core", "uncore", "dram", "psys",
};
276
/*
 * powercap callback: read the translated energy counter of the zone's RAPL
 * domain into @energy_raw.
 *
 * Returns 0 on success; any read failure is reported as -EIO and leaves
 * @energy_raw untouched.
 */
static int get_energy_counter(struct powercap_zone *power_zone,
			      u64 *energy_raw)
{
	struct rapl_domain *rd;
	u64 energy_now;
	int err;

	/* prevent CPU hotplug, make sure the RAPL domain does not go
	 * away while reading the counter.
	 */
	cpus_read_lock();
	rd = power_zone_to_rapl_domain(power_zone);

	err = rapl_read_data_raw(rd, ENERGY_COUNTER, true, &energy_now);
	if (!err)
		*energy_raw = energy_now;

	cpus_read_unlock();

	return err ? -EIO : 0;
}
299
/*
 * powercap callback: report the energy counter range, i.e. the full-scale
 * counter value ENERGY_STATUS_MASK translated from raw units. Always
 * returns 0.
 */
static int get_max_energy_counter(struct powercap_zone *pcd_dev, u64 *energy)
{
	struct rapl_domain *rd;

	rd = power_zone_to_rapl_domain(pcd_dev);
	*energy = rapl_unit_xlate(rd, ENERGY_UNIT, ENERGY_STATUS_MASK, 0);

	return 0;
}
307
release_zone(struct powercap_zone * power_zone)308 static int release_zone(struct powercap_zone *power_zone)
309 {
310 struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
311 struct rapl_package *rp = rd->rp;
312
313 /* package zone is the last zone of a package, we can free
314 * memory here since all children has been unregistered.
315 */
316 if (rd->id == RAPL_DOMAIN_PACKAGE) {
317 kfree(rd);
318 rp->domains = NULL;
319 }
320
321 return 0;
322
323 }
324
find_nr_power_limit(struct rapl_domain * rd)325 static int find_nr_power_limit(struct rapl_domain *rd)
326 {
327 int i, nr_pl = 0;
328
329 for (i = 0; i < NR_POWER_LIMITS; i++) {
330 if (is_pl_valid(rd, i))
331 nr_pl++;
332 }
333
334 return nr_pl;
335 }
336
/*
 * powercap callback: enable or disable the zone by writing the PL1 enable
 * bit, then reading it back.
 *
 * Returns the error of the write or read-back if either fails. If the bit
 * reads back different from @mode this is only logged and 0 is returned;
 * the floor frequency hook is skipped in that case.
 */
static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
{
	struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
	struct rapl_defaults *defaults = get_defaults(rd->rp);
	u64 readback;
	int ret;

	cpus_read_lock();

	ret = rapl_write_pl_data(rd, POWER_LIMIT1, PL_ENABLE, mode);
	if (!ret)
		ret = rapl_read_pl_data(rd, POWER_LIMIT1, PL_ENABLE, false,
					&readback);

	if (!ret) {
		if (mode != readback)
			pr_debug("%s cannot be %s\n", power_zone->name,
				 str_enabled_disabled(mode));
		else if (defaults->set_floor_freq)
			defaults->set_floor_freq(rd, mode);
	}

	cpus_read_unlock();

	return ret;
}
367
/*
 * powercap callback: report whether the zone is enabled, based on the PL1
 * enable bit. A domain whose PL1 is locked is reported as disabled without
 * reading the register.
 *
 * Returns 0 on success or the read error, in which case @mode is untouched.
 */
static int get_domain_enable(struct powercap_zone *power_zone, bool *mode)
{
	struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
	u64 enabled;
	int err;

	if (rd->rpl[POWER_LIMIT1].locked) {
		*mode = false;
		return 0;
	}

	cpus_read_lock();
	err = rapl_read_pl_data(rd, POWER_LIMIT1, PL_ENABLE, true, &enabled);
	if (err == 0)
		*mode = enabled;
	cpus_read_unlock();

	return err;
}
386
387 /* per RAPL domain ops, in the order of rapl_domain_type */
388 static const struct powercap_zone_ops zone_ops[] = {
389 /* RAPL_DOMAIN_PACKAGE */
390 {
391 .get_energy_uj = get_energy_counter,
392 .get_max_energy_range_uj = get_max_energy_counter,
393 .release = release_zone,
394 .set_enable = set_domain_enable,
395 .get_enable = get_domain_enable,
396 },
397 /* RAPL_DOMAIN_PP0 */
398 {
399 .get_energy_uj = get_energy_counter,
400 .get_max_energy_range_uj = get_max_energy_counter,
401 .release = release_zone,
402 .set_enable = set_domain_enable,
403 .get_enable = get_domain_enable,
404 },
405 /* RAPL_DOMAIN_PP1 */
406 {
407 .get_energy_uj = get_energy_counter,
408 .get_max_energy_range_uj = get_max_energy_counter,
409 .release = release_zone,
410 .set_enable = set_domain_enable,
411 .get_enable = get_domain_enable,
412 },
413 /* RAPL_DOMAIN_DRAM */
414 {
415 .get_energy_uj = get_energy_counter,
416 .get_max_energy_range_uj = get_max_energy_counter,
417 .release = release_zone,
418 .set_enable = set_domain_enable,
419 .get_enable = get_domain_enable,
420 },
421 /* RAPL_DOMAIN_PLATFORM */
422 {
423 .get_energy_uj = get_energy_counter,
424 .get_max_energy_range_uj = get_max_energy_counter,
425 .release = release_zone,
426 .set_enable = set_domain_enable,
427 .get_enable = get_domain_enable,
428 },
429 };
430
431 /*
432 * Constraint index used by powercap can be different than power limit (PL)
433 * index in that some PLs maybe missing due to non-existent MSRs. So we
434 * need to convert here by finding the valid PLs only (name populated).
435 */
contraint_to_pl(struct rapl_domain * rd,int cid)436 static int contraint_to_pl(struct rapl_domain *rd, int cid)
437 {
438 int i, j;
439
440 for (i = POWER_LIMIT1, j = 0; i < NR_POWER_LIMITS; i++) {
441 if (is_pl_valid(rd, i) && j++ == cid) {
442 pr_debug("%s: index %d\n", __func__, i);
443 return i;
444 }
445 }
446 pr_err("Cannot find matching power limit for constraint %d\n", cid);
447
448 return -EINVAL;
449 }
450
/*
 * powercap callback: program the limit of constraint @cid to @power_limit.
 * After a successful write, package_power_limit_irq_save() is called for
 * the owning package.
 *
 * Returns 0 on success or the error from rapl_write_pl_data(); an unknown
 * constraint is rejected there because the negative index is not a valid PL.
 */
static int set_power_limit(struct powercap_zone *power_zone, int cid,
			   u64 power_limit)
{
	struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
	int pl, err;

	cpus_read_lock();

	pl = contraint_to_pl(rd, cid);
	err = rapl_write_pl_data(rd, pl, PL_LIMIT, power_limit);
	if (err == 0)
		package_power_limit_irq_save(rd->rp);

	cpus_read_unlock();

	return err;
}
470
/*
 * powercap callback: read the translated limit of constraint @cid.
 *
 * Returns 0 and stores the value in @data, or returns the read error and
 * leaves @data untouched.
 */
static int get_current_power_limit(struct powercap_zone *power_zone, int cid,
				   u64 *data)
{
	struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
	u64 limit;
	int pl, err;

	cpus_read_lock();

	pl = contraint_to_pl(rd, cid);
	err = rapl_read_pl_data(rd, pl, PL_LIMIT, true, &limit);
	if (err == 0)
		*data = limit;

	cpus_read_unlock();

	return err;
}
491
/*
 * powercap callback: program the time window of constraint @cid.
 * Returns whatever rapl_write_pl_data() returns.
 */
static int set_time_window(struct powercap_zone *power_zone, int cid,
			   u64 window)
{
	struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
	int pl, err;

	cpus_read_lock();

	pl = contraint_to_pl(rd, cid);
	err = rapl_write_pl_data(rd, pl, PL_TIME_WINDOW, window);

	cpus_read_unlock();

	return err;
}
508
/*
 * powercap callback: read the translated time window of constraint @cid.
 *
 * Returns 0 and stores the value in @data, or returns the read error and
 * leaves @data untouched.
 */
static int get_time_window(struct powercap_zone *power_zone, int cid,
			   u64 *data)
{
	struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
	u64 window;
	int pl, err;

	cpus_read_lock();

	pl = contraint_to_pl(rd, cid);
	err = rapl_read_pl_data(rd, pl, PL_TIME_WINDOW, true, &window);
	if (err == 0)
		*data = window;

	cpus_read_unlock();

	return err;
}
529
get_constraint_name(struct powercap_zone * power_zone,int cid)530 static const char *get_constraint_name(struct powercap_zone *power_zone,
531 int cid)
532 {
533 struct rapl_domain *rd;
534 int id;
535
536 rd = power_zone_to_rapl_domain(power_zone);
537 id = contraint_to_pl(rd, cid);
538 if (id >= 0)
539 return rd->rpl[id].name;
540
541 return NULL;
542 }
543
/*
 * powercap callback: report the maximum power of constraint @cid.
 *
 * The value comes from the PL_MAX_POWER primitive of the matching power
 * limit; for PL4 that value is doubled.
 *
 * Returns 0 and stores the result in @data on success. On failure the read
 * error is returned and @data is left untouched (the doubling used to be
 * applied to @data even when nothing had been read into it).
 */
static int get_max_power(struct powercap_zone *power_zone, int cid, u64 *data)
{
	struct rapl_domain *rd;
	u64 val;
	int ret;
	int id;

	cpus_read_lock();
	rd = power_zone_to_rapl_domain(power_zone);
	id = contraint_to_pl(rd, cid);

	ret = rapl_read_pl_data(rd, id, PL_MAX_POWER, true, &val);
	if (!ret) {
		/* As a generalization rule, PL4 would be around two times PL2. */
		if (id == POWER_LIMIT4)
			val *= 2;
		*data = val;
	}

	cpus_read_unlock();

	return ret;
}
567
568 static const struct powercap_zone_constraint_ops constraint_ops = {
569 .set_power_limit_uw = set_power_limit,
570 .get_power_limit_uw = get_current_power_limit,
571 .set_time_window_us = set_time_window,
572 .get_time_window_us = get_time_window,
573 .get_max_power_uw = get_max_power,
574 .get_name = get_constraint_name,
575 };
576
577 /* Return the id used for read_raw/write_raw callback */
get_rid(struct rapl_package * rp)578 static int get_rid(struct rapl_package *rp)
579 {
580 return rp->lead_cpu >= 0 ? rp->lead_cpu : rp->id;
581 }
582
583 /* called after domain detection and package level data are set */
rapl_init_domains(struct rapl_package * rp)584 static void rapl_init_domains(struct rapl_package *rp)
585 {
586 enum rapl_domain_type i;
587 enum rapl_domain_reg_id j;
588 struct rapl_domain *rd = rp->domains;
589
590 for (i = 0; i < RAPL_DOMAIN_MAX; i++) {
591 unsigned int mask = rp->domain_map & (1 << i);
592 int t;
593
594 if (!mask)
595 continue;
596
597 rd->rp = rp;
598
599 if (i == RAPL_DOMAIN_PLATFORM && rp->id > 0) {
600 snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "psys-%d",
601 rp->lead_cpu >= 0 ? topology_physical_package_id(rp->lead_cpu) :
602 rp->id);
603 } else {
604 snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "%s",
605 rapl_domain_names[i]);
606 }
607
608 rd->id = i;
609
610 /* PL1 is supported by default */
611 rp->priv->limits[i] |= BIT(POWER_LIMIT1);
612
613 for (t = POWER_LIMIT1; t < NR_POWER_LIMITS; t++) {
614 if (rp->priv->limits[i] & BIT(t))
615 rd->rpl[t].name = pl_names[t];
616 }
617
618 for (j = 0; j < RAPL_DOMAIN_REG_MAX; j++)
619 rd->regs[j] = rp->priv->regs[i][j];
620
621 rd++;
622 }
623 }
624
/*
 * Convert @value between raw register units and driver units.
 *
 * TIME_UNIT is delegated to the compute_time_window() hook of the defaults;
 * ARBITRARY_UNIT (and any unknown type) is returned unchanged. For power
 * and energy, @to_raw selects value / units * scale, the opposite direction
 * computes value * units / scale, where scale is ENERGY_UNIT_SCALE for
 * energy and 1 for power.
 */
static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type,
			   u64 value, int to_raw)
{
	struct rapl_defaults *defaults = get_defaults(rd->rp);
	u64 units, scale;

	switch (type) {
	case TIME_UNIT:
		return defaults->compute_time_window(rd, value, to_raw);
	case POWER_UNIT:
		units = rd->power_unit;
		scale = 1;
		break;
	case ENERGY_UNIT:
		units = rd->energy_unit;
		scale = ENERGY_UNIT_SCALE;
		break;
	case ARBITRARY_UNIT:
	default:
		return value;
	}

	if (to_raw)
		return div64_u64(value, units) * scale;

	return div64_u64(value * units, scale);
}
654
655 /* RAPL primitives for MSR and MMIO I/F */
656 static struct rapl_primitive_info rpi_msr[NR_RAPL_PRIMITIVES] = {
657 /* name, mask, shift, msr index, unit divisor */
658 [POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, POWER_LIMIT1_MASK, 0,
659 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
660 [POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32,
661 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
662 [POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, POWER_LIMIT4_MASK, 0,
663 RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0),
664 [ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0,
665 RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0),
666 [FW_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_LOW_LOCK, 31,
667 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
668 [FW_HIGH_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_HIGH_LOCK, 63,
669 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
670 [PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15,
671 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
672 [PL1_CLAMP] = PRIMITIVE_INFO_INIT(PL1_CLAMP, POWER_LIMIT1_CLAMP, 16,
673 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
674 [PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, POWER_LIMIT2_ENABLE, 47,
675 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
676 [PL2_CLAMP] = PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48,
677 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
678 [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17,
679 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
680 [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49,
681 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
682 [THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, POWER_INFO_THERMAL_SPEC_MASK,
683 0, RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
684 [MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, POWER_INFO_MAX_MASK, 32,
685 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
686 [MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, POWER_INFO_MIN_MASK, 16,
687 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
688 [MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, POWER_INFO_MAX_TIME_WIN_MASK, 48,
689 RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0),
690 [THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0,
691 RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0),
692 [PRIORITY_LEVEL] = PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0,
693 RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0),
694 [PSYS_POWER_LIMIT1] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0,
695 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
696 [PSYS_POWER_LIMIT2] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK, 32,
697 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
698 [PSYS_PL1_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE, 17,
699 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
700 [PSYS_PL2_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE, 49,
701 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
702 [PSYS_TIME_WINDOW1] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK, 19,
703 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
704 [PSYS_TIME_WINDOW2] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK, 51,
705 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
706 /* non-hardware */
707 [AVERAGE_POWER] = PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT,
708 RAPL_PRIMITIVE_DERIVED),
709 };
710
711 /* RAPL primitives for TPMI I/F */
712 static struct rapl_primitive_info rpi_tpmi[NR_RAPL_PRIMITIVES] = {
713 /* name, mask, shift, msr index, unit divisor */
714 [POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, TPMI_POWER_LIMIT_MASK, 0,
715 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
716 [POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, TPMI_POWER_LIMIT_MASK, 0,
717 RAPL_DOMAIN_REG_PL2, POWER_UNIT, 0),
718 [POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, TPMI_POWER_LIMIT_MASK, 0,
719 RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0),
720 [ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0,
721 RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0),
722 [PL1_LOCK] = PRIMITIVE_INFO_INIT(PL1_LOCK, POWER_HIGH_LOCK, 63,
723 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
724 [PL2_LOCK] = PRIMITIVE_INFO_INIT(PL2_LOCK, POWER_HIGH_LOCK, 63,
725 RAPL_DOMAIN_REG_PL2, ARBITRARY_UNIT, 0),
726 [PL4_LOCK] = PRIMITIVE_INFO_INIT(PL4_LOCK, POWER_HIGH_LOCK, 63,
727 RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0),
728 [PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62,
729 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
730 [PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62,
731 RAPL_DOMAIN_REG_PL2, ARBITRARY_UNIT, 0),
732 [PL4_ENABLE] = PRIMITIVE_INFO_INIT(PL4_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62,
733 RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0),
734 [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TPMI_TIME_WINDOW_MASK, 18,
735 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
736 [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TPMI_TIME_WINDOW_MASK, 18,
737 RAPL_DOMAIN_REG_PL2, TIME_UNIT, 0),
738 [THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, TPMI_INFO_SPEC_MASK, 0,
739 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
740 [MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, TPMI_INFO_MAX_MASK, 36,
741 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
742 [MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, TPMI_INFO_MIN_MASK, 18,
743 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
744 [MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, TPMI_INFO_MAX_TIME_WIN_MASK, 54,
745 RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0),
746 [THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0,
747 RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0),
748 /* non-hardware */
749 [AVERAGE_POWER] = PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0,
750 POWER_UNIT, RAPL_PRIMITIVE_DERIVED),
751 };
752
get_rpi(struct rapl_package * rp,int prim)753 static struct rapl_primitive_info *get_rpi(struct rapl_package *rp, int prim)
754 {
755 struct rapl_primitive_info *rpi = rp->priv->rpi;
756
757 if (prim < 0 || prim >= NR_RAPL_PRIMITIVES || !rpi)
758 return NULL;
759
760 return &rpi[prim];
761 }
762
rapl_config(struct rapl_package * rp)763 static int rapl_config(struct rapl_package *rp)
764 {
765 switch (rp->priv->type) {
766 /* MMIO I/F shares the same register layout as MSR registers */
767 case RAPL_IF_MMIO:
768 case RAPL_IF_MSR:
769 rp->priv->defaults = (void *)defaults_msr;
770 rp->priv->rpi = (void *)rpi_msr;
771 break;
772 case RAPL_IF_TPMI:
773 rp->priv->defaults = (void *)&defaults_tpmi;
774 rp->priv->rpi = (void *)rpi_tpmi;
775 break;
776 default:
777 return -EINVAL;
778 }
779
780 /* defaults_msr can be NULL on unsupported platforms */
781 if (!rp->priv->defaults || !rp->priv->rpi)
782 return -ENODEV;
783
784 return 0;
785 }
786
787 static enum rapl_primitives
prim_fixups(struct rapl_domain * rd,enum rapl_primitives prim)788 prim_fixups(struct rapl_domain *rd, enum rapl_primitives prim)
789 {
790 struct rapl_defaults *defaults = get_defaults(rd->rp);
791
792 if (!defaults->spr_psys_bits)
793 return prim;
794
795 if (rd->id != RAPL_DOMAIN_PLATFORM)
796 return prim;
797
798 switch (prim) {
799 case POWER_LIMIT1:
800 return PSYS_POWER_LIMIT1;
801 case POWER_LIMIT2:
802 return PSYS_POWER_LIMIT2;
803 case PL1_ENABLE:
804 return PSYS_PL1_ENABLE;
805 case PL2_ENABLE:
806 return PSYS_PL2_ENABLE;
807 case TIME_WINDOW1:
808 return PSYS_TIME_WINDOW1;
809 case TIME_WINDOW2:
810 return PSYS_TIME_WINDOW2;
811 default:
812 return prim;
813 }
814 }
815
816 /* Read primitive data based on its related struct rapl_primitive_info.
817 * if xlate flag is set, return translated data based on data units, i.e.
818 * time, energy, and power.
819 * RAPL MSRs are non-architectual and are laid out not consistently across
820 * domains. Here we use primitive info to allow writing consolidated access
821 * functions.
822 * For a given primitive, it is processed by MSR mask and shift. Unit conversion
823 * is pre-assigned based on RAPL unit MSRs read at init time.
824 * 63-------------------------- 31--------------------------- 0
825 * | xxxxx (mask) |
826 * | |<- shift ----------------|
827 * 63-------------------------- 31--------------------------- 0
828 */
rapl_read_data_raw(struct rapl_domain * rd,enum rapl_primitives prim,bool xlate,u64 * data)829 static int rapl_read_data_raw(struct rapl_domain *rd,
830 enum rapl_primitives prim, bool xlate, u64 *data)
831 {
832 u64 value;
833 enum rapl_primitives prim_fixed = prim_fixups(rd, prim);
834 struct rapl_primitive_info *rpi = get_rpi(rd->rp, prim_fixed);
835 struct reg_action ra;
836
837 if (!rpi || !rpi->name || rpi->flag & RAPL_PRIMITIVE_DUMMY)
838 return -EINVAL;
839
840 ra.reg = rd->regs[rpi->id];
841 if (!ra.reg.val)
842 return -EINVAL;
843
844 /* non-hardware data are collected by the polling thread */
845 if (rpi->flag & RAPL_PRIMITIVE_DERIVED) {
846 *data = rd->rdd.primitives[prim];
847 return 0;
848 }
849
850 ra.mask = rpi->mask;
851
852 if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
853 pr_debug("failed to read reg 0x%llx for %s:%s\n", ra.reg.val, rd->rp->name, rd->name);
854 return -EIO;
855 }
856
857 value = ra.value >> rpi->shift;
858
859 if (xlate)
860 *data = rapl_unit_xlate(rd, rpi->unit, value, 0);
861 else
862 *data = value;
863
864 return 0;
865 }
866
867 /* Similar use of primitive info in the read counterpart */
rapl_write_data_raw(struct rapl_domain * rd,enum rapl_primitives prim,unsigned long long value)868 static int rapl_write_data_raw(struct rapl_domain *rd,
869 enum rapl_primitives prim,
870 unsigned long long value)
871 {
872 enum rapl_primitives prim_fixed = prim_fixups(rd, prim);
873 struct rapl_primitive_info *rpi = get_rpi(rd->rp, prim_fixed);
874 u64 bits;
875 struct reg_action ra;
876 int ret;
877
878 if (!rpi || !rpi->name || rpi->flag & RAPL_PRIMITIVE_DUMMY)
879 return -EINVAL;
880
881 bits = rapl_unit_xlate(rd, rpi->unit, value, 1);
882 bits <<= rpi->shift;
883 bits &= rpi->mask;
884
885 memset(&ra, 0, sizeof(ra));
886
887 ra.reg = rd->regs[rpi->id];
888 ra.mask = rpi->mask;
889 ra.value = bits;
890
891 ret = rd->rp->priv->write_raw(get_rid(rd->rp), &ra);
892
893 return ret;
894 }
895
/*
 * Read attribute @pl_prim of power limit @pl in domain @rd.
 *
 * @xlate and @data are passed through to rapl_read_data_raw().
 *
 * Returns -EINVAL when the power limit is not valid for the domain or does
 * not offer the attribute, else the result of rapl_read_data_raw().
 */
static int rapl_read_pl_data(struct rapl_domain *rd, int pl,
			     enum pl_prims pl_prim, bool xlate, u64 *data)
{
	int prim;

	if (!is_pl_valid(rd, pl))
		return -EINVAL;

	/*
	 * get_pl_prim() reports unsupported combinations as a negative errno.
	 * Catch that here rather than carrying it in an enum rapl_primitives
	 * and relying on get_rpi() to reject it after a type conversion.
	 */
	prim = get_pl_prim(rd, pl, pl_prim);
	if (prim < 0)
		return prim;

	return rapl_read_data_raw(rd, prim, xlate, data);
}
906
rapl_write_pl_data(struct rapl_domain * rd,int pl,enum pl_prims pl_prim,unsigned long long value)907 static int rapl_write_pl_data(struct rapl_domain *rd, int pl,
908 enum pl_prims pl_prim,
909 unsigned long long value)
910 {
911 enum rapl_primitives prim = get_pl_prim(rd, pl, pl_prim);
912
913 if (!is_pl_valid(rd, pl))
914 return -EINVAL;
915
916 if (rd->rpl[pl].locked) {
917 pr_debug("%s:%s:%s locked by BIOS\n", rd->rp->name, rd->name, pl_names[pl]);
918 return -EACCES;
919 }
920
921 return rapl_write_data_raw(rd, prim, value);
922 }
923 /*
924 * Raw RAPL data stored in MSRs are in certain scales. We need to
925 * convert them into standard units based on the units reported in
926 * the RAPL unit MSRs. This is specific to CPUs as the method to
927 * calculate units differ on different CPUs.
928 * We convert the units to below format based on CPUs.
929 * i.e.
930 * energy unit: picoJoules : Represented in picoJoules by default
931 * power unit : microWatts : Represented in milliWatts by default
932 * time unit : microseconds: Represented in seconds by default
933 */
rapl_check_unit_core(struct rapl_domain * rd)934 static int rapl_check_unit_core(struct rapl_domain *rd)
935 {
936 struct reg_action ra;
937 u32 value;
938
939 ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
940 ra.mask = ~0;
941 if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
942 pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
943 ra.reg.val, rd->rp->name, rd->name);
944 return -ENODEV;
945 }
946
947 value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
948 rd->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value);
949
950 value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
951 rd->power_unit = 1000000 / (1 << value);
952
953 value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
954 rd->time_unit = 1000000 / (1 << value);
955
956 pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n",
957 rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
958
959 return 0;
960 }
961
rapl_check_unit_atom(struct rapl_domain * rd)962 static int rapl_check_unit_atom(struct rapl_domain *rd)
963 {
964 struct reg_action ra;
965 u32 value;
966
967 ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
968 ra.mask = ~0;
969 if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
970 pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
971 ra.reg.val, rd->rp->name, rd->name);
972 return -ENODEV;
973 }
974
975 value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
976 rd->energy_unit = ENERGY_UNIT_SCALE * 1 << value;
977
978 value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
979 rd->power_unit = (1 << value) * 1000;
980
981 value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
982 rd->time_unit = 1000000 / (1 << value);
983
984 pr_debug("Atom %s:%s energy=%dpJ, time=%dus, power=%duW\n",
985 rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
986
987 return 0;
988 }
989
power_limit_irq_save_cpu(void * info)990 static void power_limit_irq_save_cpu(void *info)
991 {
992 u32 l, h = 0;
993 struct rapl_package *rp = (struct rapl_package *)info;
994
995 /* save the state of PLN irq mask bit before disabling it */
996 rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h);
997 if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED)) {
998 rp->power_limit_irq = l & PACKAGE_THERM_INT_PLN_ENABLE;
999 rp->power_limit_irq |= PACKAGE_PLN_INT_SAVED;
1000 }
1001 l &= ~PACKAGE_THERM_INT_PLN_ENABLE;
1002 wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
1003 }
1004
/* REVISIT:
 * When package power limit is set artificially low by RAPL, LVT
 * thermal interrupt for package power limit should be ignored
 * since we are not really exceeding the real limit. The intention
 * is to avoid excessive interrupts while we are trying to save power.
 * A useful feature might be routing the package_power_limit interrupt
 * to userspace via eventfd. Once we have a use case, this is simple
 * to do by adding an atomic notifier.
 */
1014
package_power_limit_irq_save(struct rapl_package * rp)1015 static void package_power_limit_irq_save(struct rapl_package *rp)
1016 {
1017 if (rp->lead_cpu < 0)
1018 return;
1019
1020 if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN))
1021 return;
1022
1023 smp_call_function_single(rp->lead_cpu, power_limit_irq_save_cpu, rp, 1);
1024 }
1025
1026 /*
1027 * Restore per package power limit interrupt enable state. Called from cpu
1028 * hotplug code on package removal.
1029 */
package_power_limit_irq_restore(struct rapl_package * rp)1030 static void package_power_limit_irq_restore(struct rapl_package *rp)
1031 {
1032 u32 l, h;
1033
1034 if (rp->lead_cpu < 0)
1035 return;
1036
1037 if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN))
1038 return;
1039
1040 /* irq enable state not saved, nothing to restore */
1041 if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED))
1042 return;
1043
1044 rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h);
1045
1046 if (rp->power_limit_irq & PACKAGE_THERM_INT_PLN_ENABLE)
1047 l |= PACKAGE_THERM_INT_PLN_ENABLE;
1048 else
1049 l &= ~PACKAGE_THERM_INT_PLN_ENABLE;
1050
1051 wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
1052 }
1053
/*
 * Default .set_floor_freq() implementation: write @mode to the clamp
 * bit of power limit 1, and to both the enable and the clamp bits of
 * every power limit from POWER_LIMIT2 onwards.
 */
static void set_floor_freq_default(struct rapl_domain *rd, bool mode)
{
	int pl = POWER_LIMIT2;

	/*
	 * Always enable clamp such that the p-state can go below the OS
	 * requested range: power capping takes priority over guaranteed
	 * frequency.
	 */
	rapl_write_pl_data(rd, POWER_LIMIT1, PL_CLAMP, mode);

	while (pl < NR_POWER_LIMITS) {
		rapl_write_pl_data(rd, pl, PL_ENABLE, mode);
		rapl_write_pl_data(rd, pl, PL_CLAMP, mode);
		pl++;
	}
}
1068
/*
 * Atom .set_floor_freq() implementation, done through an IOSF-MBI
 * register. The register content is read while the cached copy is
 * still zero; when @enable is false that cached value is written back
 * unmodified.
 */
static void set_floor_freq_atom(struct rapl_domain *rd, bool enable)
{
	static u32 saved_power_ctrl;
	struct rapl_defaults *defaults = get_defaults(rd->rp);
	u32 mdata;

	if (!defaults->floor_freq_reg_addr) {
		pr_err("Invalid floor frequency config register\n");
		return;
	}

	if (!saved_power_ctrl)
		iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_CR_READ,
			      defaults->floor_freq_reg_addr,
			      &saved_power_ctrl);

	mdata = saved_power_ctrl;
	if (enable)
		/* replace the 7-bit field at bits 14:8 with the value 1 */
		mdata = (mdata & ~(0x7f << 8)) | (1 << 8);

	iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_CR_WRITE,
		       defaults->floor_freq_reg_addr, mdata);
}
1092
/*
 * Convert a time window between its raw register encoding and a plain
 * value expressed in multiples of rd->time_unit.
 *
 * The raw encoding is 7 bits wide: bits 4:0 hold the exponent Y and
 * bits 6:5 the fraction F, representing 2^Y * (1 + F/4) time units
 * (Intel Software Developer's manual Vol.3B: CH 14.9.3).
 *
 * @rd:     domain providing the time unit
 * @value:  raw field content (!@to_raw) or time to encode (@to_raw)
 * @to_raw: direction of the conversion
 *
 * Returns the converted value. When encoding, 0 is returned for times
 * shorter than one time unit and 0x7f when the exponent does not fit.
 */
static u64 rapl_compute_time_window_core(struct rapl_domain *rd, u64 value,
					 bool to_raw)
{
	u64 f, y;		/* fraction and exp. used for time unit */

	if (!to_raw) {
		f = (value & 0x60) >> 5;
		y = value & 0x1f;
		/*
		 * y can be as large as 31, so the shift must be done on a
		 * 64-bit operand: "1 << 31" overflows a signed int and the
		 * sign-extended result would corrupt the product.
		 */
		value = (1ULL << y) * (4 + f) * rd->time_unit / 4;
	} else {
		if (value < rd->time_unit)
			return 0;

		do_div(value, rd->time_unit);
		y = ilog2(value);

		/*
		 * The target hardware field is 7 bits wide, so return all ones
		 * if the exponent is too large.
		 */
		if (y > 0x1f)
			return 0x7f;

		/* F = 4 * (value / 2^Y - 1), truncated to the two fraction bits */
		f = div64_u64(4 * (value - (1ULL << y)), 1ULL << y);
		value = (y & 0x1f) | ((f & 0x3) << 5);
	}
	return value;
}
1125
/*
 * Atom time window conversion. The raw value is a plain multiplier of
 * rd->time_unit, so no exponent/fraction decoding is required.
 *
 * Returns the raw value when @to_raw is set, otherwise the time the raw
 * @value stands for.
 */
static u64 rapl_compute_time_window_atom(struct rapl_domain *rd, u64 value,
					 bool to_raw)
{
	if (to_raw)
		return div64_u64(value, rd->time_unit);

	/* The window is never 0: a raw zero still means one time unit. */
	if (!value)
		return rd->time_unit;

	return value * rd->time_unit;
}
1140
1141 /* TPMI Unit register has different layout */
1142 #define TPMI_POWER_UNIT_OFFSET POWER_UNIT_OFFSET
1143 #define TPMI_POWER_UNIT_MASK POWER_UNIT_MASK
1144 #define TPMI_ENERGY_UNIT_OFFSET 0x06
1145 #define TPMI_ENERGY_UNIT_MASK 0x7C0
1146 #define TPMI_TIME_UNIT_OFFSET 0x0C
1147 #define TPMI_TIME_UNIT_MASK 0xF000
1148
rapl_check_unit_tpmi(struct rapl_domain * rd)1149 static int rapl_check_unit_tpmi(struct rapl_domain *rd)
1150 {
1151 struct reg_action ra;
1152 u32 value;
1153
1154 ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
1155 ra.mask = ~0;
1156 if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
1157 pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
1158 ra.reg.val, rd->rp->name, rd->name);
1159 return -ENODEV;
1160 }
1161
1162 value = (ra.value & TPMI_ENERGY_UNIT_MASK) >> TPMI_ENERGY_UNIT_OFFSET;
1163 rd->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value);
1164
1165 value = (ra.value & TPMI_POWER_UNIT_MASK) >> TPMI_POWER_UNIT_OFFSET;
1166 rd->power_unit = 1000000 / (1 << value);
1167
1168 value = (ra.value & TPMI_TIME_UNIT_MASK) >> TPMI_TIME_UNIT_OFFSET;
1169 rd->time_unit = 1000000 / (1 << value);
1170
1171 pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n",
1172 rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
1173
1174 return 0;
1175 }
1176
1177 static const struct rapl_defaults defaults_tpmi = {
1178 .check_unit = rapl_check_unit_tpmi,
1179 /* Reuse existing logic, ignore the PL_CLAMP failures and enable all Power Limits */
1180 .set_floor_freq = set_floor_freq_default,
1181 .compute_time_window = rapl_compute_time_window_core,
1182 };
1183
1184 static const struct rapl_defaults rapl_defaults_core = {
1185 .floor_freq_reg_addr = 0,
1186 .check_unit = rapl_check_unit_core,
1187 .set_floor_freq = set_floor_freq_default,
1188 .compute_time_window = rapl_compute_time_window_core,
1189 };
1190
1191 static const struct rapl_defaults rapl_defaults_hsw_server = {
1192 .check_unit = rapl_check_unit_core,
1193 .set_floor_freq = set_floor_freq_default,
1194 .compute_time_window = rapl_compute_time_window_core,
1195 .dram_domain_energy_unit = 15300,
1196 };
1197
1198 static const struct rapl_defaults rapl_defaults_spr_server = {
1199 .check_unit = rapl_check_unit_core,
1200 .set_floor_freq = set_floor_freq_default,
1201 .compute_time_window = rapl_compute_time_window_core,
1202 .psys_domain_energy_unit = 1000000000,
1203 .spr_psys_bits = true,
1204 };
1205
1206 static const struct rapl_defaults rapl_defaults_byt = {
1207 .floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_BYT,
1208 .check_unit = rapl_check_unit_atom,
1209 .set_floor_freq = set_floor_freq_atom,
1210 .compute_time_window = rapl_compute_time_window_atom,
1211 };
1212
1213 static const struct rapl_defaults rapl_defaults_tng = {
1214 .floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_TNG,
1215 .check_unit = rapl_check_unit_atom,
1216 .set_floor_freq = set_floor_freq_atom,
1217 .compute_time_window = rapl_compute_time_window_atom,
1218 };
1219
1220 static const struct rapl_defaults rapl_defaults_ann = {
1221 .floor_freq_reg_addr = 0,
1222 .check_unit = rapl_check_unit_atom,
1223 .set_floor_freq = NULL,
1224 .compute_time_window = rapl_compute_time_window_atom,
1225 };
1226
1227 static const struct rapl_defaults rapl_defaults_cht = {
1228 .floor_freq_reg_addr = 0,
1229 .check_unit = rapl_check_unit_atom,
1230 .set_floor_freq = NULL,
1231 .compute_time_window = rapl_compute_time_window_atom,
1232 };
1233
1234 static const struct rapl_defaults rapl_defaults_amd = {
1235 .check_unit = rapl_check_unit_core,
1236 };
1237
1238 static const struct x86_cpu_id rapl_ids[] __initconst = {
1239 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &rapl_defaults_core),
1240 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &rapl_defaults_core),
1241
1242 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &rapl_defaults_core),
1243 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &rapl_defaults_core),
1244
1245 X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &rapl_defaults_core),
1246 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &rapl_defaults_core),
1247 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &rapl_defaults_core),
1248 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &rapl_defaults_hsw_server),
1249
1250 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &rapl_defaults_core),
1251 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &rapl_defaults_core),
1252 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &rapl_defaults_core),
1253 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &rapl_defaults_hsw_server),
1254
1255 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &rapl_defaults_core),
1256 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &rapl_defaults_core),
1257 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &rapl_defaults_hsw_server),
1258 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &rapl_defaults_core),
1259 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &rapl_defaults_core),
1260 X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &rapl_defaults_core),
1261 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &rapl_defaults_core),
1262 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &rapl_defaults_core),
1263 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI, &rapl_defaults_core),
1264 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &rapl_defaults_hsw_server),
1265 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &rapl_defaults_hsw_server),
1266 X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &rapl_defaults_core),
1267 X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &rapl_defaults_core),
1268 X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &rapl_defaults_core),
1269 X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &rapl_defaults_core),
1270 X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rapl_defaults_core),
1271 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &rapl_defaults_core),
1272 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &rapl_defaults_core),
1273 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &rapl_defaults_core),
1274 X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &rapl_defaults_core),
1275 X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &rapl_defaults_core),
1276 X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &rapl_defaults_core),
1277 X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &rapl_defaults_core),
1278 X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &rapl_defaults_core),
1279 X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &rapl_defaults_spr_server),
1280 X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &rapl_defaults_spr_server),
1281 X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD, &rapl_defaults_core),
1282
1283 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &rapl_defaults_byt),
1284 X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &rapl_defaults_cht),
1285 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &rapl_defaults_tng),
1286 X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT_MID, &rapl_defaults_ann),
1287 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &rapl_defaults_core),
1288 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &rapl_defaults_core),
1289 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &rapl_defaults_core),
1290 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, &rapl_defaults_core),
1291 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &rapl_defaults_core),
1292 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, &rapl_defaults_core),
1293
1294 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &rapl_defaults_hsw_server),
1295 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &rapl_defaults_hsw_server),
1296
1297 X86_MATCH_VENDOR_FAM(AMD, 0x17, &rapl_defaults_amd),
1298 X86_MATCH_VENDOR_FAM(AMD, 0x19, &rapl_defaults_amd),
1299 X86_MATCH_VENDOR_FAM(AMD, 0x1A, &rapl_defaults_amd),
1300 X86_MATCH_VENDOR_FAM(HYGON, 0x18, &rapl_defaults_amd),
1301 {}
1302 };
1303 MODULE_DEVICE_TABLE(x86cpu, rapl_ids);
1304
1305 /* Read once for all raw primitive data for domains */
rapl_update_domain_data(struct rapl_package * rp)1306 static void rapl_update_domain_data(struct rapl_package *rp)
1307 {
1308 int dmn, prim;
1309 u64 val;
1310
1311 for (dmn = 0; dmn < rp->nr_domains; dmn++) {
1312 pr_debug("update %s domain %s data\n", rp->name,
1313 rp->domains[dmn].name);
1314 /* exclude non-raw primitives */
1315 for (prim = 0; prim < NR_RAW_PRIMITIVES; prim++) {
1316 struct rapl_primitive_info *rpi = get_rpi(rp, prim);
1317
1318 if (!rapl_read_data_raw(&rp->domains[dmn], prim,
1319 rpi->unit, &val))
1320 rp->domains[dmn].rdd.primitives[prim] = val;
1321 }
1322 }
1323
1324 }
1325
rapl_package_register_powercap(struct rapl_package * rp)1326 static int rapl_package_register_powercap(struct rapl_package *rp)
1327 {
1328 struct rapl_domain *rd;
1329 struct powercap_zone *power_zone = NULL;
1330 int nr_pl, ret;
1331
1332 /* Update the domain data of the new package */
1333 rapl_update_domain_data(rp);
1334
1335 /* first we register package domain as the parent zone */
1336 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
1337 if (rd->id == RAPL_DOMAIN_PACKAGE) {
1338 nr_pl = find_nr_power_limit(rd);
1339 pr_debug("register package domain %s\n", rp->name);
1340 power_zone = powercap_register_zone(&rd->power_zone,
1341 rp->priv->control_type, rp->name,
1342 NULL, &zone_ops[rd->id], nr_pl,
1343 &constraint_ops);
1344 if (IS_ERR(power_zone)) {
1345 pr_debug("failed to register power zone %s\n",
1346 rp->name);
1347 return PTR_ERR(power_zone);
1348 }
1349 /* track parent zone in per package/socket data */
1350 rp->power_zone = power_zone;
1351 /* done, only one package domain per socket */
1352 break;
1353 }
1354 }
1355 if (!power_zone) {
1356 pr_err("no package domain found, unknown topology!\n");
1357 return -ENODEV;
1358 }
1359 /* now register domains as children of the socket/package */
1360 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
1361 struct powercap_zone *parent = rp->power_zone;
1362
1363 if (rd->id == RAPL_DOMAIN_PACKAGE)
1364 continue;
1365 if (rd->id == RAPL_DOMAIN_PLATFORM)
1366 parent = NULL;
1367 /* number of power limits per domain varies */
1368 nr_pl = find_nr_power_limit(rd);
1369 power_zone = powercap_register_zone(&rd->power_zone,
1370 rp->priv->control_type,
1371 rd->name, parent,
1372 &zone_ops[rd->id], nr_pl,
1373 &constraint_ops);
1374
1375 if (IS_ERR(power_zone)) {
1376 pr_debug("failed to register power_zone, %s:%s\n",
1377 rp->name, rd->name);
1378 ret = PTR_ERR(power_zone);
1379 goto err_cleanup;
1380 }
1381 }
1382 return 0;
1383
1384 err_cleanup:
1385 /*
1386 * Clean up previously initialized domains within the package if we
1387 * failed after the first domain setup.
1388 */
1389 while (--rd >= rp->domains) {
1390 pr_debug("unregister %s domain %s\n", rp->name, rd->name);
1391 powercap_unregister_zone(rp->priv->control_type,
1392 &rd->power_zone);
1393 }
1394
1395 return ret;
1396 }
1397
rapl_check_domain(int domain,struct rapl_package * rp)1398 static int rapl_check_domain(int domain, struct rapl_package *rp)
1399 {
1400 struct reg_action ra;
1401
1402 switch (domain) {
1403 case RAPL_DOMAIN_PACKAGE:
1404 case RAPL_DOMAIN_PP0:
1405 case RAPL_DOMAIN_PP1:
1406 case RAPL_DOMAIN_DRAM:
1407 case RAPL_DOMAIN_PLATFORM:
1408 ra.reg = rp->priv->regs[domain][RAPL_DOMAIN_REG_STATUS];
1409 break;
1410 default:
1411 pr_err("invalid domain id %d\n", domain);
1412 return -EINVAL;
1413 }
1414 /* make sure domain counters are available and contains non-zero
1415 * values, otherwise skip it.
1416 */
1417
1418 ra.mask = ENERGY_STATUS_MASK;
1419 if (rp->priv->read_raw(get_rid(rp), &ra) || !ra.value)
1420 return -ENODEV;
1421
1422 return 0;
1423 }
1424
1425 /*
1426 * Get per domain energy/power/time unit.
1427 * RAPL Interfaces without per domain unit register will use the package
1428 * scope unit register to set per domain units.
1429 */
rapl_get_domain_unit(struct rapl_domain * rd)1430 static int rapl_get_domain_unit(struct rapl_domain *rd)
1431 {
1432 struct rapl_defaults *defaults = get_defaults(rd->rp);
1433 int ret;
1434
1435 if (!rd->regs[RAPL_DOMAIN_REG_UNIT].val) {
1436 if (!rd->rp->priv->reg_unit.val) {
1437 pr_err("No valid Unit register found\n");
1438 return -ENODEV;
1439 }
1440 rd->regs[RAPL_DOMAIN_REG_UNIT] = rd->rp->priv->reg_unit;
1441 }
1442
1443 if (!defaults->check_unit) {
1444 pr_err("missing .check_unit() callback\n");
1445 return -ENODEV;
1446 }
1447
1448 ret = defaults->check_unit(rd);
1449 if (ret)
1450 return ret;
1451
1452 if (rd->id == RAPL_DOMAIN_DRAM && defaults->dram_domain_energy_unit)
1453 rd->energy_unit = defaults->dram_domain_energy_unit;
1454 if (rd->id == RAPL_DOMAIN_PLATFORM && defaults->psys_domain_energy_unit)
1455 rd->energy_unit = defaults->psys_domain_energy_unit;
1456 return 0;
1457 }
1458
1459 /*
1460 * Check if power limits are available. Two cases when they are not available:
1461 * 1. Locked by BIOS, in this case we still provide read-only access so that
1462 * users can see what limit is set by the BIOS.
1463 * 2. Some CPUs make some domains monitoring only which means PLx MSRs may not
1464 * exist at all. In this case, we do not show the constraints in powercap.
1465 *
1466 * Called after domains are detected and initialized.
1467 */
rapl_detect_powerlimit(struct rapl_domain * rd)1468 static void rapl_detect_powerlimit(struct rapl_domain *rd)
1469 {
1470 u64 val64;
1471 int i;
1472
1473 for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) {
1474 if (!rapl_read_pl_data(rd, i, PL_LOCK, false, &val64)) {
1475 if (val64) {
1476 rd->rpl[i].locked = true;
1477 pr_info("%s:%s:%s locked by BIOS\n",
1478 rd->rp->name, rd->name, pl_names[i]);
1479 }
1480 }
1481
1482 if (rapl_read_pl_data(rd, i, PL_LIMIT, false, &val64))
1483 rd->rpl[i].name = NULL;
1484 }
1485 }
1486
/*
 * Detect active and valid domains for the given CPU. The caller must
 * ensure the CPU belongs to the targeted package and CPU hotplug is disabled.
 */
rapl_detect_domains(struct rapl_package * rp)1490 static int rapl_detect_domains(struct rapl_package *rp)
1491 {
1492 struct rapl_domain *rd;
1493 int i;
1494
1495 for (i = 0; i < RAPL_DOMAIN_MAX; i++) {
1496 /* use physical package id to read counters */
1497 if (!rapl_check_domain(i, rp)) {
1498 rp->domain_map |= 1 << i;
1499 pr_info("Found RAPL domain %s\n", rapl_domain_names[i]);
1500 }
1501 }
1502 rp->nr_domains = bitmap_weight(&rp->domain_map, RAPL_DOMAIN_MAX);
1503 if (!rp->nr_domains) {
1504 pr_debug("no valid rapl domains found in %s\n", rp->name);
1505 return -ENODEV;
1506 }
1507 pr_debug("found %d domains on %s\n", rp->nr_domains, rp->name);
1508
1509 rp->domains = kcalloc(rp->nr_domains, sizeof(struct rapl_domain),
1510 GFP_KERNEL);
1511 if (!rp->domains)
1512 return -ENOMEM;
1513
1514 rapl_init_domains(rp);
1515
1516 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
1517 rapl_get_domain_unit(rd);
1518 rapl_detect_powerlimit(rd);
1519 }
1520
1521 return 0;
1522 }
1523
1524 /* called from CPU hotplug notifier, hotplug lock held */
rapl_remove_package_cpuslocked(struct rapl_package * rp)1525 void rapl_remove_package_cpuslocked(struct rapl_package *rp)
1526 {
1527 struct rapl_domain *rd, *rd_package = NULL;
1528
1529 package_power_limit_irq_restore(rp);
1530
1531 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
1532 int i;
1533
1534 for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) {
1535 rapl_write_pl_data(rd, i, PL_ENABLE, 0);
1536 rapl_write_pl_data(rd, i, PL_CLAMP, 0);
1537 }
1538
1539 if (rd->id == RAPL_DOMAIN_PACKAGE) {
1540 rd_package = rd;
1541 continue;
1542 }
1543 pr_debug("remove package, undo power limit on %s: %s\n",
1544 rp->name, rd->name);
1545 powercap_unregister_zone(rp->priv->control_type,
1546 &rd->power_zone);
1547 }
1548 /* do parent zone last */
1549 powercap_unregister_zone(rp->priv->control_type,
1550 &rd_package->power_zone);
1551 list_del(&rp->plist);
1552 kfree(rp);
1553 }
1554 EXPORT_SYMBOL_GPL(rapl_remove_package_cpuslocked);
1555
rapl_remove_package(struct rapl_package * rp)1556 void rapl_remove_package(struct rapl_package *rp)
1557 {
1558 guard(cpus_read_lock)();
1559 rapl_remove_package_cpuslocked(rp);
1560 }
1561 EXPORT_SYMBOL_GPL(rapl_remove_package);
1562
1563 /* caller to ensure CPU hotplug lock is held */
struct rapl_package *rapl_find_package_domain_cpuslocked(int id, struct rapl_if_priv *priv,
							 bool id_is_cpu)
{
	struct rapl_package *rp;
	int uid = id;

	/* a CPU number is translated to the logical die id of that CPU */
	if (id_is_cpu)
		uid = topology_logical_die_id(id);

	list_for_each_entry(rp, &rapl_packages, plist) {
		if (rp->id != uid)
			continue;
		/* the package must also belong to the same control type */
		if (rp->priv->control_type != priv->control_type)
			continue;

		return rp;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(rapl_find_package_domain_cpuslocked);
1584
/*
 * Variant of rapl_find_package_domain_cpuslocked() for callers that do
 * not hold the CPU hotplug lock. Returns the matching package or NULL.
 */
struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu)
{
	struct rapl_package *rp;

	guard(cpus_read_lock)();
	rp = rapl_find_package_domain_cpuslocked(id, priv, id_is_cpu);

	return rp;
}
EXPORT_SYMBOL_GPL(rapl_find_package_domain);
1591
1592 /* called from CPU hotplug notifier, hotplug lock held */
struct rapl_package *rapl_add_package_cpuslocked(int id, struct rapl_if_priv *priv, bool id_is_cpu)
{
	struct rapl_package *rp;
	int ret;

	rp = kzalloc(sizeof(*rp), GFP_KERNEL);
	if (!rp)
		return ERR_PTR(-ENOMEM);

	if (!id_is_cpu) {
		/* @id is used as the package id, no lead CPU is recorded */
		rp->id = id;
		rp->lead_cpu = -1;
		snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d", id);
	} else {
		/* @id is a CPU number: it becomes the lead CPU */
		rp->id = topology_logical_die_id(id);
		rp->lead_cpu = id;
		if (topology_max_die_per_package() <= 1)
			snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d",
				 topology_physical_package_id(id));
		else
			snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d-die-%d",
				 topology_physical_package_id(id), topology_die_id(id));
	}

	rp->priv = priv;
	ret = rapl_config(rp);
	if (ret)
		goto err_free_package;

	/* any domain detection failure is reported as -ENODEV */
	if (rapl_detect_domains(rp)) {
		ret = -ENODEV;
		goto err_free_package;
	}

	ret = rapl_package_register_powercap(rp);
	if (ret)
		goto err_free_package;

	/* fully set up: add the package to the global list */
	INIT_LIST_HEAD(&rp->plist);
	list_add(&rp->plist, &rapl_packages);
	return rp;

err_free_package:
	kfree(rp->domains);
	kfree(rp);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(rapl_add_package_cpuslocked);
1640
/*
 * Variant of rapl_add_package_cpuslocked() for callers that do not hold
 * the CPU hotplug lock. Returns the new package or an ERR_PTR() value.
 */
struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu)
{
	struct rapl_package *rp;

	guard(cpus_read_lock)();
	rp = rapl_add_package_cpuslocked(id, priv, id_is_cpu);

	return rp;
}
EXPORT_SYMBOL_GPL(rapl_add_package);
1647
power_limit_state_save(void)1648 static void power_limit_state_save(void)
1649 {
1650 struct rapl_package *rp;
1651 struct rapl_domain *rd;
1652 int ret, i;
1653
1654 cpus_read_lock();
1655 list_for_each_entry(rp, &rapl_packages, plist) {
1656 if (!rp->power_zone)
1657 continue;
1658 rd = power_zone_to_rapl_domain(rp->power_zone);
1659 for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) {
1660 ret = rapl_read_pl_data(rd, i, PL_LIMIT, true,
1661 &rd->rpl[i].last_power_limit);
1662 if (ret)
1663 rd->rpl[i].last_power_limit = 0;
1664 }
1665 }
1666 cpus_read_unlock();
1667 }
1668
power_limit_state_restore(void)1669 static void power_limit_state_restore(void)
1670 {
1671 struct rapl_package *rp;
1672 struct rapl_domain *rd;
1673 int i;
1674
1675 cpus_read_lock();
1676 list_for_each_entry(rp, &rapl_packages, plist) {
1677 if (!rp->power_zone)
1678 continue;
1679 rd = power_zone_to_rapl_domain(rp->power_zone);
1680 for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++)
1681 if (rd->rpl[i].last_power_limit)
1682 rapl_write_pl_data(rd, i, PL_LIMIT,
1683 rd->rpl[i].last_power_limit);
1684 }
1685 cpus_read_unlock();
1686 }
1687
rapl_pm_callback(struct notifier_block * nb,unsigned long mode,void * _unused)1688 static int rapl_pm_callback(struct notifier_block *nb,
1689 unsigned long mode, void *_unused)
1690 {
1691 switch (mode) {
1692 case PM_SUSPEND_PREPARE:
1693 power_limit_state_save();
1694 break;
1695 case PM_POST_SUSPEND:
1696 power_limit_state_restore();
1697 break;
1698 }
1699 return NOTIFY_OK;
1700 }
1701
1702 static struct notifier_block rapl_pm_notifier = {
1703 .notifier_call = rapl_pm_callback,
1704 };
1705
1706 static struct platform_device *rapl_msr_platdev;
1707
rapl_init(void)1708 static int __init rapl_init(void)
1709 {
1710 const struct x86_cpu_id *id;
1711 int ret;
1712
1713 id = x86_match_cpu(rapl_ids);
1714 if (id) {
1715 defaults_msr = (struct rapl_defaults *)id->driver_data;
1716
1717 rapl_msr_platdev = platform_device_alloc("intel_rapl_msr", 0);
1718 if (!rapl_msr_platdev)
1719 return -ENOMEM;
1720
1721 ret = platform_device_add(rapl_msr_platdev);
1722 if (ret) {
1723 platform_device_put(rapl_msr_platdev);
1724 return ret;
1725 }
1726 }
1727
1728 ret = register_pm_notifier(&rapl_pm_notifier);
1729 if (ret && rapl_msr_platdev) {
1730 platform_device_del(rapl_msr_platdev);
1731 platform_device_put(rapl_msr_platdev);
1732 }
1733
1734 return ret;
1735 }
1736
rapl_exit(void)1737 static void __exit rapl_exit(void)
1738 {
1739 platform_device_unregister(rapl_msr_platdev);
1740 unregister_pm_notifier(&rapl_pm_notifier);
1741 }
1742
1743 fs_initcall(rapl_init);
1744 module_exit(rapl_exit);
1745
1746 MODULE_DESCRIPTION("Intel Runtime Average Power Limit (RAPL) common code");
1747 MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@intel.com>");
1748 MODULE_LICENSE("GPL v2");
1749