// SPDX-License-Identifier: GPL-2.0-only
/*
 * Performance events - AMD Processor Power Reporting Mechanism
 *
 * Copyright (C) 2016 Advanced Micro Devices, Inc.
 *
 * Author: Huang Rui <ray.huang@amd.com>
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <asm/cpu_device_id.h>
#include "../perf_event.h"

/*
 * Event code: LSB 8 bits, passed in attr->config; any other bit is
 * reserved.
 */
#define AMD_POWER_EVENT_MASK	0xFFULL

/*
 * Accumulated power status counters.
 */
#define AMD_POWER_EVENTSEL_PKG	1

/*
 * The ratio of compute unit power accumulator sample period to the
 * PTSC period.
 */
static unsigned int cpu_pwr_sample_ratio;

/* Maximum accumulated power of a compute unit. */
static u64 max_cu_acc_power;

static struct pmu pmu_class;

/*
 * Accumulated power represents the sum of each compute unit's (CU) power
 * consumption. On any core of each CU we read the total accumulated power
 * from MSR_F15H_CU_PWR_ACCUMULATOR. cpu_mask is the bitmap of all cores
 * picked to measure the power of the CUs they belong to.
 */
static cpumask_t cpu_mask;

static void event_update(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev_pwr_acc, new_pwr_acc, prev_ptsc, new_ptsc;
	u64 delta, tdelta;

	prev_pwr_acc = hwc->pwr_acc;
	prev_ptsc = hwc->ptsc;
	rdmsrl(MSR_F15H_CU_PWR_ACCUMULATOR, new_pwr_acc);
	rdmsrl(MSR_F15H_PTSC, new_ptsc);

	/*
	 * Calculate the CU power consumption over a time period; the unit
	 * of the final value (delta) is micro-Watts. Then add it to the
	 * event count.
	 */
	if (new_pwr_acc < prev_pwr_acc) {
		delta = max_cu_acc_power + new_pwr_acc;
		delta -= prev_pwr_acc;
	} else
		delta = new_pwr_acc - prev_pwr_acc;

	delta *= cpu_pwr_sample_ratio * 1000;
	tdelta = new_ptsc - prev_ptsc;

	do_div(delta, tdelta);
	local64_add(delta, &event->count);
}
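
/*
 * Worked example with made-up numbers (not hardware-derived): assume
 * cpu_pwr_sample_ratio is 5000 and no accumulator wraparound occurred.
 * If the accumulator advanced by 2,000,000 while PTSC advanced by
 * 1,000,000 ticks, event_update() computes:
 *
 *   delta  = 2,000,000 * 5000 * 1000 = 10^13
 *   delta /= 1,000,000               -> 10^7 micro-Watts, i.e. 10 Watts
 *
 * The wraparound branch adds max_cu_acc_power back in so that a single
 * overflow of the hardware accumulator between two reads does not turn
 * into a huge bogus unsigned delta.
 */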

static void __pmu_event_start(struct perf_event *event)
{
	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
		return;

	event->hw.state = 0;

	rdmsrl(MSR_F15H_PTSC, event->hw.ptsc);
	rdmsrl(MSR_F15H_CU_PWR_ACCUMULATOR, event->hw.pwr_acc);
}

static void pmu_event_start(struct perf_event *event, int mode)
{
	__pmu_event_start(event);
}

static void pmu_event_stop(struct perf_event *event, int mode)
{
	struct hw_perf_event *hwc = &event->hw;

	/* Mark event as deactivated and stopped. */
	if (!(hwc->state & PERF_HES_STOPPED))
		hwc->state |= PERF_HES_STOPPED;

	/* Check if software counter update is necessary. */
	if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		/*
		 * Drain the remaining delta count out of an event
		 * that we are disabling:
		 */
		event_update(event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}

static int pmu_event_add(struct perf_event *event, int mode)
{
	struct hw_perf_event *hwc = &event->hw;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	if (mode & PERF_EF_START)
		__pmu_event_start(event);

	return 0;
}

static void pmu_event_del(struct perf_event *event, int flags)
{
	pmu_event_stop(event, PERF_EF_UPDATE);
}

static int pmu_event_init(struct perf_event *event)
{
	u64 cfg = event->attr.config & AMD_POWER_EVENT_MASK;

	/* Only look at AMD power events. */
	if (event->attr.type != pmu_class.type)
		return -ENOENT;

	/* Unsupported modes and filters. */
	if (event->attr.sample_period)
		return -EINVAL;

	if (cfg != AMD_POWER_EVENTSEL_PKG)
		return -EINVAL;

	return 0;
}

static void pmu_event_read(struct perf_event *event)
{
	event_update(event);
}

static ssize_t
get_attr_cpumask(struct device *dev, struct device_attribute *attr, char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &cpu_mask);
}

static DEVICE_ATTR(cpumask, S_IRUGO, get_attr_cpumask, NULL);

static struct attribute *pmu_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static struct attribute_group pmu_attr_group = {
	.attrs = pmu_attrs,
};

/*
 * Currently this PMU only supports reporting the power of each
 * processor/package.
 */
EVENT_ATTR_STR(power-pkg, power_pkg, "event=0x01");

EVENT_ATTR_STR(power-pkg.unit, power_pkg_unit, "mWatts");

/* Convert the count from micro-Watts to milli-Watts. */
EVENT_ATTR_STR(power-pkg.scale, power_pkg_scale, "1.000000e-3");

static struct attribute *events_attr[] = {
	EVENT_PTR(power_pkg),
	EVENT_PTR(power_pkg_unit),
	EVENT_PTR(power_pkg_scale),
	NULL,
};

static struct attribute_group pmu_events_group = {
	.name = "events",
	.attrs = events_attr,
};

PMU_FORMAT_ATTR(event, "config:0-7");

static struct attribute *formats_attr[] = {
	&format_attr_event.attr,
	NULL,
};

static struct attribute_group pmu_format_group = {
	.name = "format",
	.attrs = formats_attr,
};

static const struct attribute_group *attr_groups[] = {
	&pmu_attr_group,
	&pmu_format_group,
	&pmu_events_group,
	NULL,
};

static struct pmu pmu_class = {
	.attr_groups = attr_groups,
	/* system-wide only */
	.task_ctx_nr = perf_invalid_context,
	.event_init = pmu_event_init,
	.add = pmu_event_add,
	.del = pmu_event_del,
	.start = pmu_event_start,
	.stop = pmu_event_stop,
	.read = pmu_event_read,
	.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};

static int power_cpu_exit(unsigned int cpu)
{
	int target;

	if (!cpumask_test_and_clear_cpu(cpu, &cpu_mask))
		return 0;

	/*
	 * If this CPU was its compute unit's designated reader and other
	 * CPUs of the unit are still online, pick one of them as the new
	 * reader and migrate the event context to it.
	 */
	target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
	if (target < nr_cpumask_bits) {
		cpumask_set_cpu(target, &cpu_mask);
		perf_pmu_migrate_context(&pmu_class, cpu, target);
	}
	return 0;
}

static int power_cpu_init(unsigned int cpu)
{
	int target;

	/*
	 * 1) If any CPU of the same compute unit is already set in
	 *    cpu_mask, do nothing.
	 * 2) If no CPU of the same compute unit is set in cpu_mask, set
	 *    the CPU that is coming online.
	 *
	 * Note: if there is a CPU aside of the new one already in the
	 * sibling mask, then it is also in cpu_mask.
	 */
	target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
	if (target >= nr_cpumask_bits)
		cpumask_set_cpu(cpu, &cpu_mask);
	return 0;
}
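
/*
 * Example hotplug flow, assuming a compute unit whose sibling mask is
 * {2, 3}:
 *
 *  - CPU 2 onlines first: no other sibling is online, so
 *    power_cpu_init() sets CPU 2 in cpu_mask as the unit's reader.
 *  - CPU 3 onlines: cpumask_any_but() finds CPU 2 among the siblings,
 *    so nothing is done; CPU 2 keeps reading for the unit.
 *  - CPU 2 offlines: power_cpu_exit() clears CPU 2 from cpu_mask, sets
 *    CPU 3 instead and migrates the perf context over to it.
 */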

static const struct x86_cpu_id cpu_match[] = {
	X86_MATCH_VENDOR_FAM(AMD, 0x15, NULL),
	{},
};

static int __init amd_power_pmu_init(void)
{
	int ret;

	if (!x86_match_cpu(cpu_match))
		return -ENODEV;

	if (!boot_cpu_has(X86_FEATURE_ACC_POWER))
		return -ENODEV;

	cpu_pwr_sample_ratio = cpuid_ecx(0x80000007);

	if (rdmsrl_safe(MSR_F15H_CU_MAX_PWR_ACCUMULATOR, &max_cu_acc_power)) {
		pr_err("Failed to read max compute unit power accumulator MSR\n");
		return -ENODEV;
	}

	cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_POWER_ONLINE,
			  "perf/x86/amd/power:online",
			  power_cpu_init, power_cpu_exit);

	ret = perf_pmu_register(&pmu_class, "power", -1);
	if (WARN_ON(ret)) {
		pr_warn("AMD Power PMU registration failed\n");
		return ret;
	}

	pr_info("AMD Power PMU detected\n");
	return ret;
}
module_init(amd_power_pmu_init);

static void __exit amd_power_pmu_exit(void)
{
	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_AMD_POWER_ONLINE);
	perf_pmu_unregister(&pmu_class);
}
module_exit(amd_power_pmu_exit);

MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
MODULE_DESCRIPTION("AMD Processor Power Reporting Mechanism");
MODULE_LICENSE("GPL v2");
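
/*
 * Usage sketch (not part of the driver; kept under "#if 0" so this file
 * still compiles as-is): a minimal userspace reader for the "power" PMU
 * registered above, built as a separate program. The PMU type is dynamic
 * and must be read from sysfs; the 0x01 in attr.config matches
 * AMD_POWER_EVENTSEL_PKG. For simplicity the sketch opens the event on
 * CPU 0; a robust reader would pick CPUs from
 * /sys/bus/event_source/devices/power/cpumask. The perf tool equivalent:
 *
 *     perf stat -a -e power/power-pkg/ -- sleep 1
 */
#if 0 /* example only; build separately, needs CAP_PERFMON or root */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	unsigned int type;
	uint64_t count;
	FILE *f;
	int fd;

	/* The PMU type is assigned at registration time; read it from sysfs. */
	f = fopen("/sys/bus/event_source/devices/power/type", "r");
	if (!f || fscanf(f, "%u", &type) != 1)
		return 1;
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.type = type;
	attr.size = sizeof(attr);
	attr.config = 0x01;	/* AMD_POWER_EVENTSEL_PKG */

	/* System-wide event: pid == -1, bound to one CPU. */
	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0)
		return 1;

	sleep(1);
	if (read(fd, &count, sizeof(count)) != sizeof(count))
		return 1;

	/* Raw count is in micro-Watts; the 1e-3 scale yields milli-Watts. */
	printf("power-pkg: %.3f mW\n", (double)count * 1e-3);
	return 0;
}
#endif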