1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Xtensa Performance Monitor Module driver 4 * See Tensilica Debug User's Guide for PMU registers documentation. 5 * 6 * Copyright (C) 2015 Cadence Design Systems Inc. 7 */ 8 9 #include <linux/interrupt.h> 10 #include <linux/irqdomain.h> 11 #include <linux/module.h> 12 #include <linux/of.h> 13 #include <linux/perf_event.h> 14 #include <linux/platform_device.h> 15 16 #include <asm/core.h> 17 #include <asm/processor.h> 18 #include <asm/stacktrace.h> 19 20 #define XTENSA_HWVERSION_RG_2015_0 260000 21 22 #if XCHAL_HW_MIN_VERSION >= XTENSA_HWVERSION_RG_2015_0 23 #define XTENSA_PMU_ERI_BASE 0x00101000 24 #else 25 #define XTENSA_PMU_ERI_BASE 0x00001000 26 #endif 27 28 /* Global control/status for all perf counters */ 29 #define XTENSA_PMU_PMG XTENSA_PMU_ERI_BASE 30 /* Perf counter values */ 31 #define XTENSA_PMU_PM(i) (XTENSA_PMU_ERI_BASE + 0x80 + (i) * 4) 32 /* Perf counter control registers */ 33 #define XTENSA_PMU_PMCTRL(i) (XTENSA_PMU_ERI_BASE + 0x100 + (i) * 4) 34 /* Perf counter status registers */ 35 #define XTENSA_PMU_PMSTAT(i) (XTENSA_PMU_ERI_BASE + 0x180 + (i) * 4) 36 37 #define XTENSA_PMU_PMG_PMEN 0x1 38 39 #define XTENSA_PMU_COUNTER_MASK 0xffffffffULL 40 #define XTENSA_PMU_COUNTER_MAX 0x7fffffff 41 42 #define XTENSA_PMU_PMCTRL_INTEN 0x00000001 43 #define XTENSA_PMU_PMCTRL_KRNLCNT 0x00000008 44 #define XTENSA_PMU_PMCTRL_TRACELEVEL 0x000000f0 45 #define XTENSA_PMU_PMCTRL_SELECT_SHIFT 8 46 #define XTENSA_PMU_PMCTRL_SELECT 0x00001f00 47 #define XTENSA_PMU_PMCTRL_MASK_SHIFT 16 48 #define XTENSA_PMU_PMCTRL_MASK 0xffff0000 49 50 #define XTENSA_PMU_MASK(select, mask) \ 51 (((select) << XTENSA_PMU_PMCTRL_SELECT_SHIFT) | \ 52 ((mask) << XTENSA_PMU_PMCTRL_MASK_SHIFT) | \ 53 XTENSA_PMU_PMCTRL_TRACELEVEL | \ 54 XTENSA_PMU_PMCTRL_INTEN) 55 56 #define XTENSA_PMU_PMSTAT_OVFL 0x00000001 57 #define XTENSA_PMU_PMSTAT_INTASRT 0x00000010 58 59 struct xtensa_pmu_events { 60 /* Array of events currently on this core */ 61 struct perf_event *event[XCHAL_NUM_PERF_COUNTERS]; 62 /* Bitmap of used hardware counters */ 63 unsigned long used_mask[BITS_TO_LONGS(XCHAL_NUM_PERF_COUNTERS)]; 64 }; 65 static DEFINE_PER_CPU(struct xtensa_pmu_events, xtensa_pmu_events); 66 67 static const u32 xtensa_hw_ctl[] = { 68 [PERF_COUNT_HW_CPU_CYCLES] = XTENSA_PMU_MASK(0, 0x1), 69 [PERF_COUNT_HW_INSTRUCTIONS] = XTENSA_PMU_MASK(2, 0xffff), 70 [PERF_COUNT_HW_CACHE_REFERENCES] = XTENSA_PMU_MASK(10, 0x1), 71 [PERF_COUNT_HW_CACHE_MISSES] = XTENSA_PMU_MASK(12, 0x1), 72 /* Taken and non-taken branches + taken loop ends */ 73 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XTENSA_PMU_MASK(2, 0x490), 74 /* Instruction-related + other global stall cycles */ 75 [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XTENSA_PMU_MASK(4, 0x1ff), 76 /* Data-related global stall cycles */ 77 [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = XTENSA_PMU_MASK(3, 0x1ff), 78 }; 79 80 #define C(_x) PERF_COUNT_HW_CACHE_##_x 81 82 static const u32 xtensa_cache_ctl[][C(OP_MAX)][C(RESULT_MAX)] = { 83 [C(L1D)] = { 84 [C(OP_READ)] = { 85 [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(10, 0x1), 86 [C(RESULT_MISS)] = XTENSA_PMU_MASK(10, 0x2), 87 }, 88 [C(OP_WRITE)] = { 89 [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(11, 0x1), 90 [C(RESULT_MISS)] = XTENSA_PMU_MASK(11, 0x2), 91 }, 92 }, 93 [C(L1I)] = { 94 [C(OP_READ)] = { 95 [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(8, 0x1), 96 [C(RESULT_MISS)] = XTENSA_PMU_MASK(8, 0x2), 97 }, 98 }, 99 [C(DTLB)] = { 100 [C(OP_READ)] = { 101 [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(9, 0x1), 102 [C(RESULT_MISS)] = XTENSA_PMU_MASK(9, 0x8), 103 }, 104 }, 105 [C(ITLB)] = { 106 [C(OP_READ)] = { 107 [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(7, 0x1), 108 [C(RESULT_MISS)] = XTENSA_PMU_MASK(7, 0x8), 109 }, 110 }, 111 }; 112 113 static int xtensa_pmu_cache_event(u64 config) 114 { 115 unsigned int cache_type, cache_op, cache_result; 116 int ret; 117 118 cache_type = (config >> 0) & 0xff; 119 cache_op = (config >> 8) & 0xff; 120 cache_result = (config >> 16) & 0xff; 121 122 if (cache_type >= ARRAY_SIZE(xtensa_cache_ctl) || 123 cache_op >= C(OP_MAX) || 124 cache_result >= C(RESULT_MAX)) 125 return -EINVAL; 126 127 ret = xtensa_cache_ctl[cache_type][cache_op][cache_result]; 128 129 if (ret == 0) 130 return -EINVAL; 131 132 return ret; 133 } 134 135 static inline uint32_t xtensa_pmu_read_counter(int idx) 136 { 137 return get_er(XTENSA_PMU_PM(idx)); 138 } 139 140 static inline void xtensa_pmu_write_counter(int idx, uint32_t v) 141 { 142 set_er(v, XTENSA_PMU_PM(idx)); 143 } 144 145 static void xtensa_perf_event_update(struct perf_event *event, 146 struct hw_perf_event *hwc, int idx) 147 { 148 uint64_t prev_raw_count, new_raw_count; 149 int64_t delta; 150 151 do { 152 prev_raw_count = local64_read(&hwc->prev_count); 153 new_raw_count = xtensa_pmu_read_counter(event->hw.idx); 154 } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count, 155 new_raw_count) != prev_raw_count); 156 157 delta = (new_raw_count - prev_raw_count) & XTENSA_PMU_COUNTER_MASK; 158 159 local64_add(delta, &event->count); 160 local64_sub(delta, &hwc->period_left); 161 } 162 163 static bool xtensa_perf_event_set_period(struct perf_event *event, 164 struct hw_perf_event *hwc, int idx) 165 { 166 bool rc = false; 167 s64 left; 168 169 if (!is_sampling_event(event)) { 170 left = XTENSA_PMU_COUNTER_MAX; 171 } else { 172 s64 period = hwc->sample_period; 173 174 left = local64_read(&hwc->period_left); 175 if (left <= -period) { 176 left = period; 177 local64_set(&hwc->period_left, left); 178 hwc->last_period = period; 179 rc = true; 180 } else if (left <= 0) { 181 left += period; 182 local64_set(&hwc->period_left, left); 183 hwc->last_period = period; 184 rc = true; 185 } 186 if (left > XTENSA_PMU_COUNTER_MAX) 187 left = XTENSA_PMU_COUNTER_MAX; 188 } 189 190 local64_set(&hwc->prev_count, -left); 191 xtensa_pmu_write_counter(idx, -left); 192 perf_event_update_userpage(event); 193 194 return rc; 195 } 196 197 static void xtensa_pmu_enable(struct pmu *pmu) 198 { 199 set_er(get_er(XTENSA_PMU_PMG) | XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG); 200 } 201 202 static void xtensa_pmu_disable(struct pmu *pmu) 203 { 204 set_er(get_er(XTENSA_PMU_PMG) & ~XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG); 205 } 206 207 static int xtensa_pmu_event_init(struct perf_event *event) 208 { 209 int ret; 210 211 switch (event->attr.type) { 212 case PERF_TYPE_HARDWARE: 213 if (event->attr.config >= ARRAY_SIZE(xtensa_hw_ctl) || 214 xtensa_hw_ctl[event->attr.config] == 0) 215 return -EINVAL; 216 event->hw.config = xtensa_hw_ctl[event->attr.config]; 217 return 0; 218 219 case PERF_TYPE_HW_CACHE: 220 ret = xtensa_pmu_cache_event(event->attr.config); 221 if (ret < 0) 222 return ret; 223 event->hw.config = ret; 224 return 0; 225 226 case PERF_TYPE_RAW: 227 /* Not 'previous counter' select */ 228 if ((event->attr.config & XTENSA_PMU_PMCTRL_SELECT) == 229 (1 << XTENSA_PMU_PMCTRL_SELECT_SHIFT)) 230 return -EINVAL; 231 event->hw.config = (event->attr.config & 232 (XTENSA_PMU_PMCTRL_KRNLCNT | 233 XTENSA_PMU_PMCTRL_TRACELEVEL | 234 XTENSA_PMU_PMCTRL_SELECT | 235 XTENSA_PMU_PMCTRL_MASK)) | 236 XTENSA_PMU_PMCTRL_INTEN; 237 return 0; 238 239 default: 240 return -ENOENT; 241 } 242 } 243 244 /* 245 * Starts/Stops a counter present on the PMU. The PMI handler 246 * should stop the counter when perf_event_overflow() returns 247 * !0. ->start() will be used to continue. 248 */ 249 static void xtensa_pmu_start(struct perf_event *event, int flags) 250 { 251 struct hw_perf_event *hwc = &event->hw; 252 int idx = hwc->idx; 253 254 if (WARN_ON_ONCE(idx == -1)) 255 return; 256 257 if (flags & PERF_EF_RELOAD) { 258 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); 259 xtensa_perf_event_set_period(event, hwc, idx); 260 } 261 262 hwc->state = 0; 263 264 set_er(hwc->config, XTENSA_PMU_PMCTRL(idx)); 265 } 266 267 static void xtensa_pmu_stop(struct perf_event *event, int flags) 268 { 269 struct hw_perf_event *hwc = &event->hw; 270 int idx = hwc->idx; 271 272 if (!(hwc->state & PERF_HES_STOPPED)) { 273 set_er(0, XTENSA_PMU_PMCTRL(idx)); 274 set_er(get_er(XTENSA_PMU_PMSTAT(idx)), 275 XTENSA_PMU_PMSTAT(idx)); 276 hwc->state |= PERF_HES_STOPPED; 277 } 278 279 if ((flags & PERF_EF_UPDATE) && 280 !(event->hw.state & PERF_HES_UPTODATE)) { 281 xtensa_perf_event_update(event, &event->hw, idx); 282 event->hw.state |= PERF_HES_UPTODATE; 283 } 284 } 285 286 /* 287 * Adds/Removes a counter to/from the PMU, can be done inside 288 * a transaction, see the ->*_txn() methods. 289 */ 290 static int xtensa_pmu_add(struct perf_event *event, int flags) 291 { 292 struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events); 293 struct hw_perf_event *hwc = &event->hw; 294 int idx = hwc->idx; 295 296 if (__test_and_set_bit(idx, ev->used_mask)) { 297 idx = find_first_zero_bit(ev->used_mask, 298 XCHAL_NUM_PERF_COUNTERS); 299 if (idx == XCHAL_NUM_PERF_COUNTERS) 300 return -EAGAIN; 301 302 __set_bit(idx, ev->used_mask); 303 hwc->idx = idx; 304 } 305 ev->event[idx] = event; 306 307 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; 308 309 if (flags & PERF_EF_START) 310 xtensa_pmu_start(event, PERF_EF_RELOAD); 311 312 perf_event_update_userpage(event); 313 return 0; 314 } 315 316 static void xtensa_pmu_del(struct perf_event *event, int flags) 317 { 318 struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events); 319 320 xtensa_pmu_stop(event, PERF_EF_UPDATE); 321 __clear_bit(event->hw.idx, ev->used_mask); 322 perf_event_update_userpage(event); 323 } 324 325 static void xtensa_pmu_read(struct perf_event *event) 326 { 327 xtensa_perf_event_update(event, &event->hw, event->hw.idx); 328 } 329 330 static int callchain_trace(struct stackframe *frame, void *data) 331 { 332 struct perf_callchain_entry_ctx *entry = data; 333 334 perf_callchain_store(entry, frame->pc); 335 return 0; 336 } 337 338 void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, 339 struct pt_regs *regs) 340 { 341 xtensa_backtrace_kernel(regs, entry->max_stack, 342 callchain_trace, NULL, entry); 343 } 344 345 void perf_callchain_user(struct perf_callchain_entry_ctx *entry, 346 struct pt_regs *regs) 347 { 348 xtensa_backtrace_user(regs, entry->max_stack, 349 callchain_trace, entry); 350 } 351 352 void perf_event_print_debug(void) 353 { 354 unsigned long flags; 355 unsigned i; 356 357 local_irq_save(flags); 358 pr_info("CPU#%d: PMG: 0x%08lx\n", smp_processor_id(), 359 get_er(XTENSA_PMU_PMG)); 360 for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) 361 pr_info("PM%d: 0x%08lx, PMCTRL%d: 0x%08lx, PMSTAT%d: 0x%08lx\n", 362 i, get_er(XTENSA_PMU_PM(i)), 363 i, get_er(XTENSA_PMU_PMCTRL(i)), 364 i, get_er(XTENSA_PMU_PMSTAT(i))); 365 local_irq_restore(flags); 366 } 367 368 irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id) 369 { 370 irqreturn_t rc = IRQ_NONE; 371 struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events); 372 unsigned i; 373 374 for_each_set_bit(i, ev->used_mask, XCHAL_NUM_PERF_COUNTERS) { 375 uint32_t v = get_er(XTENSA_PMU_PMSTAT(i)); 376 struct perf_event *event = ev->event[i]; 377 struct hw_perf_event *hwc = &event->hw; 378 u64 last_period; 379 380 if (!(v & XTENSA_PMU_PMSTAT_OVFL)) 381 continue; 382 383 set_er(v, XTENSA_PMU_PMSTAT(i)); 384 xtensa_perf_event_update(event, hwc, i); 385 last_period = hwc->last_period; 386 if (xtensa_perf_event_set_period(event, hwc, i)) { 387 struct perf_sample_data data; 388 struct pt_regs *regs = get_irq_regs(); 389 390 perf_sample_data_init(&data, 0, last_period); 391 if (perf_event_overflow(event, &data, regs)) 392 xtensa_pmu_stop(event, 0); 393 } 394 395 rc = IRQ_HANDLED; 396 } 397 return rc; 398 } 399 400 static struct pmu xtensa_pmu = { 401 .pmu_enable = xtensa_pmu_enable, 402 .pmu_disable = xtensa_pmu_disable, 403 .event_init = xtensa_pmu_event_init, 404 .add = xtensa_pmu_add, 405 .del = xtensa_pmu_del, 406 .start = xtensa_pmu_start, 407 .stop = xtensa_pmu_stop, 408 .read = xtensa_pmu_read, 409 }; 410 411 static int xtensa_pmu_setup(unsigned int cpu) 412 { 413 unsigned i; 414 415 set_er(0, XTENSA_PMU_PMG); 416 for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) { 417 set_er(0, XTENSA_PMU_PMCTRL(i)); 418 set_er(get_er(XTENSA_PMU_PMSTAT(i)), XTENSA_PMU_PMSTAT(i)); 419 } 420 return 0; 421 } 422 423 static int __init xtensa_pmu_init(void) 424 { 425 int ret; 426 int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT); 427 428 ret = cpuhp_setup_state(CPUHP_AP_PERF_XTENSA_STARTING, 429 "perf/xtensa:starting", xtensa_pmu_setup, 430 NULL); 431 if (ret) { 432 pr_err("xtensa_pmu: failed to register CPU-hotplug.\n"); 433 return ret; 434 } 435 #if XTENSA_FAKE_NMI 436 enable_irq(irq); 437 #else 438 ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU, 439 "pmu", NULL); 440 if (ret < 0) 441 return ret; 442 #endif 443 444 ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW); 445 if (ret) 446 free_irq(irq, NULL); 447 448 return ret; 449 } 450 early_initcall(xtensa_pmu_init); 451