1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Xtensa Performance Monitor Module driver 4 * See Tensilica Debug User's Guide for PMU registers documentation. 5 * 6 * Copyright (C) 2015 Cadence Design Systems Inc. 7 */ 8 9 #include <linux/interrupt.h> 10 #include <linux/irqdomain.h> 11 #include <linux/module.h> 12 #include <linux/of.h> 13 #include <linux/perf_event.h> 14 #include <linux/platform_device.h> 15 16 #include <asm/processor.h> 17 #include <asm/stacktrace.h> 18 19 /* Global control/status for all perf counters */ 20 #define XTENSA_PMU_PMG 0x1000 21 /* Perf counter values */ 22 #define XTENSA_PMU_PM(i) (0x1080 + (i) * 4) 23 /* Perf counter control registers */ 24 #define XTENSA_PMU_PMCTRL(i) (0x1100 + (i) * 4) 25 /* Perf counter status registers */ 26 #define XTENSA_PMU_PMSTAT(i) (0x1180 + (i) * 4) 27 28 #define XTENSA_PMU_PMG_PMEN 0x1 29 30 #define XTENSA_PMU_COUNTER_MASK 0xffffffffULL 31 #define XTENSA_PMU_COUNTER_MAX 0x7fffffff 32 33 #define XTENSA_PMU_PMCTRL_INTEN 0x00000001 34 #define XTENSA_PMU_PMCTRL_KRNLCNT 0x00000008 35 #define XTENSA_PMU_PMCTRL_TRACELEVEL 0x000000f0 36 #define XTENSA_PMU_PMCTRL_SELECT_SHIFT 8 37 #define XTENSA_PMU_PMCTRL_SELECT 0x00001f00 38 #define XTENSA_PMU_PMCTRL_MASK_SHIFT 16 39 #define XTENSA_PMU_PMCTRL_MASK 0xffff0000 40 41 #define XTENSA_PMU_MASK(select, mask) \ 42 (((select) << XTENSA_PMU_PMCTRL_SELECT_SHIFT) | \ 43 ((mask) << XTENSA_PMU_PMCTRL_MASK_SHIFT) | \ 44 XTENSA_PMU_PMCTRL_TRACELEVEL | \ 45 XTENSA_PMU_PMCTRL_INTEN) 46 47 #define XTENSA_PMU_PMSTAT_OVFL 0x00000001 48 #define XTENSA_PMU_PMSTAT_INTASRT 0x00000010 49 50 struct xtensa_pmu_events { 51 /* Array of events currently on this core */ 52 struct perf_event *event[XCHAL_NUM_PERF_COUNTERS]; 53 /* Bitmap of used hardware counters */ 54 unsigned long used_mask[BITS_TO_LONGS(XCHAL_NUM_PERF_COUNTERS)]; 55 }; 56 static DEFINE_PER_CPU(struct xtensa_pmu_events, xtensa_pmu_events); 57 58 static const u32 xtensa_hw_ctl[] = { 59 [PERF_COUNT_HW_CPU_CYCLES] = XTENSA_PMU_MASK(0, 0x1), 60 [PERF_COUNT_HW_INSTRUCTIONS] = XTENSA_PMU_MASK(2, 0xffff), 61 [PERF_COUNT_HW_CACHE_REFERENCES] = XTENSA_PMU_MASK(10, 0x1), 62 [PERF_COUNT_HW_CACHE_MISSES] = XTENSA_PMU_MASK(12, 0x1), 63 /* Taken and non-taken branches + taken loop ends */ 64 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XTENSA_PMU_MASK(2, 0x490), 65 /* Instruction-related + other global stall cycles */ 66 [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XTENSA_PMU_MASK(4, 0x1ff), 67 /* Data-related global stall cycles */ 68 [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = XTENSA_PMU_MASK(3, 0x1ff), 69 }; 70 71 #define C(_x) PERF_COUNT_HW_CACHE_##_x 72 73 static const u32 xtensa_cache_ctl[][C(OP_MAX)][C(RESULT_MAX)] = { 74 [C(L1D)] = { 75 [C(OP_READ)] = { 76 [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(10, 0x1), 77 [C(RESULT_MISS)] = XTENSA_PMU_MASK(10, 0x2), 78 }, 79 [C(OP_WRITE)] = { 80 [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(11, 0x1), 81 [C(RESULT_MISS)] = XTENSA_PMU_MASK(11, 0x2), 82 }, 83 }, 84 [C(L1I)] = { 85 [C(OP_READ)] = { 86 [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(8, 0x1), 87 [C(RESULT_MISS)] = XTENSA_PMU_MASK(8, 0x2), 88 }, 89 }, 90 [C(DTLB)] = { 91 [C(OP_READ)] = { 92 [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(9, 0x1), 93 [C(RESULT_MISS)] = XTENSA_PMU_MASK(9, 0x8), 94 }, 95 }, 96 [C(ITLB)] = { 97 [C(OP_READ)] = { 98 [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(7, 0x1), 99 [C(RESULT_MISS)] = XTENSA_PMU_MASK(7, 0x8), 100 }, 101 }, 102 }; 103 104 static int xtensa_pmu_cache_event(u64 config) 105 { 106 unsigned int cache_type, cache_op, cache_result; 107 int ret; 108 109 cache_type = (config >> 0) & 0xff; 110 cache_op = (config >> 8) & 0xff; 111 cache_result = (config >> 16) & 0xff; 112 113 if (cache_type >= ARRAY_SIZE(xtensa_cache_ctl) || 114 cache_op >= C(OP_MAX) || 115 cache_result >= C(RESULT_MAX)) 116 return -EINVAL; 117 118 ret = xtensa_cache_ctl[cache_type][cache_op][cache_result]; 119 120 if (ret == 0) 121 return -EINVAL; 122 123 return ret; 124 } 125 126 static inline uint32_t xtensa_pmu_read_counter(int idx) 127 { 128 return get_er(XTENSA_PMU_PM(idx)); 129 } 130 131 static inline void xtensa_pmu_write_counter(int idx, uint32_t v) 132 { 133 set_er(v, XTENSA_PMU_PM(idx)); 134 } 135 136 static void xtensa_perf_event_update(struct perf_event *event, 137 struct hw_perf_event *hwc, int idx) 138 { 139 uint64_t prev_raw_count, new_raw_count; 140 int64_t delta; 141 142 do { 143 prev_raw_count = local64_read(&hwc->prev_count); 144 new_raw_count = xtensa_pmu_read_counter(event->hw.idx); 145 } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count, 146 new_raw_count) != prev_raw_count); 147 148 delta = (new_raw_count - prev_raw_count) & XTENSA_PMU_COUNTER_MASK; 149 150 local64_add(delta, &event->count); 151 local64_sub(delta, &hwc->period_left); 152 } 153 154 static bool xtensa_perf_event_set_period(struct perf_event *event, 155 struct hw_perf_event *hwc, int idx) 156 { 157 bool rc = false; 158 s64 left; 159 160 if (!is_sampling_event(event)) { 161 left = XTENSA_PMU_COUNTER_MAX; 162 } else { 163 s64 period = hwc->sample_period; 164 165 left = local64_read(&hwc->period_left); 166 if (left <= -period) { 167 left = period; 168 local64_set(&hwc->period_left, left); 169 hwc->last_period = period; 170 rc = true; 171 } else if (left <= 0) { 172 left += period; 173 local64_set(&hwc->period_left, left); 174 hwc->last_period = period; 175 rc = true; 176 } 177 if (left > XTENSA_PMU_COUNTER_MAX) 178 left = XTENSA_PMU_COUNTER_MAX; 179 } 180 181 local64_set(&hwc->prev_count, -left); 182 xtensa_pmu_write_counter(idx, -left); 183 perf_event_update_userpage(event); 184 185 return rc; 186 } 187 188 static void xtensa_pmu_enable(struct pmu *pmu) 189 { 190 set_er(get_er(XTENSA_PMU_PMG) | XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG); 191 } 192 193 static void xtensa_pmu_disable(struct pmu *pmu) 194 { 195 set_er(get_er(XTENSA_PMU_PMG) & ~XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG); 196 } 197 198 static int xtensa_pmu_event_init(struct perf_event *event) 199 { 200 int ret; 201 202 switch (event->attr.type) { 203 case PERF_TYPE_HARDWARE: 204 if (event->attr.config >= ARRAY_SIZE(xtensa_hw_ctl) || 205 xtensa_hw_ctl[event->attr.config] == 0) 206 return -EINVAL; 207 event->hw.config = xtensa_hw_ctl[event->attr.config]; 208 return 0; 209 210 case PERF_TYPE_HW_CACHE: 211 ret = xtensa_pmu_cache_event(event->attr.config); 212 if (ret < 0) 213 return ret; 214 event->hw.config = ret; 215 return 0; 216 217 case PERF_TYPE_RAW: 218 /* Not 'previous counter' select */ 219 if ((event->attr.config & XTENSA_PMU_PMCTRL_SELECT) == 220 (1 << XTENSA_PMU_PMCTRL_SELECT_SHIFT)) 221 return -EINVAL; 222 event->hw.config = (event->attr.config & 223 (XTENSA_PMU_PMCTRL_KRNLCNT | 224 XTENSA_PMU_PMCTRL_TRACELEVEL | 225 XTENSA_PMU_PMCTRL_SELECT | 226 XTENSA_PMU_PMCTRL_MASK)) | 227 XTENSA_PMU_PMCTRL_INTEN; 228 return 0; 229 230 default: 231 return -ENOENT; 232 } 233 } 234 235 /* 236 * Starts/Stops a counter present on the PMU. The PMI handler 237 * should stop the counter when perf_event_overflow() returns 238 * !0. ->start() will be used to continue. 239 */ 240 static void xtensa_pmu_start(struct perf_event *event, int flags) 241 { 242 struct hw_perf_event *hwc = &event->hw; 243 int idx = hwc->idx; 244 245 if (WARN_ON_ONCE(idx == -1)) 246 return; 247 248 if (flags & PERF_EF_RELOAD) { 249 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); 250 xtensa_perf_event_set_period(event, hwc, idx); 251 } 252 253 hwc->state = 0; 254 255 set_er(hwc->config, XTENSA_PMU_PMCTRL(idx)); 256 } 257 258 static void xtensa_pmu_stop(struct perf_event *event, int flags) 259 { 260 struct hw_perf_event *hwc = &event->hw; 261 int idx = hwc->idx; 262 263 if (!(hwc->state & PERF_HES_STOPPED)) { 264 set_er(0, XTENSA_PMU_PMCTRL(idx)); 265 set_er(get_er(XTENSA_PMU_PMSTAT(idx)), 266 XTENSA_PMU_PMSTAT(idx)); 267 hwc->state |= PERF_HES_STOPPED; 268 } 269 270 if ((flags & PERF_EF_UPDATE) && 271 !(event->hw.state & PERF_HES_UPTODATE)) { 272 xtensa_perf_event_update(event, &event->hw, idx); 273 event->hw.state |= PERF_HES_UPTODATE; 274 } 275 } 276 277 /* 278 * Adds/Removes a counter to/from the PMU, can be done inside 279 * a transaction, see the ->*_txn() methods. 280 */ 281 static int xtensa_pmu_add(struct perf_event *event, int flags) 282 { 283 struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events); 284 struct hw_perf_event *hwc = &event->hw; 285 int idx = hwc->idx; 286 287 if (__test_and_set_bit(idx, ev->used_mask)) { 288 idx = find_first_zero_bit(ev->used_mask, 289 XCHAL_NUM_PERF_COUNTERS); 290 if (idx == XCHAL_NUM_PERF_COUNTERS) 291 return -EAGAIN; 292 293 __set_bit(idx, ev->used_mask); 294 hwc->idx = idx; 295 } 296 ev->event[idx] = event; 297 298 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; 299 300 if (flags & PERF_EF_START) 301 xtensa_pmu_start(event, PERF_EF_RELOAD); 302 303 perf_event_update_userpage(event); 304 return 0; 305 } 306 307 static void xtensa_pmu_del(struct perf_event *event, int flags) 308 { 309 struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events); 310 311 xtensa_pmu_stop(event, PERF_EF_UPDATE); 312 __clear_bit(event->hw.idx, ev->used_mask); 313 perf_event_update_userpage(event); 314 } 315 316 static void xtensa_pmu_read(struct perf_event *event) 317 { 318 xtensa_perf_event_update(event, &event->hw, event->hw.idx); 319 } 320 321 static int callchain_trace(struct stackframe *frame, void *data) 322 { 323 struct perf_callchain_entry_ctx *entry = data; 324 325 perf_callchain_store(entry, frame->pc); 326 return 0; 327 } 328 329 void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, 330 struct pt_regs *regs) 331 { 332 xtensa_backtrace_kernel(regs, entry->max_stack, 333 callchain_trace, NULL, entry); 334 } 335 336 void perf_callchain_user(struct perf_callchain_entry_ctx *entry, 337 struct pt_regs *regs) 338 { 339 xtensa_backtrace_user(regs, entry->max_stack, 340 callchain_trace, entry); 341 } 342 343 void perf_event_print_debug(void) 344 { 345 unsigned long flags; 346 unsigned i; 347 348 local_irq_save(flags); 349 pr_info("CPU#%d: PMG: 0x%08lx\n", smp_processor_id(), 350 get_er(XTENSA_PMU_PMG)); 351 for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) 352 pr_info("PM%d: 0x%08lx, PMCTRL%d: 0x%08lx, PMSTAT%d: 0x%08lx\n", 353 i, get_er(XTENSA_PMU_PM(i)), 354 i, get_er(XTENSA_PMU_PMCTRL(i)), 355 i, get_er(XTENSA_PMU_PMSTAT(i))); 356 local_irq_restore(flags); 357 } 358 359 irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id) 360 { 361 irqreturn_t rc = IRQ_NONE; 362 struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events); 363 unsigned i; 364 365 for (i = find_first_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS); 366 i < XCHAL_NUM_PERF_COUNTERS; 367 i = find_next_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS, i + 1)) { 368 uint32_t v = get_er(XTENSA_PMU_PMSTAT(i)); 369 struct perf_event *event = ev->event[i]; 370 struct hw_perf_event *hwc = &event->hw; 371 u64 last_period; 372 373 if (!(v & XTENSA_PMU_PMSTAT_OVFL)) 374 continue; 375 376 set_er(v, XTENSA_PMU_PMSTAT(i)); 377 xtensa_perf_event_update(event, hwc, i); 378 last_period = hwc->last_period; 379 if (xtensa_perf_event_set_period(event, hwc, i)) { 380 struct perf_sample_data data; 381 struct pt_regs *regs = get_irq_regs(); 382 383 perf_sample_data_init(&data, 0, last_period); 384 if (perf_event_overflow(event, &data, regs)) 385 xtensa_pmu_stop(event, 0); 386 } 387 388 rc = IRQ_HANDLED; 389 } 390 return rc; 391 } 392 393 static struct pmu xtensa_pmu = { 394 .pmu_enable = xtensa_pmu_enable, 395 .pmu_disable = xtensa_pmu_disable, 396 .event_init = xtensa_pmu_event_init, 397 .add = xtensa_pmu_add, 398 .del = xtensa_pmu_del, 399 .start = xtensa_pmu_start, 400 .stop = xtensa_pmu_stop, 401 .read = xtensa_pmu_read, 402 }; 403 404 static int xtensa_pmu_setup(int cpu) 405 { 406 unsigned i; 407 408 set_er(0, XTENSA_PMU_PMG); 409 for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) { 410 set_er(0, XTENSA_PMU_PMCTRL(i)); 411 set_er(get_er(XTENSA_PMU_PMSTAT(i)), XTENSA_PMU_PMSTAT(i)); 412 } 413 return 0; 414 } 415 416 static int __init xtensa_pmu_init(void) 417 { 418 int ret; 419 int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT); 420 421 ret = cpuhp_setup_state(CPUHP_AP_PERF_XTENSA_STARTING, 422 "perf/xtensa:starting", xtensa_pmu_setup, 423 NULL); 424 if (ret) { 425 pr_err("xtensa_pmu: failed to register CPU-hotplug.\n"); 426 return ret; 427 } 428 #if XTENSA_FAKE_NMI 429 enable_irq(irq); 430 #else 431 ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU, 432 "pmu", NULL); 433 if (ret < 0) 434 return ret; 435 #endif 436 437 ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW); 438 if (ret) 439 free_irq(irq, NULL); 440 441 return ret; 442 } 443 early_initcall(xtensa_pmu_init); 444