// SPDX-License-Identifier: GPL-2.0
/*
 * RISC-V performance counter support.
 *
 * Copyright (C) 2021 Western Digital Corporation or its affiliates.
 *
 * This implementation is based on old RISC-V perf and ARM perf event code
 * which are in turn based on sparc64 and x86 code.
 */

#include <linux/cpumask.h>
#include <linux/irq.h>
#include <linux/irqdesc.h>
#include <linux/perf/riscv_pmu.h>
#include <linux/printk.h>
#include <linux/smp.h>
#include <linux/sched_clock.h>

#include <asm/sbi.h>

static bool riscv_perf_user_access(struct perf_event *event)
{
	return ((event->attr.type == PERF_TYPE_HARDWARE) ||
		(event->attr.type == PERF_TYPE_HW_CACHE) ||
		(event->attr.type == PERF_TYPE_RAW)) &&
		!!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT);
}

void arch_perf_update_userpage(struct perf_event *event,
			       struct perf_event_mmap_page *userpg, u64 now)
{
	struct clock_read_data *rd;
	unsigned int seq;
	u64 ns;

	userpg->cap_user_time = 0;
	userpg->cap_user_time_zero = 0;
	userpg->cap_user_time_short = 0;
	userpg->cap_user_rdpmc = riscv_perf_user_access(event);

#ifdef CONFIG_RISCV_PMU
	/*
	 * The counters are 64-bit but the priv spec doesn't mandate that all
	 * bits be implemented, so the counter width can vary from one CPU
	 * vendor to another.
	 */
	if (userpg->cap_user_rdpmc)
		userpg->pmc_width = to_riscv_pmu(event->pmu)->ctr_get_width(event->hw.idx) + 1;
#endif

	do {
		rd = sched_clock_read_begin(&seq);

		userpg->time_mult = rd->mult;
		userpg->time_shift = rd->shift;
		userpg->time_zero = rd->epoch_ns;
		userpg->time_cycles = rd->epoch_cyc;
		userpg->time_mask = rd->sched_clock_mask;

		/*
		 * Subtract the cycle base, such that software that
		 * doesn't know about cap_user_time_short still 'works'
		 * assuming no wraps.
		 */
		ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
		userpg->time_zero -= ns;

	} while (sched_clock_read_retry(seq));

	userpg->time_offset = userpg->time_zero - now;

	/*
	 * time_shift is not expected to be greater than 31 due to
	 * the original published conversion algorithm shifting a
	 * 32-bit value (now specifies a 64-bit value) - refer
	 * perf_event_mmap_page documentation in perf_event.h.
	 */
	if (userpg->time_shift == 32) {
		userpg->time_shift = 31;
		userpg->time_mult >>= 1;
	}

	/*
	 * Internal timekeeping for enabled/running/stopped times
	 * is always computed with the sched_clock.
	 */
	userpg->cap_user_time = 1;
	userpg->cap_user_time_zero = 1;
	userpg->cap_user_time_short = 1;
}
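/*
 * Rough sketch of how userspace consumes the fields filled in above,
 * following the perf_event_mmap_page documentation in
 * include/uapi/linux/perf_event.h (illustrative only; assumes
 * cap_user_time_zero and cap_user_time_short are set and "cyc" was read
 * under the mmap page's seqcount):
 *
 *	cyc  = time_cycles + ((cyc - time_cycles) & time_mask);
 *	quot = cyc >> time_shift;
 *	rem  = cyc & (((u64)1 << time_shift) - 1);
 *	time = time_zero + quot * time_mult + ((rem * time_mult) >> time_shift);
 */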
static unsigned long csr_read_num(int csr_num)
{
	/*
	 * csr_read() needs a compile-time constant CSR number, so expand a
	 * switch case for every counter CSR that may be requested.
	 */
#define switchcase_csr_read(__csr_num, __val)		{\
	case __csr_num:					\
		__val = csr_read(__csr_num);		\
		break; }
#define switchcase_csr_read_2(__csr_num, __val)		{\
	switchcase_csr_read(__csr_num + 0, __val)	\
	switchcase_csr_read(__csr_num + 1, __val)}
#define switchcase_csr_read_4(__csr_num, __val)		{\
	switchcase_csr_read_2(__csr_num + 0, __val)	\
	switchcase_csr_read_2(__csr_num + 2, __val)}
#define switchcase_csr_read_8(__csr_num, __val)		{\
	switchcase_csr_read_4(__csr_num + 0, __val)	\
	switchcase_csr_read_4(__csr_num + 4, __val)}
#define switchcase_csr_read_16(__csr_num, __val)	{\
	switchcase_csr_read_8(__csr_num + 0, __val)	\
	switchcase_csr_read_8(__csr_num + 8, __val)}
#define switchcase_csr_read_32(__csr_num, __val)	{\
	switchcase_csr_read_16(__csr_num + 0, __val)	\
	switchcase_csr_read_16(__csr_num + 16, __val)}

	unsigned long ret = 0;

	switch (csr_num) {
	switchcase_csr_read_32(CSR_CYCLE, ret)
	switchcase_csr_read_32(CSR_CYCLEH, ret)
	default:
		break;
	}

	return ret;
#undef switchcase_csr_read_32
#undef switchcase_csr_read_16
#undef switchcase_csr_read_8
#undef switchcase_csr_read_4
#undef switchcase_csr_read_2
#undef switchcase_csr_read
}

/*
 * Read the CSR of a corresponding counter.
 */
unsigned long riscv_pmu_ctr_read_csr(unsigned long csr)
{
	if (csr < CSR_CYCLE || csr > CSR_HPMCOUNTER31H ||
	   (csr > CSR_HPMCOUNTER31 && csr < CSR_CYCLEH)) {
		pr_err("Invalid performance counter csr %lx\n", csr);
		return -EINVAL;
	}

	return csr_read_num(csr);
}
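/*
 * Example with hypothetical numbers: for a counter that implements 48 of
 * its 64 bits, ctr_get_width() returns 47, so the helper below yields
 * GENMASK_ULL(47, 0) == 0x0000ffffffffffff as the usable counter mask.
 */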
u64 riscv_pmu_ctr_get_width_mask(struct perf_event *event)
{
	int cwidth;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;

	if (!rvpmu->ctr_get_width) {
		/*
		 * If the pmu driver doesn't report the counter width, fall
		 * back to the maximum width allowed by the specification.
		 */
		cwidth = 63;
	} else {
		if (hwc->idx == -1)
			/* Handle init case where idx is not initialized yet */
			cwidth = rvpmu->ctr_get_width(0);
		else
			cwidth = rvpmu->ctr_get_width(hwc->idx);
	}

	return GENMASK_ULL(cwidth, 0);
}

u64 riscv_pmu_event_update(struct perf_event *event)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	u64 prev_raw_count, new_raw_count;
	unsigned long cmask;
	u64 oldval, delta;

	if (!rvpmu->ctr_read)
		return 0;

	cmask = riscv_pmu_ctr_get_width_mask(event);

	do {
		prev_raw_count = local64_read(&hwc->prev_count);
		new_raw_count = rvpmu->ctr_read(event);
		oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count,
					 new_raw_count);
	} while (oldval != prev_raw_count);

	delta = (new_raw_count - prev_raw_count) & cmask;
	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return delta;
}
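/*
 * Illustration of the wrap-safe delta above, assuming a hypothetical 48-bit
 * counter (cmask == GENMASK_ULL(47, 0)): with prev_raw_count ==
 * 0xfffffffffffe and new_raw_count == 0x1, the result is
 * (0x1 - 0xfffffffffffe) & cmask == 3, i.e. the three increments that
 * occurred across the counter wrap.
 */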
void riscv_pmu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);

	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);

	if (!(hwc->state & PERF_HES_STOPPED)) {
		if (rvpmu->ctr_stop) {
			rvpmu->ctr_stop(event, 0);
			hwc->state |= PERF_HES_STOPPED;
		}
		riscv_pmu_event_update(event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}

int riscv_pmu_event_set_period(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int overflow = 0;
	uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);

	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	/*
	 * Limit the maximum period to prevent the counter value
	 * from overtaking the one we are about to program. In
	 * effect we are reducing max_period to account for
	 * interrupt latency (and we are being very conservative).
	 */
	if (left > (max_period >> 1))
		left = (max_period >> 1);

	local64_set(&hwc->prev_count, (u64)-left);

	perf_event_update_userpage(event);

	return overflow;
}

void riscv_pmu_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);
	u64 init_val;

	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

	hwc->state = 0;
	riscv_pmu_event_set_period(event);
	init_val = local64_read(&hwc->prev_count) & max_period;
	rvpmu->ctr_start(event, init_val);
	perf_event_update_userpage(event);
}
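/*
 * Sketch of the reload arithmetic used by riscv_pmu_start() above, with
 * hypothetical numbers for a 48-bit counter: if left == 0x400000000000,
 * prev_count is set to (u64)-left == 0xffffc00000000000 and the counter is
 * started at init_val == prev_count & max_period == 0xc00000000000, so it
 * reaches the top of its range after exactly "left" increments.
 */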
static int riscv_pmu_add(struct perf_event *event, int flags)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx;

	idx = rvpmu->ctr_get_idx(event);
	if (idx < 0)
		return idx;

	hwc->idx = idx;
	cpuc->events[idx] = event;
	cpuc->n_events++;
	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (flags & PERF_EF_START)
		riscv_pmu_start(event, PERF_EF_RELOAD);

	/* Propagate our changes to the userspace mapping. */
	perf_event_update_userpage(event);

	return 0;
}

static void riscv_pmu_del(struct perf_event *event, int flags)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
	struct hw_perf_event *hwc = &event->hw;

	riscv_pmu_stop(event, PERF_EF_UPDATE);
	cpuc->events[hwc->idx] = NULL;
	/* The firmware needs to reset the counter mapping */
	if (rvpmu->ctr_stop)
		rvpmu->ctr_stop(event, RISCV_PMU_STOP_FLAG_RESET);
	cpuc->n_events--;
	if (rvpmu->ctr_clear_idx)
		rvpmu->ctr_clear_idx(event);
	perf_event_update_userpage(event);
	hwc->idx = -1;
}

static void riscv_pmu_read(struct perf_event *event)
{
	riscv_pmu_event_update(event);
}

static int riscv_pmu_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	int mapped_event;
	u64 event_config = 0;
	uint64_t cmask;

	hwc->flags = 0;
	mapped_event = rvpmu->event_map(event, &event_config);
	if (mapped_event < 0) {
		pr_debug("event %x:%llx not supported\n", event->attr.type,
			 event->attr.config);
		return mapped_event;
	}

	/*
	 * idx is set to -1 because the index of a general event should not be
	 * decided until it is bound to a counter in pmu->add().
	 * config carries the counter CSR information for the event, and idx
	 * will hold the counter index once it is assigned.
	 */
	hwc->config = event_config;
	hwc->idx = -1;
	hwc->event_base = mapped_event;

	if (rvpmu->event_init)
		rvpmu->event_init(event);

	if (!is_sampling_event(event)) {
		/*
		 * For non-sampling runs, limit the sample_period to half
		 * of the counter width. That way, the new counter value
		 * is far less likely to overtake the previous one unless
		 * you have some serious IRQ latency issues.
		 */
		cmask = riscv_pmu_ctr_get_width_mask(event);
		hwc->sample_period = cmask >> 1;
		hwc->last_period = hwc->sample_period;
		local64_set(&hwc->period_left, hwc->sample_period);
	}

	return 0;
}

static int riscv_pmu_event_idx(struct perf_event *event)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);

	if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT))
		return 0;

	if (rvpmu->csr_index)
		return rvpmu->csr_index(event) + 1;

	return 0;
}

static void riscv_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);

	if (rvpmu->event_mapped) {
		rvpmu->event_mapped(event, mm);
		perf_event_update_userpage(event);
	}
}

static void riscv_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);

	if (rvpmu->event_unmapped) {
		rvpmu->event_unmapped(event, mm);
		perf_event_update_userpage(event);
	}
}

struct riscv_pmu *riscv_pmu_alloc(void)
{
	struct riscv_pmu *pmu;
	int cpuid, i;
	struct cpu_hw_events *cpuc;

	pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
	if (!pmu)
		goto out;

	pmu->hw_events = alloc_percpu_gfp(struct cpu_hw_events, GFP_KERNEL);
	if (!pmu->hw_events) {
		pr_info("failed to allocate per-cpu PMU data.\n");
		goto out_free_pmu;
	}

	for_each_possible_cpu(cpuid) {
		cpuc = per_cpu_ptr(pmu->hw_events, cpuid);
		cpuc->n_events = 0;
		for (i = 0; i < RISCV_MAX_COUNTERS; i++)
			cpuc->events[i] = NULL;
	}
	pmu->pmu = (struct pmu) {
		.event_init	= riscv_pmu_event_init,
		.event_mapped	= riscv_pmu_event_mapped,
		.event_unmapped	= riscv_pmu_event_unmapped,
		.event_idx	= riscv_pmu_event_idx,
		.add		= riscv_pmu_add,
		.del		= riscv_pmu_del,
		.start		= riscv_pmu_start,
		.stop		= riscv_pmu_stop,
		.read		= riscv_pmu_read,
	};

	return pmu;

out_free_pmu:
	kfree(pmu);
out:
	return NULL;
}
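/*
 * Usage sketch (illustrative only, not part of this file): a platform driver
 * such as the SBI PMU driver allocates the generic structure, fills in the
 * ctr_*() / event_*() callbacks used above and then registers the pmu,
 * roughly:
 *
 *	struct riscv_pmu *pmu = riscv_pmu_alloc();
 *
 *	pmu->ctr_get_idx   = <driver counter allocation hook>;
 *	pmu->ctr_get_width = <driver counter width hook>;
 *	pmu->ctr_start     = <driver counter start hook>;
 *	pmu->ctr_stop      = <driver counter stop hook>;
 *	pmu->ctr_read      = <driver counter read hook>;
 *	pmu->event_map     = <driver event mapping hook>;
 *
 *	perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW);
 */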