1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * RISC-V performance counter support. 4 * 5 * Copyright (C) 2021 Western Digital Corporation or its affiliates. 6 * 7 * This implementation is based on old RISC-V perf and ARM perf event code 8 * which are in turn based on sparc64 and x86 code. 9 */ 10 11 #include <linux/cpumask.h> 12 #include <linux/irq.h> 13 #include <linux/irqdesc.h> 14 #include <linux/perf/riscv_pmu.h> 15 #include <linux/printk.h> 16 #include <linux/smp.h> 17 #include <linux/sched_clock.h> 18 19 #include <asm/sbi.h> 20 21 static bool riscv_perf_user_access(struct perf_event *event) 22 { 23 return ((event->attr.type == PERF_TYPE_HARDWARE) || 24 (event->attr.type == PERF_TYPE_HW_CACHE) || 25 (event->attr.type == PERF_TYPE_RAW)) && 26 !!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT) && 27 (event->hw.idx != -1); 28 } 29 30 void arch_perf_update_userpage(struct perf_event *event, 31 struct perf_event_mmap_page *userpg, u64 now) 32 { 33 struct clock_read_data *rd; 34 unsigned int seq; 35 u64 ns; 36 37 userpg->cap_user_time = 0; 38 userpg->cap_user_time_zero = 0; 39 userpg->cap_user_time_short = 0; 40 userpg->cap_user_rdpmc = riscv_perf_user_access(event); 41 42 #ifdef CONFIG_RISCV_PMU 43 /* 44 * The counters are 64-bit but the priv spec doesn't mandate all the 45 * bits to be implemented: that's why, counter width can vary based on 46 * the cpu vendor. 47 */ 48 if (userpg->cap_user_rdpmc) 49 userpg->pmc_width = to_riscv_pmu(event->pmu)->ctr_get_width(event->hw.idx) + 1; 50 #endif 51 52 do { 53 rd = sched_clock_read_begin(&seq); 54 55 userpg->time_mult = rd->mult; 56 userpg->time_shift = rd->shift; 57 userpg->time_zero = rd->epoch_ns; 58 userpg->time_cycles = rd->epoch_cyc; 59 userpg->time_mask = rd->sched_clock_mask; 60 61 /* 62 * Subtract the cycle base, such that software that 63 * doesn't know about cap_user_time_short still 'works' 64 * assuming no wraps. 65 */ 66 ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift); 67 userpg->time_zero -= ns; 68 69 } while (sched_clock_read_retry(seq)); 70 71 userpg->time_offset = userpg->time_zero - now; 72 73 /* 74 * time_shift is not expected to be greater than 31 due to 75 * the original published conversion algorithm shifting a 76 * 32-bit value (now specifies a 64-bit value) - refer 77 * perf_event_mmap_page documentation in perf_event.h. 78 */ 79 if (userpg->time_shift == 32) { 80 userpg->time_shift = 31; 81 userpg->time_mult >>= 1; 82 } 83 84 /* 85 * Internal timekeeping for enabled/running/stopped times 86 * is always computed with the sched_clock. 87 */ 88 userpg->cap_user_time = 1; 89 userpg->cap_user_time_zero = 1; 90 userpg->cap_user_time_short = 1; 91 } 92 93 static unsigned long csr_read_num(int csr_num) 94 { 95 #define switchcase_csr_read(__csr_num, __val) {\ 96 case __csr_num: \ 97 __val = csr_read(__csr_num); \ 98 break; } 99 #define switchcase_csr_read_2(__csr_num, __val) {\ 100 switchcase_csr_read(__csr_num + 0, __val) \ 101 switchcase_csr_read(__csr_num + 1, __val)} 102 #define switchcase_csr_read_4(__csr_num, __val) {\ 103 switchcase_csr_read_2(__csr_num + 0, __val) \ 104 switchcase_csr_read_2(__csr_num + 2, __val)} 105 #define switchcase_csr_read_8(__csr_num, __val) {\ 106 switchcase_csr_read_4(__csr_num + 0, __val) \ 107 switchcase_csr_read_4(__csr_num + 4, __val)} 108 #define switchcase_csr_read_16(__csr_num, __val) {\ 109 switchcase_csr_read_8(__csr_num + 0, __val) \ 110 switchcase_csr_read_8(__csr_num + 8, __val)} 111 #define switchcase_csr_read_32(__csr_num, __val) {\ 112 switchcase_csr_read_16(__csr_num + 0, __val) \ 113 switchcase_csr_read_16(__csr_num + 16, __val)} 114 115 unsigned long ret = 0; 116 117 switch (csr_num) { 118 switchcase_csr_read_32(CSR_CYCLE, ret) 119 switchcase_csr_read_32(CSR_CYCLEH, ret) 120 default : 121 break; 122 } 123 124 return ret; 125 #undef switchcase_csr_read_32 126 #undef switchcase_csr_read_16 127 #undef switchcase_csr_read_8 128 #undef switchcase_csr_read_4 129 #undef switchcase_csr_read_2 130 #undef switchcase_csr_read 131 } 132 133 /* 134 * Read the CSR of a corresponding counter. 135 */ 136 unsigned long riscv_pmu_ctr_read_csr(unsigned long csr) 137 { 138 if (csr < CSR_CYCLE || csr > CSR_HPMCOUNTER31H || 139 (csr > CSR_HPMCOUNTER31 && csr < CSR_CYCLEH)) { 140 pr_err("Invalid performance counter csr %lx\n", csr); 141 return -EINVAL; 142 } 143 144 return csr_read_num(csr); 145 } 146 147 u64 riscv_pmu_ctr_get_width_mask(struct perf_event *event) 148 { 149 int cwidth; 150 struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); 151 struct hw_perf_event *hwc = &event->hw; 152 153 if (hwc->idx == -1) 154 /* Handle init case where idx is not initialized yet */ 155 cwidth = rvpmu->ctr_get_width(0); 156 else 157 cwidth = rvpmu->ctr_get_width(hwc->idx); 158 159 return GENMASK_ULL(cwidth, 0); 160 } 161 162 u64 riscv_pmu_event_update(struct perf_event *event) 163 { 164 struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); 165 struct hw_perf_event *hwc = &event->hw; 166 u64 prev_raw_count, new_raw_count; 167 unsigned long cmask; 168 u64 oldval, delta; 169 170 if (!rvpmu->ctr_read) 171 return 0; 172 173 cmask = riscv_pmu_ctr_get_width_mask(event); 174 175 do { 176 prev_raw_count = local64_read(&hwc->prev_count); 177 new_raw_count = rvpmu->ctr_read(event); 178 oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count, 179 new_raw_count); 180 } while (oldval != prev_raw_count); 181 182 delta = (new_raw_count - prev_raw_count) & cmask; 183 local64_add(delta, &event->count); 184 local64_sub(delta, &hwc->period_left); 185 186 return delta; 187 } 188 189 void riscv_pmu_stop(struct perf_event *event, int flags) 190 { 191 struct hw_perf_event *hwc = &event->hw; 192 struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); 193 194 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); 195 196 if (!(hwc->state & PERF_HES_STOPPED)) { 197 if (rvpmu->ctr_stop) { 198 rvpmu->ctr_stop(event, 0); 199 hwc->state |= PERF_HES_STOPPED; 200 } 201 riscv_pmu_event_update(event); 202 hwc->state |= PERF_HES_UPTODATE; 203 } 204 } 205 206 int riscv_pmu_event_set_period(struct perf_event *event) 207 { 208 struct hw_perf_event *hwc = &event->hw; 209 s64 left = local64_read(&hwc->period_left); 210 s64 period = hwc->sample_period; 211 int overflow = 0; 212 uint64_t max_period = riscv_pmu_ctr_get_width_mask(event); 213 214 if (unlikely(left <= -period)) { 215 left = period; 216 local64_set(&hwc->period_left, left); 217 hwc->last_period = period; 218 overflow = 1; 219 } 220 221 if (unlikely(left <= 0)) { 222 left += period; 223 local64_set(&hwc->period_left, left); 224 hwc->last_period = period; 225 overflow = 1; 226 } 227 228 /* 229 * Limit the maximum period to prevent the counter value 230 * from overtaking the one we are about to program. In 231 * effect we are reducing max_period to account for 232 * interrupt latency (and we are being very conservative). 233 */ 234 if (left > (max_period >> 1)) 235 left = (max_period >> 1); 236 237 local64_set(&hwc->prev_count, (u64)-left); 238 239 perf_event_update_userpage(event); 240 241 return overflow; 242 } 243 244 void riscv_pmu_start(struct perf_event *event, int flags) 245 { 246 struct hw_perf_event *hwc = &event->hw; 247 struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); 248 uint64_t max_period = riscv_pmu_ctr_get_width_mask(event); 249 u64 init_val; 250 251 if (flags & PERF_EF_RELOAD) 252 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); 253 254 hwc->state = 0; 255 riscv_pmu_event_set_period(event); 256 init_val = local64_read(&hwc->prev_count) & max_period; 257 rvpmu->ctr_start(event, init_val); 258 perf_event_update_userpage(event); 259 } 260 261 static int riscv_pmu_add(struct perf_event *event, int flags) 262 { 263 struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); 264 struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events); 265 struct hw_perf_event *hwc = &event->hw; 266 int idx; 267 268 idx = rvpmu->ctr_get_idx(event); 269 if (idx < 0) 270 return idx; 271 272 hwc->idx = idx; 273 cpuc->events[idx] = event; 274 cpuc->n_events++; 275 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; 276 if (flags & PERF_EF_START) 277 riscv_pmu_start(event, PERF_EF_RELOAD); 278 279 /* Propagate our changes to the userspace mapping. */ 280 perf_event_update_userpage(event); 281 282 return 0; 283 } 284 285 static void riscv_pmu_del(struct perf_event *event, int flags) 286 { 287 struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); 288 struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events); 289 struct hw_perf_event *hwc = &event->hw; 290 291 riscv_pmu_stop(event, PERF_EF_UPDATE); 292 cpuc->events[hwc->idx] = NULL; 293 /* The firmware need to reset the counter mapping */ 294 if (rvpmu->ctr_stop) 295 rvpmu->ctr_stop(event, RISCV_PMU_STOP_FLAG_RESET); 296 cpuc->n_events--; 297 if (rvpmu->ctr_clear_idx) 298 rvpmu->ctr_clear_idx(event); 299 perf_event_update_userpage(event); 300 hwc->idx = -1; 301 } 302 303 static void riscv_pmu_read(struct perf_event *event) 304 { 305 riscv_pmu_event_update(event); 306 } 307 308 static int riscv_pmu_event_init(struct perf_event *event) 309 { 310 struct hw_perf_event *hwc = &event->hw; 311 struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); 312 int mapped_event; 313 u64 event_config = 0; 314 uint64_t cmask; 315 316 /* driver does not support branch stack sampling */ 317 if (has_branch_stack(event)) 318 return -EOPNOTSUPP; 319 320 hwc->flags = 0; 321 mapped_event = rvpmu->event_map(event, &event_config); 322 if (mapped_event < 0) { 323 pr_debug("event %x:%llx not supported\n", event->attr.type, 324 event->attr.config); 325 return mapped_event; 326 } 327 328 /* 329 * idx is set to -1 because the index of a general event should not be 330 * decided until binding to some counter in pmu->add(). 331 * config will contain the information about counter CSR 332 * the idx will contain the counter index 333 */ 334 hwc->config = event_config; 335 hwc->idx = -1; 336 hwc->event_base = mapped_event; 337 338 if (rvpmu->event_init) 339 rvpmu->event_init(event); 340 341 if (!is_sampling_event(event)) { 342 /* 343 * For non-sampling runs, limit the sample_period to half 344 * of the counter width. That way, the new counter value 345 * is far less likely to overtake the previous one unless 346 * you have some serious IRQ latency issues. 347 */ 348 cmask = riscv_pmu_ctr_get_width_mask(event); 349 hwc->sample_period = cmask >> 1; 350 hwc->last_period = hwc->sample_period; 351 local64_set(&hwc->period_left, hwc->sample_period); 352 } 353 354 return 0; 355 } 356 357 static int riscv_pmu_event_idx(struct perf_event *event) 358 { 359 struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); 360 361 if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT)) 362 return 0; 363 364 if (rvpmu->csr_index) 365 return rvpmu->csr_index(event) + 1; 366 367 return 0; 368 } 369 370 static void riscv_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) 371 { 372 struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); 373 374 if (rvpmu->event_mapped) { 375 rvpmu->event_mapped(event, mm); 376 perf_event_update_userpage(event); 377 } 378 } 379 380 static void riscv_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm) 381 { 382 struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); 383 384 if (rvpmu->event_unmapped) { 385 rvpmu->event_unmapped(event, mm); 386 perf_event_update_userpage(event); 387 } 388 } 389 390 struct riscv_pmu *riscv_pmu_alloc(void) 391 { 392 struct riscv_pmu *pmu; 393 int cpuid, i; 394 struct cpu_hw_events *cpuc; 395 396 pmu = kzalloc(sizeof(*pmu), GFP_KERNEL); 397 if (!pmu) 398 goto out; 399 400 pmu->hw_events = alloc_percpu_gfp(struct cpu_hw_events, GFP_KERNEL); 401 if (!pmu->hw_events) { 402 pr_info("failed to allocate per-cpu PMU data.\n"); 403 goto out_free_pmu; 404 } 405 406 for_each_possible_cpu(cpuid) { 407 cpuc = per_cpu_ptr(pmu->hw_events, cpuid); 408 cpuc->n_events = 0; 409 for (i = 0; i < RISCV_MAX_COUNTERS; i++) 410 cpuc->events[i] = NULL; 411 } 412 pmu->pmu = (struct pmu) { 413 .event_init = riscv_pmu_event_init, 414 .event_mapped = riscv_pmu_event_mapped, 415 .event_unmapped = riscv_pmu_event_unmapped, 416 .event_idx = riscv_pmu_event_idx, 417 .add = riscv_pmu_add, 418 .del = riscv_pmu_del, 419 .start = riscv_pmu_start, 420 .stop = riscv_pmu_stop, 421 .read = riscv_pmu_read, 422 }; 423 424 return pmu; 425 426 out_free_pmu: 427 kfree(pmu); 428 out: 429 return NULL; 430 } 431