xref: /openbmc/linux/arch/riscv/kvm/vcpu_pmu.c (revision 1ee64446)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2023 Rivos Inc
4  *
5  * Authors:
6  *     Atish Patra <atishp@rivosinc.com>
7  */
8 
9 #define pr_fmt(fmt)	"riscv-kvm-pmu: " fmt
10 #include <linux/errno.h>
11 #include <linux/err.h>
12 #include <linux/kvm_host.h>
13 #include <linux/perf/riscv_pmu.h>
14 #include <asm/csr.h>
15 #include <asm/kvm_vcpu_sbi.h>
16 #include <asm/kvm_vcpu_pmu.h>
17 #include <linux/bitops.h>
18 
/* Total number of virtual counters: hardware counters plus firmware counters. */
#define kvm_pmu_num_counters(kvpmu) ((kvpmu)->num_hw_ctrs + (kvpmu)->num_fw_ctrs)
/* Event type field of an SBI event index: masked, then shifted right by 16. */
#define get_event_type(eidx) (((eidx) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16)
/* Event code field of an SBI event index (no shift is applied). */
#define get_event_code(eidx) ((eidx) & SBI_PMU_EVENT_IDX_CODE_MASK)
22 
23 static enum perf_hw_id hw_event_perf_map[SBI_PMU_HW_GENERAL_MAX] = {
24 	[SBI_PMU_HW_CPU_CYCLES] = PERF_COUNT_HW_CPU_CYCLES,
25 	[SBI_PMU_HW_INSTRUCTIONS] = PERF_COUNT_HW_INSTRUCTIONS,
26 	[SBI_PMU_HW_CACHE_REFERENCES] = PERF_COUNT_HW_CACHE_REFERENCES,
27 	[SBI_PMU_HW_CACHE_MISSES] = PERF_COUNT_HW_CACHE_MISSES,
28 	[SBI_PMU_HW_BRANCH_INSTRUCTIONS] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
29 	[SBI_PMU_HW_BRANCH_MISSES] = PERF_COUNT_HW_BRANCH_MISSES,
30 	[SBI_PMU_HW_BUS_CYCLES] = PERF_COUNT_HW_BUS_CYCLES,
31 	[SBI_PMU_HW_STALLED_CYCLES_FRONTEND] = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
32 	[SBI_PMU_HW_STALLED_CYCLES_BACKEND] = PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
33 	[SBI_PMU_HW_REF_CPU_CYCLES] = PERF_COUNT_HW_REF_CPU_CYCLES,
34 };
35 
kvm_pmu_get_sample_period(struct kvm_pmc * pmc)36 static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc)
37 {
38 	u64 counter_val_mask = GENMASK(pmc->cinfo.width, 0);
39 	u64 sample_period;
40 
41 	if (!pmc->counter_val)
42 		sample_period = counter_val_mask;
43 	else
44 		sample_period = (-pmc->counter_val) & counter_val_mask;
45 
46 	return sample_period;
47 }
48 
kvm_pmu_get_perf_event_type(unsigned long eidx)49 static u32 kvm_pmu_get_perf_event_type(unsigned long eidx)
50 {
51 	enum sbi_pmu_event_type etype = get_event_type(eidx);
52 	u32 type = PERF_TYPE_MAX;
53 
54 	switch (etype) {
55 	case SBI_PMU_EVENT_TYPE_HW:
56 		type = PERF_TYPE_HARDWARE;
57 		break;
58 	case SBI_PMU_EVENT_TYPE_CACHE:
59 		type = PERF_TYPE_HW_CACHE;
60 		break;
61 	case SBI_PMU_EVENT_TYPE_RAW:
62 	case SBI_PMU_EVENT_TYPE_FW:
63 		type = PERF_TYPE_RAW;
64 		break;
65 	default:
66 		break;
67 	}
68 
69 	return type;
70 }
71 
kvm_pmu_is_fw_event(unsigned long eidx)72 static bool kvm_pmu_is_fw_event(unsigned long eidx)
73 {
74 	return get_event_type(eidx) == SBI_PMU_EVENT_TYPE_FW;
75 }
76 
kvm_pmu_release_perf_event(struct kvm_pmc * pmc)77 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
78 {
79 	if (pmc->perf_event) {
80 		perf_event_disable(pmc->perf_event);
81 		perf_event_release_kernel(pmc->perf_event);
82 		pmc->perf_event = NULL;
83 	}
84 }
85 
kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code)86 static u64 kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code)
87 {
88 	return hw_event_perf_map[sbi_event_code];
89 }
90 
kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code)91 static u64 kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code)
92 {
93 	u64 config = U64_MAX;
94 	unsigned int cache_type, cache_op, cache_result;
95 
96 	/* All the cache event masks lie within 0xFF. No separate masking is necessary */
97 	cache_type = (sbi_event_code & SBI_PMU_EVENT_CACHE_ID_CODE_MASK) >>
98 		      SBI_PMU_EVENT_CACHE_ID_SHIFT;
99 	cache_op = (sbi_event_code & SBI_PMU_EVENT_CACHE_OP_ID_CODE_MASK) >>
100 		    SBI_PMU_EVENT_CACHE_OP_SHIFT;
101 	cache_result = sbi_event_code & SBI_PMU_EVENT_CACHE_RESULT_ID_CODE_MASK;
102 
103 	if (cache_type >= PERF_COUNT_HW_CACHE_MAX ||
104 	    cache_op >= PERF_COUNT_HW_CACHE_OP_MAX ||
105 	    cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
106 		return config;
107 
108 	config = cache_type | (cache_op << 8) | (cache_result << 16);
109 
110 	return config;
111 }
112 
kvm_pmu_get_perf_event_config(unsigned long eidx,uint64_t evt_data)113 static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data)
114 {
115 	enum sbi_pmu_event_type etype = get_event_type(eidx);
116 	u32 ecode = get_event_code(eidx);
117 	u64 config = U64_MAX;
118 
119 	switch (etype) {
120 	case SBI_PMU_EVENT_TYPE_HW:
121 		if (ecode < SBI_PMU_HW_GENERAL_MAX)
122 			config = kvm_pmu_get_perf_event_hw_config(ecode);
123 		break;
124 	case SBI_PMU_EVENT_TYPE_CACHE:
125 		config = kvm_pmu_get_perf_event_cache_config(ecode);
126 		break;
127 	case SBI_PMU_EVENT_TYPE_RAW:
128 		config = evt_data & RISCV_PMU_RAW_EVENT_MASK;
129 		break;
130 	case SBI_PMU_EVENT_TYPE_FW:
131 		if (ecode < SBI_PMU_FW_MAX)
132 			config = (1ULL << 63) | ecode;
133 		break;
134 	default:
135 		break;
136 	}
137 
138 	return config;
139 }
140 
kvm_pmu_get_fixed_pmc_index(unsigned long eidx)141 static int kvm_pmu_get_fixed_pmc_index(unsigned long eidx)
142 {
143 	u32 etype = kvm_pmu_get_perf_event_type(eidx);
144 	u32 ecode = get_event_code(eidx);
145 
146 	if (etype != SBI_PMU_EVENT_TYPE_HW)
147 		return -EINVAL;
148 
149 	if (ecode == SBI_PMU_HW_CPU_CYCLES)
150 		return 0;
151 	else if (ecode == SBI_PMU_HW_INSTRUCTIONS)
152 		return 2;
153 	else
154 		return -EINVAL;
155 }
156 
kvm_pmu_get_programmable_pmc_index(struct kvm_pmu * kvpmu,unsigned long eidx,unsigned long cbase,unsigned long cmask)157 static int kvm_pmu_get_programmable_pmc_index(struct kvm_pmu *kvpmu, unsigned long eidx,
158 					      unsigned long cbase, unsigned long cmask)
159 {
160 	int ctr_idx = -1;
161 	int i, pmc_idx;
162 	int min, max;
163 
164 	if (kvm_pmu_is_fw_event(eidx)) {
165 		/* Firmware counters are mapped 1:1 starting from num_hw_ctrs for simplicity */
166 		min = kvpmu->num_hw_ctrs;
167 		max = min + kvpmu->num_fw_ctrs;
168 	} else {
169 		/* First 3 counters are reserved for fixed counters */
170 		min = 3;
171 		max = kvpmu->num_hw_ctrs;
172 	}
173 
174 	for_each_set_bit(i, &cmask, BITS_PER_LONG) {
175 		pmc_idx = i + cbase;
176 		if ((pmc_idx >= min && pmc_idx < max) &&
177 		    !test_bit(pmc_idx, kvpmu->pmc_in_use)) {
178 			ctr_idx = pmc_idx;
179 			break;
180 		}
181 	}
182 
183 	return ctr_idx;
184 }
185 
/*
 * Select a counter for event @eidx: the fixed counter if the event has one,
 * otherwise a free programmable counter from @cbase/@cmask.
 *
 * Returns the counter index, or a negative value if none can be used.
 */
static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
			     unsigned long cbase, unsigned long cmask)
{
	/* Fixed counters need a fixed mapping as they have a different width */
	int fixed_idx = kvm_pmu_get_fixed_pmc_index(eidx);

	if (fixed_idx < 0)
		return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);

	return fixed_idx;
}
198 
pmu_ctr_read(struct kvm_vcpu * vcpu,unsigned long cidx,unsigned long * out_val)199 static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
200 			unsigned long *out_val)
201 {
202 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
203 	struct kvm_pmc *pmc;
204 	u64 enabled, running;
205 	int fevent_code;
206 
207 	pmc = &kvpmu->pmc[cidx];
208 
209 	if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
210 		fevent_code = get_event_code(pmc->event_idx);
211 		pmc->counter_val = kvpmu->fw_event[fevent_code].value;
212 	} else if (pmc->perf_event) {
213 		pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
214 	} else {
215 		return -EINVAL;
216 	}
217 	*out_val = pmc->counter_val;
218 
219 	return 0;
220 }
221 
kvm_pmu_validate_counter_mask(struct kvm_pmu * kvpmu,unsigned long ctr_base,unsigned long ctr_mask)222 static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base,
223 					 unsigned long ctr_mask)
224 {
225 	/* Make sure the we have a valid counter mask requested from the caller */
226 	if (!ctr_mask || (ctr_base + __fls(ctr_mask) >= kvm_pmu_num_counters(kvpmu)))
227 		return -EINVAL;
228 
229 	return 0;
230 }
231 
kvm_pmu_create_perf_event(struct kvm_pmc * pmc,struct perf_event_attr * attr,unsigned long flags,unsigned long eidx,unsigned long evtdata)232 static int kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
233 				     unsigned long flags, unsigned long eidx, unsigned long evtdata)
234 {
235 	struct perf_event *event;
236 
237 	kvm_pmu_release_perf_event(pmc);
238 	attr->config = kvm_pmu_get_perf_event_config(eidx, evtdata);
239 	if (flags & SBI_PMU_CFG_FLAG_CLEAR_VALUE) {
240 		//TODO: Do we really want to clear the value in hardware counter
241 		pmc->counter_val = 0;
242 	}
243 
244 	/*
245 	 * Set the default sample_period for now. The guest specified value
246 	 * will be updated in the start call.
247 	 */
248 	attr->sample_period = kvm_pmu_get_sample_period(pmc);
249 
250 	event = perf_event_create_kernel_counter(attr, -1, current, NULL, pmc);
251 	if (IS_ERR(event)) {
252 		pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
253 		return PTR_ERR(event);
254 	}
255 
256 	pmc->perf_event = event;
257 	if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
258 		perf_event_enable(pmc->perf_event);
259 
260 	return 0;
261 }
262 
kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu * vcpu,unsigned long fid)263 int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
264 {
265 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
266 	struct kvm_fw_event *fevent;
267 
268 	if (!kvpmu || fid >= SBI_PMU_FW_MAX)
269 		return -EINVAL;
270 
271 	fevent = &kvpmu->fw_event[fid];
272 	if (fevent->started)
273 		fevent->value++;
274 
275 	return 0;
276 }
277 
kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu * vcpu,unsigned int csr_num,unsigned long * val,unsigned long new_val,unsigned long wr_mask)278 int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
279 				unsigned long *val, unsigned long new_val,
280 				unsigned long wr_mask)
281 {
282 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
283 	int cidx, ret = KVM_INSN_CONTINUE_NEXT_SEPC;
284 
285 	if (!kvpmu || !kvpmu->init_done) {
286 		/*
287 		 * In absence of sscofpmf in the platform, the guest OS may use
288 		 * the legacy PMU driver to read cycle/instret. In that case,
289 		 * just return 0 to avoid any illegal trap. However, any other
290 		 * hpmcounter access should result in illegal trap as they must
291 		 * be access through SBI PMU only.
292 		 */
293 		if (csr_num == CSR_CYCLE || csr_num == CSR_INSTRET) {
294 			*val = 0;
295 			return ret;
296 		} else {
297 			return KVM_INSN_ILLEGAL_TRAP;
298 		}
299 	}
300 
301 	/* The counter CSR are read only. Thus, any write should result in illegal traps */
302 	if (wr_mask)
303 		return KVM_INSN_ILLEGAL_TRAP;
304 
305 	cidx = csr_num - CSR_CYCLE;
306 
307 	if (pmu_ctr_read(vcpu, cidx, val) < 0)
308 		return KVM_INSN_ILLEGAL_TRAP;
309 
310 	return ret;
311 }
312 
kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu * vcpu,struct kvm_vcpu_sbi_return * retdata)313 int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
314 				struct kvm_vcpu_sbi_return *retdata)
315 {
316 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
317 
318 	retdata->out_val = kvm_pmu_num_counters(kvpmu);
319 
320 	return 0;
321 }
322 
kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu * vcpu,unsigned long cidx,struct kvm_vcpu_sbi_return * retdata)323 int kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu *vcpu, unsigned long cidx,
324 				struct kvm_vcpu_sbi_return *retdata)
325 {
326 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
327 
328 	if (cidx > RISCV_KVM_MAX_COUNTERS || cidx == 1) {
329 		retdata->err_val = SBI_ERR_INVALID_PARAM;
330 		return 0;
331 	}
332 
333 	retdata->out_val = kvpmu->pmc[cidx].cinfo.value;
334 
335 	return 0;
336 }
337 
/*
 * Start the counters selected by @ctr_base/@ctr_mask.
 *
 * Counters that are not marked in use are skipped. If
 * SBI_PMU_START_FLAG_SET_INIT_VALUE is set in @flags, @ival becomes the
 * initial counter value. Firmware counters are started by flagging their
 * fw_event[] entry; hardware counters by programming the sample period and
 * enabling the perf event.
 *
 * The SBI status is stored in retdata->err_val; the function itself always
 * returns 0.
 */
int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				 unsigned long ctr_mask, unsigned long flags, u64 ival,
				 struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int i, pmc_index, sbiret = 0;
	struct kvm_pmc *pmc;
	int fevent_code;

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	/*
	 * Start the counters that have been configured and requested by the guest.
	 * ctr_mask is a single unsigned long, so only BITS_PER_LONG bits may be
	 * scanned; a larger size would read beyond the variable when long is
	 * narrower than that size.
	 */
	for_each_set_bit(i, &ctr_mask, BITS_PER_LONG) {
		pmc_index = i + ctr_base;
		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
			continue;
		pmc = &kvpmu->pmc[pmc_index];
		if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE)
			pmc->counter_val = ival;
		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
			fevent_code = get_event_code(pmc->event_idx);
			if (fevent_code >= SBI_PMU_FW_MAX) {
				sbiret = SBI_ERR_INVALID_PARAM;
				goto out;
			}

			/* Check if the counter was already started for some reason */
			if (kvpmu->fw_event[fevent_code].started) {
				sbiret = SBI_ERR_ALREADY_STARTED;
				continue;
			}

			kvpmu->fw_event[fevent_code].started = true;
			kvpmu->fw_event[fevent_code].value = pmc->counter_val;
		} else if (pmc->perf_event) {
			if (unlikely(pmc->started)) {
				sbiret = SBI_ERR_ALREADY_STARTED;
				continue;
			}
			/* Apply the period derived from the (possibly new) counter value */
			perf_event_period(pmc->perf_event, kvm_pmu_get_sample_period(pmc));
			perf_event_enable(pmc->perf_event);
			pmc->started = true;
		} else {
			/* Hardware counter without a backing perf event */
			sbiret = SBI_ERR_INVALID_PARAM;
		}
	}

out:
	retdata->err_val = sbiret;

	return 0;
}
393 
kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu * vcpu,unsigned long ctr_base,unsigned long ctr_mask,unsigned long flags,struct kvm_vcpu_sbi_return * retdata)394 int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
395 				unsigned long ctr_mask, unsigned long flags,
396 				struct kvm_vcpu_sbi_return *retdata)
397 {
398 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
399 	int i, pmc_index, sbiret = 0;
400 	u64 enabled, running;
401 	struct kvm_pmc *pmc;
402 	int fevent_code;
403 
404 	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
405 		sbiret = SBI_ERR_INVALID_PARAM;
406 		goto out;
407 	}
408 
409 	/* Stop the counters that have been configured and requested by the guest */
410 	for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
411 		pmc_index = i + ctr_base;
412 		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
413 			continue;
414 		pmc = &kvpmu->pmc[pmc_index];
415 		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
416 			fevent_code = get_event_code(pmc->event_idx);
417 			if (fevent_code >= SBI_PMU_FW_MAX) {
418 				sbiret = SBI_ERR_INVALID_PARAM;
419 				goto out;
420 			}
421 
422 			if (!kvpmu->fw_event[fevent_code].started)
423 				sbiret = SBI_ERR_ALREADY_STOPPED;
424 
425 			kvpmu->fw_event[fevent_code].started = false;
426 		} else if (pmc->perf_event) {
427 			if (pmc->started) {
428 				/* Stop counting the counter */
429 				perf_event_disable(pmc->perf_event);
430 				pmc->started = false;
431 			} else {
432 				sbiret = SBI_ERR_ALREADY_STOPPED;
433 			}
434 
435 			if (flags & SBI_PMU_STOP_FLAG_RESET) {
436 				/* Relase the counter if this is a reset request */
437 				pmc->counter_val += perf_event_read_value(pmc->perf_event,
438 									  &enabled, &running);
439 				kvm_pmu_release_perf_event(pmc);
440 			}
441 		} else {
442 			sbiret = SBI_ERR_INVALID_PARAM;
443 		}
444 		if (flags & SBI_PMU_STOP_FLAG_RESET) {
445 			pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
446 			clear_bit(pmc_index, kvpmu->pmc_in_use);
447 		}
448 	}
449 
450 out:
451 	retdata->err_val = sbiret;
452 
453 	return 0;
454 }
455 
/*
 * Find and configure a counter for event @eidx among the counters selected
 * by @ctr_base/@ctr_mask.
 *
 * For firmware events only the started state is updated (when AUTO_START is
 * requested). For all other events a perf event is created for the chosen
 * counter. On success the counter is marked in use and its index is stored
 * in retdata->out_val.
 *
 * All failures, including a failure to create the perf event, are reported
 * to the guest through retdata->err_val; the function itself always
 * returns 0.
 */
int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				     unsigned long ctr_mask, unsigned long flags,
				     unsigned long eidx, u64 evtdata,
				     struct kvm_vcpu_sbi_return *retdata)
{
	int ctr_idx, ret, sbiret = 0;
	bool is_fevent;
	unsigned long event_code;
	u32 etype = kvm_pmu_get_perf_event_type(eidx);
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = NULL;
	struct perf_event_attr attr = {
		.type = etype,
		.size = sizeof(struct perf_event_attr),
		.pinned = true,
		/*
		 * It should never reach here if the platform doesn't support the sscofpmf
		 * extension as mode filtering won't work without it.
		 */
		.exclude_host = true,
		.exclude_hv = true,
		.exclude_user = !!(flags & SBI_PMU_CFG_FLAG_SET_UINH),
		.exclude_kernel = !!(flags & SBI_PMU_CFG_FLAG_SET_SINH),
		.config1 = RISCV_PMU_CONFIG1_GUEST_EVENTS,
	};

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	event_code = get_event_code(eidx);
	is_fevent = kvm_pmu_is_fw_event(eidx);
	if (is_fevent && event_code >= SBI_PMU_FW_MAX) {
		sbiret = SBI_ERR_NOT_SUPPORTED;
		goto out;
	}

	/*
	 * SKIP_MATCH flag indicates the caller is aware of the assigned counter
	 * for this event. Just do a sanity check if it already marked used.
	 */
	if (flags & SBI_PMU_CFG_FLAG_SKIP_MATCH) {
		if (!test_bit(ctr_base + __ffs(ctr_mask), kvpmu->pmc_in_use)) {
			sbiret = SBI_ERR_FAILURE;
			goto out;
		}
		ctr_idx = ctr_base + __ffs(ctr_mask);
	} else {
		ctr_idx = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask);
		if (ctr_idx < 0) {
			sbiret = SBI_ERR_NOT_SUPPORTED;
			goto out;
		}
	}

	pmc = &kvpmu->pmc[ctr_idx];
	pmc->idx = ctr_idx;

	if (is_fevent) {
		if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
			kvpmu->fw_event[event_code].started = true;
	} else {
		ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata);
		if (ret) {
			/*
			 * The guest can keep running without this counter, so
			 * report an SBI error instead of returning a Linux
			 * error code from the handler.
			 */
			sbiret = SBI_ERR_NOT_SUPPORTED;
			goto out;
		}
	}

	set_bit(ctr_idx, kvpmu->pmc_in_use);
	pmc->event_idx = eidx;
	retdata->out_val = ctr_idx;
out:
	retdata->err_val = sbiret;

	return 0;
}
532 
kvm_riscv_vcpu_pmu_ctr_read(struct kvm_vcpu * vcpu,unsigned long cidx,struct kvm_vcpu_sbi_return * retdata)533 int kvm_riscv_vcpu_pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
534 				struct kvm_vcpu_sbi_return *retdata)
535 {
536 	int ret;
537 
538 	ret = pmu_ctr_read(vcpu, cidx, &retdata->out_val);
539 	if (ret == -EINVAL)
540 		retdata->err_val = SBI_ERR_INVALID_PARAM;
541 
542 	return 0;
543 }
544 
kvm_riscv_vcpu_pmu_init(struct kvm_vcpu * vcpu)545 void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
546 {
547 	int i = 0, ret, num_hw_ctrs = 0, hpm_width = 0;
548 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
549 	struct kvm_pmc *pmc;
550 
551 	/*
552 	 * PMU functionality should be only available to guests if privilege mode
553 	 * filtering is available in the host. Otherwise, guest will always count
554 	 * events while the execution is in hypervisor mode.
555 	 */
556 	if (!riscv_isa_extension_available(NULL, SSCOFPMF))
557 		return;
558 
559 	ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs);
560 	if (ret < 0 || !hpm_width || !num_hw_ctrs)
561 		return;
562 
563 	/*
564 	 * Increase the number of hardware counters to offset the time counter.
565 	 */
566 	kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
567 	kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
568 	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
569 
570 	if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
571 		pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA");
572 		kvpmu->num_hw_ctrs = RISCV_KVM_MAX_HW_CTRS;
573 	}
574 
575 	/*
576 	 * There is no correlation between the logical hardware counter and virtual counters.
577 	 * However, we need to encode a hpmcounter CSR in the counter info field so that
578 	 * KVM can trap n emulate the read. This works well in the migration use case as
579 	 * KVM doesn't care if the actual hpmcounter is available in the hardware or not.
580 	 */
581 	for (i = 0; i < kvm_pmu_num_counters(kvpmu); i++) {
582 		/* TIME CSR shouldn't be read from perf interface */
583 		if (i == 1)
584 			continue;
585 		pmc = &kvpmu->pmc[i];
586 		pmc->idx = i;
587 		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
588 		if (i < kvpmu->num_hw_ctrs) {
589 			pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
590 			if (i < 3)
591 				/* CY, IR counters */
592 				pmc->cinfo.width = 63;
593 			else
594 				pmc->cinfo.width = hpm_width;
595 			/*
596 			 * The CSR number doesn't have any relation with the logical
597 			 * hardware counters. The CSR numbers are encoded sequentially
598 			 * to avoid maintaining a map between the virtual counter
599 			 * and CSR number.
600 			 */
601 			pmc->cinfo.csr = CSR_CYCLE + i;
602 		} else {
603 			pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW;
604 			pmc->cinfo.width = BITS_PER_LONG - 1;
605 		}
606 	}
607 
608 	kvpmu->init_done = true;
609 }
610 
kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu * vcpu)611 void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
612 {
613 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
614 	struct kvm_pmc *pmc;
615 	int i;
616 
617 	if (!kvpmu)
618 		return;
619 
620 	for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_MAX_COUNTERS) {
621 		pmc = &kvpmu->pmc[i];
622 		pmc->counter_val = 0;
623 		kvm_pmu_release_perf_event(pmc);
624 		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
625 	}
626 	bitmap_zero(kvpmu->pmc_in_use, RISCV_MAX_COUNTERS);
627 	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
628 }
629 
/* Reset the PMU state of @vcpu; this is the same operation as a deinit. */
void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
{
	/* Resetting only needs the dynamic state torn down */
	kvm_riscv_vcpu_pmu_deinit(vcpu);
}
634