// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright(C) 2015 Linaro Limited. All rights reserved.
 * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
 */

#include <linux/coresight.h>
#include <linux/coresight-pmu.h>
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/perf_event.h>
#include <linux/percpu-defs.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/workqueue.h>

#include "coresight-etm-perf.h"
#include "coresight-priv.h"

static struct pmu etm_pmu;
static bool etm_perf_up;

static DEFINE_PER_CPU(struct perf_output_handle, ctx_handle);
static DEFINE_PER_CPU(struct coresight_device *, csdev_src);

/* ETMv3.5/PTM's ETMCR is 'config' */
PMU_FORMAT_ATTR(cycacc,		"config:" __stringify(ETM_OPT_CYCACC));
PMU_FORMAT_ATTR(timestamp,	"config:" __stringify(ETM_OPT_TS));
PMU_FORMAT_ATTR(retstack,	"config:" __stringify(ETM_OPT_RETSTK));

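/*
 * The format attributes below are exposed under
 * /sys/bus/event_source/devices/cs_etm/format/, so the config bits can be
 * selected from the perf event syntax, e.g. (illustrative):
 *
 *	perf record -e cs_etm/cycacc,timestamp/u --per-thread <cmd>
 */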
static struct attribute *etm_config_formats_attr[] = {
	&format_attr_cycacc.attr,
	&format_attr_timestamp.attr,
	&format_attr_retstack.attr,
	NULL,
};

static const struct attribute_group etm_pmu_format_group = {
	.name   = "format",
	.attrs  = etm_config_formats_attr,
};

static const struct attribute_group *etm_pmu_attr_groups[] = {
	&etm_pmu_format_group,
	NULL,
};

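/*
 * Helpers to access the per-CPU source-to-sink path stashed in the session's
 * event data.  The *_ptr() variant returns the location of the pointer itself
 * so callers can update it.
 */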
static inline struct list_head **
etm_event_cpu_path_ptr(struct etm_event_data *data, int cpu)
{
	return per_cpu_ptr(data->path, cpu);
}

static inline struct list_head *
etm_event_cpu_path(struct etm_event_data *data, int cpu)
{
	return *etm_event_cpu_path_ptr(data, cpu);
}

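/*
 * Trace data is collected through the AUX ring buffer rather than a counter,
 * so there is nothing to do on a read.
 */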
static void etm_event_read(struct perf_event *event) {}

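/*
 * Allocate the address filter configuration for this event, inheriting the
 * parent's filters when the event was created through inheritance.
 */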
static int etm_addr_filters_alloc(struct perf_event *event)
{
	struct etm_filters *filters;
	int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu);

	filters = kzalloc_node(sizeof(struct etm_filters), GFP_KERNEL, node);
	if (!filters)
		return -ENOMEM;

	if (event->parent)
		memcpy(filters, event->parent->hw.addr_filters,
		       sizeof(*filters));

	event->hw.addr_filters = filters;

	return 0;
}

static void etm_event_destroy(struct perf_event *event)
{
	kfree(event->hw.addr_filters);
	event->hw.addr_filters = NULL;
}

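/*
 * Called by the perf core to check whether an event belongs to this PMU and,
 * if so, to set up its per-event resources.
 */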
static int etm_event_init(struct perf_event *event)
{
	int ret = 0;

	if (event->attr.type != etm_pmu.type) {
		ret = -ENOENT;
		goto out;
	}

	ret = etm_addr_filters_alloc(event);
	if (ret)
		goto out;

	event->destroy = etm_event_destroy;
out:
	return ret;
}

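/*
 * Deferred teardown of a trace session: free the sink buffer, release every
 * per-CPU path and finally the event data itself.  Done from a workqueue
 * since releasing a path may sleep.
 */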
static void free_event_data(struct work_struct *work)
{
	int cpu;
	cpumask_t *mask;
	struct etm_event_data *event_data;
	struct coresight_device *sink;

	event_data = container_of(work, struct etm_event_data, work);
	mask = &event_data->mask;

	/* Free the sink buffers, if there are any */
	if (event_data->snk_config && !WARN_ON(cpumask_empty(mask))) {
		cpu = cpumask_first(mask);
		sink = coresight_get_sink(etm_event_cpu_path(event_data, cpu));
		if (sink_ops(sink)->free_buffer)
			sink_ops(sink)->free_buffer(event_data->snk_config);
	}

	for_each_cpu(cpu, mask) {
		struct list_head **ppath;

		ppath = etm_event_cpu_path_ptr(event_data, cpu);
		if (!(IS_ERR_OR_NULL(*ppath)))
			coresight_release_path(*ppath);
		*ppath = NULL;
	}

	free_percpu(event_data->path);
	kfree(event_data);
}

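/*
 * Allocate the data that describes a trace session: the mask of CPUs involved
 * and a per-CPU slot for each source-to-sink path.
 */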
static void *alloc_event_data(int cpu)
{
	cpumask_t *mask;
	struct etm_event_data *event_data;

	/* First get memory for the session's data */
	event_data = kzalloc(sizeof(struct etm_event_data), GFP_KERNEL);
	if (!event_data)
		return NULL;

	mask = &event_data->mask;
	if (cpu != -1)
		cpumask_set_cpu(cpu, mask);
	else
		cpumask_copy(mask, cpu_present_mask);

	/*
	 * Each CPU has a single path between source and destination.  As such
	 * allocate an array using CPU numbers as indexes.  That way a path
	 * for any CPU can easily be accessed at any given time.  We proceed
	 * the same way for sessions involving a single CPU.  The cost of
	 * unused memory when dealing with single CPU trace scenarios is small
	 * compared to the cost of searching through an optimized array.
	 */
	event_data->path = alloc_percpu(struct list_head *);

	if (!event_data->path) {
		kfree(event_data);
		return NULL;
	}

	return event_data;
}

static void etm_free_aux(void *data)
{
	struct etm_event_data *event_data = data;

	schedule_work(&event_data->work);
}

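/*
 * Called when the AUX area is set up for the event: pick an enabled sink,
 * build a path from each participating CPU's tracer to that sink and allocate
 * the sink buffer used for the whole session.
 */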
static void *etm_setup_aux(int event_cpu, void **pages,
			   int nr_pages, bool overwrite)
{
	int cpu;
	cpumask_t *mask;
	struct coresight_device *sink;
	struct etm_event_data *event_data = NULL;

	event_data = alloc_event_data(event_cpu);
	if (!event_data)
		return NULL;
	INIT_WORK(&event_data->work, free_event_data);

	/*
	 * In theory nothing prevents tracers in a trace session from being
	 * associated with different sinks, nor from having a sink per tracer.
	 * But until we have HW with this kind of topology we need to assume
	 * tracers in a trace session are using the same sink.  Therefore go
	 * through the coresight bus and pick the first enabled sink.
	 *
	 * When operated from sysFS users are responsible for enabling the
	 * sink, while from perf the perf tools will do it based on the choice
	 * made on the cmd line.  As such the "enable_sink" flag in sysFS is
	 * reset.
	 */
	sink = coresight_get_enabled_sink(true);
	if (!sink || !sink_ops(sink)->alloc_buffer)
		goto err;

	mask = &event_data->mask;

	/*
	 * Setup the path for each CPU in a trace session.  We try to build
	 * a trace path for each CPU in the mask.  If we don't find an ETM
	 * for the CPU or fail to build a path, we clear the CPU from the
	 * mask and continue with the rest.  If ever we try to trace on those
	 * CPUs, we can handle it and fail the session.
	 */
	for_each_cpu(cpu, mask) {
		struct list_head *path;
		struct coresight_device *csdev;

		csdev = per_cpu(csdev_src, cpu);
		/*
		 * If there is no ETM associated with this CPU, clear it from
		 * the mask and continue with the rest.  If ever we try to
		 * trace on this CPU, we handle it accordingly.
		 */
		if (!csdev) {
			cpumask_clear_cpu(cpu, mask);
			continue;
		}

		/*
		 * Building a path doesn't enable it, it simply builds a
		 * list of devices from source to sink that can be
		 * referenced later when the path is actually needed.
		 */
		path = coresight_build_path(csdev, sink);
		if (IS_ERR(path)) {
			cpumask_clear_cpu(cpu, mask);
			continue;
		}

		*etm_event_cpu_path_ptr(event_data, cpu) = path;
	}

	/* If we don't have any CPUs ready for tracing, abort */
	cpu = cpumask_first(mask);
	if (cpu >= nr_cpu_ids)
		goto err;

	/* Allocate the sink buffer for this session */
	event_data->snk_config =
			sink_ops(sink)->alloc_buffer(sink, cpu, pages,
						     nr_pages, overwrite);
	if (!event_data->snk_config)
		goto err;

out:
	return event_data;

err:
	etm_free_aux(event_data);
	event_data = NULL;
	goto out;
}

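/*
 * Start tracing on the current CPU: grab an AUX handle, enable the path to
 * the sink and finally turn on the tracer itself.
 */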
static void etm_event_start(struct perf_event *event, int flags)
{
	int cpu = smp_processor_id();
	struct etm_event_data *event_data;
	struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle);
	struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
	struct list_head *path;

	if (!csdev)
		goto fail;

	/*
	 * Deal with the ring buffer API and get a handle on the
	 * session's information.
	 */
	event_data = perf_aux_output_begin(handle, event);
	if (!event_data)
		goto fail;

	path = etm_event_cpu_path(event_data, cpu);
	/* We need a sink, no need to continue without one */
	sink = coresight_get_sink(path);
	if (WARN_ON_ONCE(!sink))
		goto fail_end_stop;

	/* Nothing will happen without a path */
	if (coresight_enable_path(path, CS_MODE_PERF, handle))
		goto fail_end_stop;

	/* Tell the perf core the event is alive */
	event->hw.state = 0;

	/* Finally enable the tracer */
	if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF))
		goto fail_disable_path;

out:
	return;

fail_disable_path:
	coresight_disable_path(path);
fail_end_stop:
	perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
	perf_aux_output_end(handle, 0);
fail:
	event->hw.state = PERF_HES_STOPPED;
	goto out;
}

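/*
 * Stop tracing on the current CPU: disable the tracer, collect what the sink
 * gathered if an update was requested and release the path.
 */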
static void etm_event_stop(struct perf_event *event, int mode)
{
	int cpu = smp_processor_id();
	unsigned long size;
	struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
	struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle);
	struct etm_event_data *event_data = perf_get_aux(handle);
	struct list_head *path;

	if (event->hw.state == PERF_HES_STOPPED)
		return;

	if (!csdev)
		return;

	path = etm_event_cpu_path(event_data, cpu);
	if (!path)
		return;

	sink = coresight_get_sink(path);
	if (!sink)
		return;

	/* stop tracer */
	source_ops(csdev)->disable(csdev, event);

	/* tell the core */
	event->hw.state = PERF_HES_STOPPED;

	if (mode & PERF_EF_UPDATE) {
		if (WARN_ON_ONCE(handle->event != event))
			return;

		/* update trace information */
		if (!sink_ops(sink)->update_buffer)
			return;

		size = sink_ops(sink)->update_buffer(sink, handle,
					      event_data->snk_config);
		perf_aux_output_end(handle, size);
	}

	/* Disabling the path makes its elements available to other sessions */
	coresight_disable_path(path);
}

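/* Add the event to the PMU on this CPU, starting it right away if asked to. */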
static int etm_event_add(struct perf_event *event, int mode)
{
	int ret = 0;
	struct hw_perf_event *hwc = &event->hw;

	if (mode & PERF_EF_START) {
		etm_event_start(event, 0);
		if (hwc->state & PERF_HES_STOPPED)
			ret = -EINVAL;
	} else {
		hwc->state = PERF_HES_STOPPED;
	}

	return ret;
}

static void etm_event_del(struct perf_event *event, int mode)
{
	etm_event_stop(event, PERF_EF_UPDATE);
}

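/*
 * Make sure the requested address filters fit in the available hardware
 * comparators and that range and start/stop filters aren't mixed.
 */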
static int etm_addr_filters_validate(struct list_head *filters)
{
	bool range = false, address = false;
	int index = 0;
	struct perf_addr_filter *filter;

	list_for_each_entry(filter, filters, entry) {
		/*
		 * No need to go further if there's no more
		 * room for filters.
		 */
		if (++index > ETM_ADDR_CMP_MAX)
			return -EOPNOTSUPP;

		/* filter::size == 0 means single address trigger */
		if (filter->size) {
			/*
			 * The existing code relies on START/STOP filters
			 * being address filters.
			 */
			if (filter->action == PERF_ADDR_FILTER_ACTION_START ||
			    filter->action == PERF_ADDR_FILTER_ACTION_STOP)
				return -EOPNOTSUPP;

			range = true;
		} else
			address = true;

		/*
		 * At this time we don't allow range and start/stop filtering
		 * to cohabitate; they have to be mutually exclusive.
		 */
		if (range && address)
			return -EOPNOTSUPP;
	}

	return 0;
}

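/*
 * Propagate the perf core's view of the address filters to the configuration
 * the tracer drivers program into the hardware comparators.
 */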
static void etm_addr_filters_sync(struct perf_event *event)
{
	struct perf_addr_filters_head *head = perf_event_addr_filters(event);
	unsigned long start, stop, *offs = event->addr_filters_offs;
	struct etm_filters *filters = event->hw.addr_filters;
	struct etm_filter *etm_filter;
	struct perf_addr_filter *filter;
	int i = 0;

	list_for_each_entry(filter, &head->list, entry) {
		start = filter->offset + offs[i];
		stop = start + filter->size;
		etm_filter = &filters->etm_filter[i];

		switch (filter->action) {
		case PERF_ADDR_FILTER_ACTION_FILTER:
			etm_filter->start_addr = start;
			etm_filter->stop_addr = stop;
			etm_filter->type = ETM_ADDR_TYPE_RANGE;
			break;
		case PERF_ADDR_FILTER_ACTION_START:
			etm_filter->start_addr = start;
			etm_filter->type = ETM_ADDR_TYPE_START;
			break;
		case PERF_ADDR_FILTER_ACTION_STOP:
			etm_filter->stop_addr = stop;
			etm_filter->type = ETM_ADDR_TYPE_STOP;
			break;
		}
		i++;
	}

	filters->nr_filters = i;
}

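/*
 * Create (or remove) a sysfs link from the PMU to the tracer for a given CPU
 * and record the tracer as that CPU's source.
 */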
int etm_perf_symlink(struct coresight_device *csdev, bool link)
{
	char entry[sizeof("cpu9999999")];
	int ret = 0, cpu = source_ops(csdev)->cpu_id(csdev);
	struct device *pmu_dev = etm_pmu.dev;
	struct device *cs_dev = &csdev->dev;

	sprintf(entry, "cpu%d", cpu);

	if (!etm_perf_up)
		return -EPROBE_DEFER;

	if (link) {
		ret = sysfs_create_link(&pmu_dev->kobj, &cs_dev->kobj, entry);
		if (ret)
			return ret;
		per_cpu(csdev_src, cpu) = csdev;
	} else {
		sysfs_remove_link(&pmu_dev->kobj, entry);
		per_cpu(csdev_src, cpu) = NULL;
	}

	return 0;
}

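/* Register the cs_etm PMU with the perf core. */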
static int __init etm_perf_init(void)
{
	int ret;

	etm_pmu.capabilities		= PERF_PMU_CAP_EXCLUSIVE;

	etm_pmu.attr_groups		= etm_pmu_attr_groups;
	etm_pmu.task_ctx_nr		= perf_sw_context;
	etm_pmu.read			= etm_event_read;
	etm_pmu.event_init		= etm_event_init;
	etm_pmu.setup_aux		= etm_setup_aux;
	etm_pmu.free_aux		= etm_free_aux;
	etm_pmu.start			= etm_event_start;
	etm_pmu.stop			= etm_event_stop;
	etm_pmu.add			= etm_event_add;
	etm_pmu.del			= etm_event_del;
	etm_pmu.addr_filters_sync	= etm_addr_filters_sync;
	etm_pmu.addr_filters_validate	= etm_addr_filters_validate;
	etm_pmu.nr_addr_filters		= ETM_ADDR_CMP_MAX;

	ret = perf_pmu_register(&etm_pmu, CORESIGHT_ETM_PMU_NAME, -1);
	if (ret == 0)
		etm_perf_up = true;

	return ret;
}
device_initcall(etm_perf_init);