1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright(C) 2015 Linaro Limited. All rights reserved. 4 * Author: Mathieu Poirier <mathieu.poirier@linaro.org> 5 */ 6 7 #include <linux/coresight.h> 8 #include <linux/coresight-pmu.h> 9 #include <linux/cpumask.h> 10 #include <linux/device.h> 11 #include <linux/list.h> 12 #include <linux/mm.h> 13 #include <linux/init.h> 14 #include <linux/perf_event.h> 15 #include <linux/percpu-defs.h> 16 #include <linux/slab.h> 17 #include <linux/types.h> 18 #include <linux/workqueue.h> 19 20 #include "coresight-etm-perf.h" 21 #include "coresight-priv.h" 22 23 static struct pmu etm_pmu; 24 static bool etm_perf_up; 25 26 static DEFINE_PER_CPU(struct perf_output_handle, ctx_handle); 27 static DEFINE_PER_CPU(struct coresight_device *, csdev_src); 28 29 /* ETMv3.5/PTM's ETMCR is 'config' */ 30 PMU_FORMAT_ATTR(cycacc, "config:" __stringify(ETM_OPT_CYCACC)); 31 PMU_FORMAT_ATTR(timestamp, "config:" __stringify(ETM_OPT_TS)); 32 PMU_FORMAT_ATTR(retstack, "config:" __stringify(ETM_OPT_RETSTK)); 33 34 static struct attribute *etm_config_formats_attr[] = { 35 &format_attr_cycacc.attr, 36 &format_attr_timestamp.attr, 37 &format_attr_retstack.attr, 38 NULL, 39 }; 40 41 static const struct attribute_group etm_pmu_format_group = { 42 .name = "format", 43 .attrs = etm_config_formats_attr, 44 }; 45 46 static const struct attribute_group *etm_pmu_attr_groups[] = { 47 &etm_pmu_format_group, 48 NULL, 49 }; 50 51 static inline struct list_head ** 52 etm_event_cpu_path_ptr(struct etm_event_data *data, int cpu) 53 { 54 return per_cpu_ptr(data->path, cpu); 55 } 56 57 static inline struct list_head * 58 etm_event_cpu_path(struct etm_event_data *data, int cpu) 59 { 60 return *etm_event_cpu_path_ptr(data, cpu); 61 } 62 63 static void etm_event_read(struct perf_event *event) {} 64 65 static int etm_addr_filters_alloc(struct perf_event *event) 66 { 67 struct etm_filters *filters; 68 int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu); 69 70 filters = kzalloc_node(sizeof(struct etm_filters), GFP_KERNEL, node); 71 if (!filters) 72 return -ENOMEM; 73 74 if (event->parent) 75 memcpy(filters, event->parent->hw.addr_filters, 76 sizeof(*filters)); 77 78 event->hw.addr_filters = filters; 79 80 return 0; 81 } 82 83 static void etm_event_destroy(struct perf_event *event) 84 { 85 kfree(event->hw.addr_filters); 86 event->hw.addr_filters = NULL; 87 } 88 89 static int etm_event_init(struct perf_event *event) 90 { 91 int ret = 0; 92 93 if (event->attr.type != etm_pmu.type) { 94 ret = -ENOENT; 95 goto out; 96 } 97 98 ret = etm_addr_filters_alloc(event); 99 if (ret) 100 goto out; 101 102 event->destroy = etm_event_destroy; 103 out: 104 return ret; 105 } 106 107 static void free_event_data(struct work_struct *work) 108 { 109 int cpu; 110 cpumask_t *mask; 111 struct etm_event_data *event_data; 112 struct coresight_device *sink; 113 114 event_data = container_of(work, struct etm_event_data, work); 115 mask = &event_data->mask; 116 117 /* Free the sink buffers, if there are any */ 118 if (event_data->snk_config && !WARN_ON(cpumask_empty(mask))) { 119 cpu = cpumask_first(mask); 120 sink = coresight_get_sink(etm_event_cpu_path(event_data, cpu)); 121 if (sink_ops(sink)->free_buffer) 122 sink_ops(sink)->free_buffer(event_data->snk_config); 123 } 124 125 for_each_cpu(cpu, mask) { 126 struct list_head **ppath; 127 128 ppath = etm_event_cpu_path_ptr(event_data, cpu); 129 if (!(IS_ERR_OR_NULL(*ppath))) 130 coresight_release_path(*ppath); 131 *ppath = NULL; 132 } 133 134 free_percpu(event_data->path); 135 kfree(event_data); 136 } 137 138 static void *alloc_event_data(int cpu) 139 { 140 cpumask_t *mask; 141 struct etm_event_data *event_data; 142 143 /* First get memory for the session's data */ 144 event_data = kzalloc(sizeof(struct etm_event_data), GFP_KERNEL); 145 if (!event_data) 146 return NULL; 147 148 149 mask = &event_data->mask; 150 if (cpu != -1) 151 cpumask_set_cpu(cpu, mask); 152 else 153 cpumask_copy(mask, cpu_present_mask); 154 155 /* 156 * Each CPU has a single path between source and destination. As such 157 * allocate an array using CPU numbers as indexes. That way a path 158 * for any CPU can easily be accessed at any given time. We proceed 159 * the same way for sessions involving a single CPU. The cost of 160 * unused memory when dealing with single CPU trace scenarios is small 161 * compared to the cost of searching through an optimized array. 162 */ 163 event_data->path = alloc_percpu(struct list_head *); 164 165 if (!event_data->path) { 166 kfree(event_data); 167 return NULL; 168 } 169 170 return event_data; 171 } 172 173 static void etm_free_aux(void *data) 174 { 175 struct etm_event_data *event_data = data; 176 177 schedule_work(&event_data->work); 178 } 179 180 static void *etm_setup_aux(int event_cpu, void **pages, 181 int nr_pages, bool overwrite) 182 { 183 int cpu; 184 cpumask_t *mask; 185 struct coresight_device *sink; 186 struct etm_event_data *event_data = NULL; 187 188 event_data = alloc_event_data(event_cpu); 189 if (!event_data) 190 return NULL; 191 INIT_WORK(&event_data->work, free_event_data); 192 193 /* 194 * In theory nothing prevent tracers in a trace session from being 195 * associated with different sinks, nor having a sink per tracer. But 196 * until we have HW with this kind of topology we need to assume tracers 197 * in a trace session are using the same sink. Therefore go through 198 * the coresight bus and pick the first enabled sink. 199 * 200 * When operated from sysFS users are responsible to enable the sink 201 * while from perf, the perf tools will do it based on the choice made 202 * on the cmd line. As such the "enable_sink" flag in sysFS is reset. 203 */ 204 sink = coresight_get_enabled_sink(true); 205 if (!sink || !sink_ops(sink)->alloc_buffer) 206 goto err; 207 208 mask = &event_data->mask; 209 210 /* 211 * Setup the path for each CPU in a trace session. We try to build 212 * trace path for each CPU in the mask. If we don't find an ETM 213 * for the CPU or fail to build a path, we clear the CPU from the 214 * mask and continue with the rest. If ever we try to trace on those 215 * CPUs, we can handle it and fail the session. 216 */ 217 for_each_cpu(cpu, mask) { 218 struct list_head *path; 219 struct coresight_device *csdev; 220 221 csdev = per_cpu(csdev_src, cpu); 222 /* 223 * If there is no ETM associated with this CPU clear it from 224 * the mask and continue with the rest. If ever we try to trace 225 * on this CPU, we handle it accordingly. 226 */ 227 if (!csdev) { 228 cpumask_clear_cpu(cpu, mask); 229 continue; 230 } 231 232 /* 233 * Building a path doesn't enable it, it simply builds a 234 * list of devices from source to sink that can be 235 * referenced later when the path is actually needed. 236 */ 237 path = coresight_build_path(csdev, sink); 238 if (IS_ERR(path)) { 239 cpumask_clear_cpu(cpu, mask); 240 continue; 241 } 242 243 *etm_event_cpu_path_ptr(event_data, cpu) = path; 244 } 245 246 /* If we don't have any CPUs ready for tracing, abort */ 247 cpu = cpumask_first(mask); 248 if (cpu >= nr_cpu_ids) 249 goto err; 250 251 /* Allocate the sink buffer for this session */ 252 event_data->snk_config = 253 sink_ops(sink)->alloc_buffer(sink, cpu, pages, 254 nr_pages, overwrite); 255 if (!event_data->snk_config) 256 goto err; 257 258 out: 259 return event_data; 260 261 err: 262 etm_free_aux(event_data); 263 event_data = NULL; 264 goto out; 265 } 266 267 static void etm_event_start(struct perf_event *event, int flags) 268 { 269 int cpu = smp_processor_id(); 270 struct etm_event_data *event_data; 271 struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle); 272 struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu); 273 struct list_head *path; 274 275 if (!csdev) 276 goto fail; 277 278 /* 279 * Deal with the ring buffer API and get a handle on the 280 * session's information. 281 */ 282 event_data = perf_aux_output_begin(handle, event); 283 if (!event_data) 284 goto fail; 285 286 path = etm_event_cpu_path(event_data, cpu); 287 /* We need a sink, no need to continue without one */ 288 sink = coresight_get_sink(path); 289 if (WARN_ON_ONCE(!sink)) 290 goto fail_end_stop; 291 292 /* Nothing will happen without a path */ 293 if (coresight_enable_path(path, CS_MODE_PERF, handle)) 294 goto fail_end_stop; 295 296 /* Tell the perf core the event is alive */ 297 event->hw.state = 0; 298 299 /* Finally enable the tracer */ 300 if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF)) 301 goto fail_disable_path; 302 303 out: 304 return; 305 306 fail_disable_path: 307 coresight_disable_path(path); 308 fail_end_stop: 309 perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); 310 perf_aux_output_end(handle, 0); 311 fail: 312 event->hw.state = PERF_HES_STOPPED; 313 goto out; 314 } 315 316 static void etm_event_stop(struct perf_event *event, int mode) 317 { 318 int cpu = smp_processor_id(); 319 unsigned long size; 320 struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu); 321 struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle); 322 struct etm_event_data *event_data = perf_get_aux(handle); 323 struct list_head *path; 324 325 if (event->hw.state == PERF_HES_STOPPED) 326 return; 327 328 if (!csdev) 329 return; 330 331 path = etm_event_cpu_path(event_data, cpu); 332 if (!path) 333 return; 334 335 sink = coresight_get_sink(path); 336 if (!sink) 337 return; 338 339 /* stop tracer */ 340 source_ops(csdev)->disable(csdev, event); 341 342 /* tell the core */ 343 event->hw.state = PERF_HES_STOPPED; 344 345 if (mode & PERF_EF_UPDATE) { 346 if (WARN_ON_ONCE(handle->event != event)) 347 return; 348 349 /* update trace information */ 350 if (!sink_ops(sink)->update_buffer) 351 return; 352 353 size = sink_ops(sink)->update_buffer(sink, handle, 354 event_data->snk_config); 355 perf_aux_output_end(handle, size); 356 } 357 358 /* Disabling the path make its elements available to other sessions */ 359 coresight_disable_path(path); 360 } 361 362 static int etm_event_add(struct perf_event *event, int mode) 363 { 364 int ret = 0; 365 struct hw_perf_event *hwc = &event->hw; 366 367 if (mode & PERF_EF_START) { 368 etm_event_start(event, 0); 369 if (hwc->state & PERF_HES_STOPPED) 370 ret = -EINVAL; 371 } else { 372 hwc->state = PERF_HES_STOPPED; 373 } 374 375 return ret; 376 } 377 378 static void etm_event_del(struct perf_event *event, int mode) 379 { 380 etm_event_stop(event, PERF_EF_UPDATE); 381 } 382 383 static int etm_addr_filters_validate(struct list_head *filters) 384 { 385 bool range = false, address = false; 386 int index = 0; 387 struct perf_addr_filter *filter; 388 389 list_for_each_entry(filter, filters, entry) { 390 /* 391 * No need to go further if there's no more 392 * room for filters. 393 */ 394 if (++index > ETM_ADDR_CMP_MAX) 395 return -EOPNOTSUPP; 396 397 /* filter::size==0 means single address trigger */ 398 if (filter->size) { 399 /* 400 * The existing code relies on START/STOP filters 401 * being address filters. 402 */ 403 if (filter->action == PERF_ADDR_FILTER_ACTION_START || 404 filter->action == PERF_ADDR_FILTER_ACTION_STOP) 405 return -EOPNOTSUPP; 406 407 range = true; 408 } else 409 address = true; 410 411 /* 412 * At this time we don't allow range and start/stop filtering 413 * to cohabitate, they have to be mutually exclusive. 414 */ 415 if (range && address) 416 return -EOPNOTSUPP; 417 } 418 419 return 0; 420 } 421 422 static void etm_addr_filters_sync(struct perf_event *event) 423 { 424 struct perf_addr_filters_head *head = perf_event_addr_filters(event); 425 unsigned long start, stop, *offs = event->addr_filters_offs; 426 struct etm_filters *filters = event->hw.addr_filters; 427 struct etm_filter *etm_filter; 428 struct perf_addr_filter *filter; 429 int i = 0; 430 431 list_for_each_entry(filter, &head->list, entry) { 432 start = filter->offset + offs[i]; 433 stop = start + filter->size; 434 etm_filter = &filters->etm_filter[i]; 435 436 switch (filter->action) { 437 case PERF_ADDR_FILTER_ACTION_FILTER: 438 etm_filter->start_addr = start; 439 etm_filter->stop_addr = stop; 440 etm_filter->type = ETM_ADDR_TYPE_RANGE; 441 break; 442 case PERF_ADDR_FILTER_ACTION_START: 443 etm_filter->start_addr = start; 444 etm_filter->type = ETM_ADDR_TYPE_START; 445 break; 446 case PERF_ADDR_FILTER_ACTION_STOP: 447 etm_filter->stop_addr = stop; 448 etm_filter->type = ETM_ADDR_TYPE_STOP; 449 break; 450 } 451 i++; 452 } 453 454 filters->nr_filters = i; 455 } 456 457 int etm_perf_symlink(struct coresight_device *csdev, bool link) 458 { 459 char entry[sizeof("cpu9999999")]; 460 int ret = 0, cpu = source_ops(csdev)->cpu_id(csdev); 461 struct device *pmu_dev = etm_pmu.dev; 462 struct device *cs_dev = &csdev->dev; 463 464 sprintf(entry, "cpu%d", cpu); 465 466 if (!etm_perf_up) 467 return -EPROBE_DEFER; 468 469 if (link) { 470 ret = sysfs_create_link(&pmu_dev->kobj, &cs_dev->kobj, entry); 471 if (ret) 472 return ret; 473 per_cpu(csdev_src, cpu) = csdev; 474 } else { 475 sysfs_remove_link(&pmu_dev->kobj, entry); 476 per_cpu(csdev_src, cpu) = NULL; 477 } 478 479 return 0; 480 } 481 482 static int __init etm_perf_init(void) 483 { 484 int ret; 485 486 etm_pmu.capabilities = PERF_PMU_CAP_EXCLUSIVE; 487 488 etm_pmu.attr_groups = etm_pmu_attr_groups; 489 etm_pmu.task_ctx_nr = perf_sw_context; 490 etm_pmu.read = etm_event_read; 491 etm_pmu.event_init = etm_event_init; 492 etm_pmu.setup_aux = etm_setup_aux; 493 etm_pmu.free_aux = etm_free_aux; 494 etm_pmu.start = etm_event_start; 495 etm_pmu.stop = etm_event_stop; 496 etm_pmu.add = etm_event_add; 497 etm_pmu.del = etm_event_del; 498 etm_pmu.addr_filters_sync = etm_addr_filters_sync; 499 etm_pmu.addr_filters_validate = etm_addr_filters_validate; 500 etm_pmu.nr_addr_filters = ETM_ADDR_CMP_MAX; 501 502 ret = perf_pmu_register(&etm_pmu, CORESIGHT_ETM_PMU_NAME, -1); 503 if (ret == 0) 504 etm_perf_up = true; 505 506 return ret; 507 } 508 device_initcall(etm_perf_init); 509