1 /* 2 * Copyright(C) 2015 Linaro Limited. All rights reserved. 3 * Author: Mathieu Poirier <mathieu.poirier@linaro.org> 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 as published by 7 * the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 12 * more details. 13 * 14 * You should have received a copy of the GNU General Public License along with 15 * this program. If not, see <http://www.gnu.org/licenses/>. 16 */ 17 18 #include <linux/coresight.h> 19 #include <linux/coresight-pmu.h> 20 #include <linux/cpumask.h> 21 #include <linux/device.h> 22 #include <linux/list.h> 23 #include <linux/mm.h> 24 #include <linux/init.h> 25 #include <linux/perf_event.h> 26 #include <linux/slab.h> 27 #include <linux/types.h> 28 #include <linux/workqueue.h> 29 30 #include "coresight-etm-perf.h" 31 #include "coresight-priv.h" 32 33 static struct pmu etm_pmu; 34 static bool etm_perf_up; 35 36 /** 37 * struct etm_event_data - Coresight specifics associated to an event 38 * @work: Handle to free allocated memory outside IRQ context. 39 * @mask: Hold the CPU(s) this event was set for. 40 * @snk_config: The sink configuration. 41 * @path: An array of path, each slot for one CPU. 42 */ 43 struct etm_event_data { 44 struct work_struct work; 45 cpumask_t mask; 46 void *snk_config; 47 struct list_head **path; 48 }; 49 50 static DEFINE_PER_CPU(struct perf_output_handle, ctx_handle); 51 static DEFINE_PER_CPU(struct coresight_device *, csdev_src); 52 53 /* ETMv3.5/PTM's ETMCR is 'config' */ 54 PMU_FORMAT_ATTR(cycacc, "config:" __stringify(ETM_OPT_CYCACC)); 55 PMU_FORMAT_ATTR(timestamp, "config:" __stringify(ETM_OPT_TS)); 56 PMU_FORMAT_ATTR(retstack, "config:" __stringify(ETM_OPT_RETSTK)); 57 58 static struct attribute *etm_config_formats_attr[] = { 59 &format_attr_cycacc.attr, 60 &format_attr_timestamp.attr, 61 &format_attr_retstack.attr, 62 NULL, 63 }; 64 65 static const struct attribute_group etm_pmu_format_group = { 66 .name = "format", 67 .attrs = etm_config_formats_attr, 68 }; 69 70 static const struct attribute_group *etm_pmu_attr_groups[] = { 71 &etm_pmu_format_group, 72 NULL, 73 }; 74 75 static void etm_event_read(struct perf_event *event) {} 76 77 static int etm_addr_filters_alloc(struct perf_event *event) 78 { 79 struct etm_filters *filters; 80 int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu); 81 82 filters = kzalloc_node(sizeof(struct etm_filters), GFP_KERNEL, node); 83 if (!filters) 84 return -ENOMEM; 85 86 if (event->parent) 87 memcpy(filters, event->parent->hw.addr_filters, 88 sizeof(*filters)); 89 90 event->hw.addr_filters = filters; 91 92 return 0; 93 } 94 95 static void etm_event_destroy(struct perf_event *event) 96 { 97 kfree(event->hw.addr_filters); 98 event->hw.addr_filters = NULL; 99 } 100 101 static int etm_event_init(struct perf_event *event) 102 { 103 int ret = 0; 104 105 if (event->attr.type != etm_pmu.type) { 106 ret = -ENOENT; 107 goto out; 108 } 109 110 ret = etm_addr_filters_alloc(event); 111 if (ret) 112 goto out; 113 114 event->destroy = etm_event_destroy; 115 out: 116 return ret; 117 } 118 119 static void free_event_data(struct work_struct *work) 120 { 121 int cpu; 122 cpumask_t *mask; 123 struct etm_event_data *event_data; 124 struct coresight_device *sink; 125 126 event_data = container_of(work, struct etm_event_data, work); 127 mask = &event_data->mask; 128 /* 129 * First deal with the sink configuration. See comment in 130 * etm_setup_aux() about why we take the first available path. 131 */ 132 if (event_data->snk_config) { 133 cpu = cpumask_first(mask); 134 sink = coresight_get_sink(event_data->path[cpu]); 135 if (sink_ops(sink)->free_buffer) 136 sink_ops(sink)->free_buffer(event_data->snk_config); 137 } 138 139 for_each_cpu(cpu, mask) { 140 if (!(IS_ERR_OR_NULL(event_data->path[cpu]))) 141 coresight_release_path(event_data->path[cpu]); 142 } 143 144 kfree(event_data->path); 145 kfree(event_data); 146 } 147 148 static void *alloc_event_data(int cpu) 149 { 150 int size; 151 cpumask_t *mask; 152 struct etm_event_data *event_data; 153 154 /* First get memory for the session's data */ 155 event_data = kzalloc(sizeof(struct etm_event_data), GFP_KERNEL); 156 if (!event_data) 157 return NULL; 158 159 /* Make sure nothing disappears under us */ 160 get_online_cpus(); 161 size = num_online_cpus(); 162 163 mask = &event_data->mask; 164 if (cpu != -1) 165 cpumask_set_cpu(cpu, mask); 166 else 167 cpumask_copy(mask, cpu_online_mask); 168 put_online_cpus(); 169 170 /* 171 * Each CPU has a single path between source and destination. As such 172 * allocate an array using CPU numbers as indexes. That way a path 173 * for any CPU can easily be accessed at any given time. We proceed 174 * the same way for sessions involving a single CPU. The cost of 175 * unused memory when dealing with single CPU trace scenarios is small 176 * compared to the cost of searching through an optimized array. 177 */ 178 event_data->path = kcalloc(size, 179 sizeof(struct list_head *), GFP_KERNEL); 180 if (!event_data->path) { 181 kfree(event_data); 182 return NULL; 183 } 184 185 return event_data; 186 } 187 188 static void etm_free_aux(void *data) 189 { 190 struct etm_event_data *event_data = data; 191 192 schedule_work(&event_data->work); 193 } 194 195 static void *etm_setup_aux(int event_cpu, void **pages, 196 int nr_pages, bool overwrite) 197 { 198 int cpu; 199 cpumask_t *mask; 200 struct coresight_device *sink; 201 struct etm_event_data *event_data = NULL; 202 203 event_data = alloc_event_data(event_cpu); 204 if (!event_data) 205 return NULL; 206 INIT_WORK(&event_data->work, free_event_data); 207 208 /* 209 * In theory nothing prevent tracers in a trace session from being 210 * associated with different sinks, nor having a sink per tracer. But 211 * until we have HW with this kind of topology we need to assume tracers 212 * in a trace session are using the same sink. Therefore go through 213 * the coresight bus and pick the first enabled sink. 214 * 215 * When operated from sysFS users are responsible to enable the sink 216 * while from perf, the perf tools will do it based on the choice made 217 * on the cmd line. As such the "enable_sink" flag in sysFS is reset. 218 */ 219 sink = coresight_get_enabled_sink(true); 220 if (!sink) 221 goto err; 222 223 mask = &event_data->mask; 224 225 /* Setup the path for each CPU in a trace session */ 226 for_each_cpu(cpu, mask) { 227 struct coresight_device *csdev; 228 229 csdev = per_cpu(csdev_src, cpu); 230 if (!csdev) 231 goto err; 232 233 /* 234 * Building a path doesn't enable it, it simply builds a 235 * list of devices from source to sink that can be 236 * referenced later when the path is actually needed. 237 */ 238 event_data->path[cpu] = coresight_build_path(csdev, sink); 239 if (IS_ERR(event_data->path[cpu])) 240 goto err; 241 } 242 243 if (!sink_ops(sink)->alloc_buffer) 244 goto err; 245 246 cpu = cpumask_first(mask); 247 /* Get the AUX specific data from the sink buffer */ 248 event_data->snk_config = 249 sink_ops(sink)->alloc_buffer(sink, cpu, pages, 250 nr_pages, overwrite); 251 if (!event_data->snk_config) 252 goto err; 253 254 out: 255 return event_data; 256 257 err: 258 etm_free_aux(event_data); 259 event_data = NULL; 260 goto out; 261 } 262 263 static void etm_event_start(struct perf_event *event, int flags) 264 { 265 int cpu = smp_processor_id(); 266 struct etm_event_data *event_data; 267 struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle); 268 struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu); 269 270 if (!csdev) 271 goto fail; 272 273 /* 274 * Deal with the ring buffer API and get a handle on the 275 * session's information. 276 */ 277 event_data = perf_aux_output_begin(handle, event); 278 if (!event_data) 279 goto fail; 280 281 /* We need a sink, no need to continue without one */ 282 sink = coresight_get_sink(event_data->path[cpu]); 283 if (WARN_ON_ONCE(!sink || !sink_ops(sink)->set_buffer)) 284 goto fail_end_stop; 285 286 /* Configure the sink */ 287 if (sink_ops(sink)->set_buffer(sink, handle, 288 event_data->snk_config)) 289 goto fail_end_stop; 290 291 /* Nothing will happen without a path */ 292 if (coresight_enable_path(event_data->path[cpu], CS_MODE_PERF)) 293 goto fail_end_stop; 294 295 /* Tell the perf core the event is alive */ 296 event->hw.state = 0; 297 298 /* Finally enable the tracer */ 299 if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF)) 300 goto fail_end_stop; 301 302 out: 303 return; 304 305 fail_end_stop: 306 perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); 307 perf_aux_output_end(handle, 0); 308 fail: 309 event->hw.state = PERF_HES_STOPPED; 310 goto out; 311 } 312 313 static void etm_event_stop(struct perf_event *event, int mode) 314 { 315 int cpu = smp_processor_id(); 316 unsigned long size; 317 struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu); 318 struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle); 319 struct etm_event_data *event_data = perf_get_aux(handle); 320 321 if (event->hw.state == PERF_HES_STOPPED) 322 return; 323 324 if (!csdev) 325 return; 326 327 sink = coresight_get_sink(event_data->path[cpu]); 328 if (!sink) 329 return; 330 331 /* stop tracer */ 332 source_ops(csdev)->disable(csdev, event); 333 334 /* tell the core */ 335 event->hw.state = PERF_HES_STOPPED; 336 337 if (mode & PERF_EF_UPDATE) { 338 if (WARN_ON_ONCE(handle->event != event)) 339 return; 340 341 /* update trace information */ 342 if (!sink_ops(sink)->update_buffer) 343 return; 344 345 sink_ops(sink)->update_buffer(sink, handle, 346 event_data->snk_config); 347 348 if (!sink_ops(sink)->reset_buffer) 349 return; 350 351 size = sink_ops(sink)->reset_buffer(sink, handle, 352 event_data->snk_config); 353 354 perf_aux_output_end(handle, size); 355 } 356 357 /* Disabling the path make its elements available to other sessions */ 358 coresight_disable_path(event_data->path[cpu]); 359 } 360 361 static int etm_event_add(struct perf_event *event, int mode) 362 { 363 int ret = 0; 364 struct hw_perf_event *hwc = &event->hw; 365 366 if (mode & PERF_EF_START) { 367 etm_event_start(event, 0); 368 if (hwc->state & PERF_HES_STOPPED) 369 ret = -EINVAL; 370 } else { 371 hwc->state = PERF_HES_STOPPED; 372 } 373 374 return ret; 375 } 376 377 static void etm_event_del(struct perf_event *event, int mode) 378 { 379 etm_event_stop(event, PERF_EF_UPDATE); 380 } 381 382 static int etm_addr_filters_validate(struct list_head *filters) 383 { 384 bool range = false, address = false; 385 int index = 0; 386 struct perf_addr_filter *filter; 387 388 list_for_each_entry(filter, filters, entry) { 389 /* 390 * No need to go further if there's no more 391 * room for filters. 392 */ 393 if (++index > ETM_ADDR_CMP_MAX) 394 return -EOPNOTSUPP; 395 396 /* 397 * As taken from the struct perf_addr_filter documentation: 398 * @range: 1: range, 0: address 399 * 400 * At this time we don't allow range and start/stop filtering 401 * to cohabitate, they have to be mutually exclusive. 402 */ 403 if ((filter->range == 1) && address) 404 return -EOPNOTSUPP; 405 406 if ((filter->range == 0) && range) 407 return -EOPNOTSUPP; 408 409 /* 410 * For range filtering, the second address in the address 411 * range comparator needs to be higher than the first. 412 * Invalid otherwise. 413 */ 414 if (filter->range && filter->size == 0) 415 return -EINVAL; 416 417 /* 418 * Everything checks out with this filter, record what we've 419 * received before moving on to the next one. 420 */ 421 if (filter->range) 422 range = true; 423 else 424 address = true; 425 } 426 427 return 0; 428 } 429 430 static void etm_addr_filters_sync(struct perf_event *event) 431 { 432 struct perf_addr_filters_head *head = perf_event_addr_filters(event); 433 unsigned long start, stop, *offs = event->addr_filters_offs; 434 struct etm_filters *filters = event->hw.addr_filters; 435 struct etm_filter *etm_filter; 436 struct perf_addr_filter *filter; 437 int i = 0; 438 439 list_for_each_entry(filter, &head->list, entry) { 440 start = filter->offset + offs[i]; 441 stop = start + filter->size; 442 etm_filter = &filters->etm_filter[i]; 443 444 if (filter->range == 1) { 445 etm_filter->start_addr = start; 446 etm_filter->stop_addr = stop; 447 etm_filter->type = ETM_ADDR_TYPE_RANGE; 448 } else { 449 if (filter->filter == 1) { 450 etm_filter->start_addr = start; 451 etm_filter->type = ETM_ADDR_TYPE_START; 452 } else { 453 etm_filter->stop_addr = stop; 454 etm_filter->type = ETM_ADDR_TYPE_STOP; 455 } 456 } 457 i++; 458 } 459 460 filters->nr_filters = i; 461 } 462 463 int etm_perf_symlink(struct coresight_device *csdev, bool link) 464 { 465 char entry[sizeof("cpu9999999")]; 466 int ret = 0, cpu = source_ops(csdev)->cpu_id(csdev); 467 struct device *pmu_dev = etm_pmu.dev; 468 struct device *cs_dev = &csdev->dev; 469 470 sprintf(entry, "cpu%d", cpu); 471 472 if (!etm_perf_up) 473 return -EPROBE_DEFER; 474 475 if (link) { 476 ret = sysfs_create_link(&pmu_dev->kobj, &cs_dev->kobj, entry); 477 if (ret) 478 return ret; 479 per_cpu(csdev_src, cpu) = csdev; 480 } else { 481 sysfs_remove_link(&pmu_dev->kobj, entry); 482 per_cpu(csdev_src, cpu) = NULL; 483 } 484 485 return 0; 486 } 487 488 static int __init etm_perf_init(void) 489 { 490 int ret; 491 492 etm_pmu.capabilities = PERF_PMU_CAP_EXCLUSIVE; 493 494 etm_pmu.attr_groups = etm_pmu_attr_groups; 495 etm_pmu.task_ctx_nr = perf_sw_context; 496 etm_pmu.read = etm_event_read; 497 etm_pmu.event_init = etm_event_init; 498 etm_pmu.setup_aux = etm_setup_aux; 499 etm_pmu.free_aux = etm_free_aux; 500 etm_pmu.start = etm_event_start; 501 etm_pmu.stop = etm_event_stop; 502 etm_pmu.add = etm_event_add; 503 etm_pmu.del = etm_event_del; 504 etm_pmu.addr_filters_sync = etm_addr_filters_sync; 505 etm_pmu.addr_filters_validate = etm_addr_filters_validate; 506 etm_pmu.nr_addr_filters = ETM_ADDR_CMP_MAX; 507 508 ret = perf_pmu_register(&etm_pmu, CORESIGHT_ETM_PMU_NAME, -1); 509 if (ret == 0) 510 etm_perf_up = true; 511 512 return ret; 513 } 514 device_initcall(etm_perf_init); 515