// SPDX-License-Identifier: GPL-2.0

/*
 * This driver adds support for perf events to use the Performance
 * Monitor Counter Groups (PMCG) associated with an SMMUv3 node
 * to monitor that node.
 *
 * SMMUv3 PMCG devices are named as smmuv3_pmcg_<phys_addr_page> where
 * <phys_addr_page> is the physical page address of the SMMU PMCG wrapped
 * to 4K boundary. For example, the PMCG at 0xff88840000 is named
 * smmuv3_pmcg_ff88840
 *
 * Filtering by stream id is done by specifying filtering parameters
 * with the event. options are:
 *   filter_enable    - 0 = no filtering, 1 = filtering enabled
 *   filter_span      - 0 = exact match, 1 = pattern match
 *   filter_stream_id - pattern to filter against
 *
 * To match a partial StreamID where the X most-significant bits must match
 * but the Y least-significant bits might differ, STREAMID is programmed
 * with a value that contains:
 *   STREAMID[Y - 1] == 0.
 *   STREAMID[Y - 2:0] == 1 (where Y > 1).
 * The remainder of implemented bits of STREAMID (X bits, from bit Y upwards)
 * contain a value to match from the corresponding bits of event StreamID.
 *
 * Example: perf stat -e smmuv3_pmcg_ff88840/transaction,filter_enable=1,
 *                    filter_span=1,filter_stream_id=0x42/ -a netperf
 * Applies filter pattern 0x42 to transaction events, which means events
 * matching stream ids 0x42 and 0x43 are counted. Further filtering
 * information is available in the SMMU documentation.
 *
 * SMMU events are not attributable to a CPU, so task mode and sampling
 * are not supported.
 */
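
/*
 * A second worked example of the span encoding described above: with
 * filter_span=1 and filter_stream_id=0x45 (0b1000101), STREAMID[1] == 0
 * and STREAMID[0] == 1, so Y == 2 and events matching stream ids
 * 0x44-0x47 are counted.
 */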

#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/cpuhotplug.h>
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/msi.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <linux/smp.h>
#include <linux/sysfs.h>
#include <linux/types.h>

#define SMMU_PMCG_EVCNTR0               0x0
#define SMMU_PMCG_EVCNTR(n, stride)     (SMMU_PMCG_EVCNTR0 + (n) * (stride))
#define SMMU_PMCG_EVTYPER0              0x400
#define SMMU_PMCG_EVTYPER(n)            (SMMU_PMCG_EVTYPER0 + (n) * 4)
#define SMMU_PMCG_SID_SPAN_SHIFT        29
#define SMMU_PMCG_SMR0                  0xA00
#define SMMU_PMCG_SMR(n)                (SMMU_PMCG_SMR0 + (n) * 4)
#define SMMU_PMCG_CNTENSET0             0xC00
#define SMMU_PMCG_CNTENCLR0             0xC20
#define SMMU_PMCG_INTENSET0             0xC40
#define SMMU_PMCG_INTENCLR0             0xC60
#define SMMU_PMCG_OVSCLR0               0xC80
#define SMMU_PMCG_OVSSET0               0xCC0
#define SMMU_PMCG_CFGR                  0xE00
#define SMMU_PMCG_CFGR_SID_FILTER_TYPE  BIT(23)
#define SMMU_PMCG_CFGR_MSI              BIT(21)
#define SMMU_PMCG_CFGR_RELOC_CTRS       BIT(20)
#define SMMU_PMCG_CFGR_SIZE             GENMASK(13, 8)
#define SMMU_PMCG_CFGR_NCTR             GENMASK(5, 0)
#define SMMU_PMCG_CR                    0xE04
#define SMMU_PMCG_CR_ENABLE             BIT(0)
#define SMMU_PMCG_IIDR                  0xE08
#define SMMU_PMCG_CEID0                 0xE20
#define SMMU_PMCG_CEID1                 0xE28
#define SMMU_PMCG_IRQ_CTRL              0xE50
#define SMMU_PMCG_IRQ_CTRL_IRQEN        BIT(0)
#define SMMU_PMCG_IRQ_CFG0              0xE58
#define SMMU_PMCG_IRQ_CFG1              0xE60
#define SMMU_PMCG_IRQ_CFG2              0xE64

/* MSI config fields */
#define MSI_CFG0_ADDR_MASK              GENMASK_ULL(51, 2)
#define MSI_CFG2_MEMATTR_DEVICE_nGnRE   0x1

#define SMMU_PMCG_DEFAULT_FILTER_SPAN   1
#define SMMU_PMCG_DEFAULT_FILTER_SID    GENMASK(31, 0)

#define SMMU_PMCG_MAX_COUNTERS          64
#define SMMU_PMCG_ARCH_MAX_EVENTS       128

#define SMMU_PMCG_PA_SHIFT              12

#define SMMU_PMCG_EVCNTR_RDONLY         BIT(0)

static int cpuhp_state_num;

struct smmu_pmu {
        struct hlist_node node;
        struct perf_event *events[SMMU_PMCG_MAX_COUNTERS];
        DECLARE_BITMAP(used_counters, SMMU_PMCG_MAX_COUNTERS);
        DECLARE_BITMAP(supported_events, SMMU_PMCG_ARCH_MAX_EVENTS);
        unsigned int irq;
        unsigned int on_cpu;
        struct pmu pmu;
        unsigned int num_counters;
        struct device *dev;
        void __iomem *reg_base;
        void __iomem *reloc_base;
        u64 counter_mask;
        u32 options;
        u32 iidr;
        bool global_filter;
};

#define to_smmu_pmu(p) (container_of(p, struct smmu_pmu, pmu))

#define SMMU_PMU_EVENT_ATTR_EXTRACTOR(_name, _config, _start, _end)        \
        static inline u32 get_##_name(struct perf_event *event)            \
        {                                                                   \
                return FIELD_GET(GENMASK_ULL(_end, _start),                 \
                                 event->attr._config);                      \
        }                                                                   \

SMMU_PMU_EVENT_ATTR_EXTRACTOR(event, config, 0, 15);
SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_stream_id, config1, 0, 31);
SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_span, config1, 32, 32);
SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_enable, config1, 33, 33);

static inline void smmu_pmu_enable(struct pmu *pmu)
{
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);

        writel(SMMU_PMCG_IRQ_CTRL_IRQEN,
               smmu_pmu->reg_base + SMMU_PMCG_IRQ_CTRL);
        writel(SMMU_PMCG_CR_ENABLE, smmu_pmu->reg_base + SMMU_PMCG_CR);
}

static inline void smmu_pmu_disable(struct pmu *pmu)
{
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);

        writel(0, smmu_pmu->reg_base + SMMU_PMCG_CR);
        writel(0, smmu_pmu->reg_base + SMMU_PMCG_IRQ_CTRL);
}

static inline void smmu_pmu_counter_set_value(struct smmu_pmu *smmu_pmu,
                                              u32 idx, u64 value)
{
        if (smmu_pmu->counter_mask & BIT(32))
                writeq(value, smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 8));
        else
                writel(value, smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 4));
}

static inline u64 smmu_pmu_counter_get_value(struct smmu_pmu *smmu_pmu, u32 idx)
{
        u64 value;

        if (smmu_pmu->counter_mask & BIT(32))
                value = readq(smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 8));
        else
                value = readl(smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 4));

        return value;
}

static inline void smmu_pmu_counter_enable(struct smmu_pmu *smmu_pmu, u32 idx)
{
        writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_CNTENSET0);
}

static inline void smmu_pmu_counter_disable(struct smmu_pmu *smmu_pmu, u32 idx)
{
        writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_CNTENCLR0);
}

static inline void smmu_pmu_interrupt_enable(struct smmu_pmu *smmu_pmu, u32 idx)
{
        writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_INTENSET0);
}

static inline void smmu_pmu_interrupt_disable(struct smmu_pmu *smmu_pmu,
                                              u32 idx)
{
        writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_INTENCLR0);
}

static inline void smmu_pmu_set_evtyper(struct smmu_pmu *smmu_pmu, u32 idx,
                                        u32 val)
{
        writel(val, smmu_pmu->reg_base + SMMU_PMCG_EVTYPER(idx));
}

static inline void smmu_pmu_set_smr(struct smmu_pmu *smmu_pmu, u32 idx, u32 val)
{
        writel(val, smmu_pmu->reg_base + SMMU_PMCG_SMR(idx));
}
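
/*
 * Read the hardware counter and add the delta since the last observed value
 * to event->count. The cmpxchg() loop keeps prev_count consistent if the
 * counter is read concurrently (e.g. from the overflow interrupt handler),
 * and masking the delta with counter_mask gives the correct result when a
 * counter narrower than 64 bits wraps around.
 */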
static void smmu_pmu_event_update(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
        u64 delta, prev, now;
        u32 idx = hwc->idx;

        do {
                prev = local64_read(&hwc->prev_count);
                now = smmu_pmu_counter_get_value(smmu_pmu, idx);
        } while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);

        /* handle overflow. */
        delta = now - prev;
        delta &= smmu_pmu->counter_mask;

        local64_add(delta, &event->count);
}

static void smmu_pmu_set_period(struct smmu_pmu *smmu_pmu,
                                struct hw_perf_event *hwc)
{
        u32 idx = hwc->idx;
        u64 new;

        if (smmu_pmu->options & SMMU_PMCG_EVCNTR_RDONLY) {
                /*
                 * On platforms that require this quirk, if the counter starts
                 * at < half_counter value and wraps, the current logic of
                 * handling the overflow may not work. It is expected that,
                 * those platforms will have full 64 counter bits implemented
                 * so that such a possibility is remote(eg: HiSilicon HIP08).
                 */
                new = smmu_pmu_counter_get_value(smmu_pmu, idx);
        } else {
                /*
                 * We limit the max period to half the max counter value
                 * of the counter size, so that even in the case of extreme
                 * interrupt latency the counter will (hopefully) not wrap
                 * past its initial value.
                 */
                new = smmu_pmu->counter_mask >> 1;
                smmu_pmu_counter_set_value(smmu_pmu, idx, new);
        }

        local64_set(&hwc->prev_count, new);
}
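
/*
 * Program counter @idx's EVTYPER register with the event ID plus the SID
 * span flag (bit SMMU_PMCG_SID_SPAN_SHIFT), and the StreamID pattern to
 * match into the corresponding SMR register.
 */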
static void smmu_pmu_set_event_filter(struct perf_event *event,
                                      int idx, u32 span, u32 sid)
{
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
        u32 evtyper;

        evtyper = get_event(event) | span << SMMU_PMCG_SID_SPAN_SHIFT;
        smmu_pmu_set_evtyper(smmu_pmu, idx, evtyper);
        smmu_pmu_set_smr(smmu_pmu, idx, sid);
}

static bool smmu_pmu_check_global_filter(struct perf_event *curr,
                                         struct perf_event *new)
{
        if (get_filter_enable(new) != get_filter_enable(curr))
                return false;

        if (!get_filter_enable(new))
                return true;

        return get_filter_span(new) == get_filter_span(curr) &&
               get_filter_stream_id(new) == get_filter_stream_id(curr);
}

static int smmu_pmu_apply_event_filter(struct smmu_pmu *smmu_pmu,
                                       struct perf_event *event, int idx)
{
        u32 span, sid;
        unsigned int cur_idx, num_ctrs = smmu_pmu->num_counters;
        bool filter_en = !!get_filter_enable(event);

        span = filter_en ? get_filter_span(event) :
                           SMMU_PMCG_DEFAULT_FILTER_SPAN;
        sid = filter_en ? get_filter_stream_id(event) :
                          SMMU_PMCG_DEFAULT_FILTER_SID;

        cur_idx = find_first_bit(smmu_pmu->used_counters, num_ctrs);
        /*
         * Per-counter filtering, or scheduling the first globally-filtered
         * event into an empty PMU so idx == 0 and it works out equivalent.
         */
        if (!smmu_pmu->global_filter || cur_idx == num_ctrs) {
                smmu_pmu_set_event_filter(event, idx, span, sid);
                return 0;
        }

        /* Otherwise, must match whatever's currently scheduled */
        if (smmu_pmu_check_global_filter(smmu_pmu->events[cur_idx], event)) {
                smmu_pmu_set_evtyper(smmu_pmu, idx, get_event(event));
                return 0;
        }

        return -EAGAIN;
}

static int smmu_pmu_get_event_idx(struct smmu_pmu *smmu_pmu,
                                  struct perf_event *event)
{
        int idx, err;
        unsigned int num_ctrs = smmu_pmu->num_counters;

        idx = find_first_zero_bit(smmu_pmu->used_counters, num_ctrs);
        if (idx == num_ctrs)
                /* The counters are all in use. */
                return -EAGAIN;

        err = smmu_pmu_apply_event_filter(smmu_pmu, event, idx);
        if (err)
                return err;

        set_bit(idx, smmu_pmu->used_counters);

        return idx;
}

static bool smmu_pmu_events_compatible(struct perf_event *curr,
                                       struct perf_event *new)
{
        if (new->pmu != curr->pmu)
                return false;

        if (to_smmu_pmu(new->pmu)->global_filter &&
            !smmu_pmu_check_global_filter(curr, new))
                return false;

        return true;
}

/*
 * Implementation of abstract pmu functionality required by
 * the core perf events code.
 */

static int smmu_pmu_event_init(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
        struct device *dev = smmu_pmu->dev;
        struct perf_event *sibling;
        int group_num_events = 1;
        u16 event_id;

        if (event->attr.type != event->pmu->type)
                return -ENOENT;

        if (hwc->sample_period) {
                dev_dbg(dev, "Sampling not supported\n");
                return -EOPNOTSUPP;
        }

        if (event->cpu < 0) {
                dev_dbg(dev, "Per-task mode not supported\n");
                return -EOPNOTSUPP;
        }

        /* Verify specified event is supported on this PMU */
        event_id = get_event(event);
        if (event_id < SMMU_PMCG_ARCH_MAX_EVENTS &&
            (!test_bit(event_id, smmu_pmu->supported_events))) {
                dev_dbg(dev, "Invalid event %d for this PMU\n", event_id);
                return -EINVAL;
        }

        /* Don't allow groups with mixed PMUs, except for s/w events */
        if (!is_software_event(event->group_leader)) {
                if (!smmu_pmu_events_compatible(event->group_leader, event))
                        return -EINVAL;

                if (++group_num_events > smmu_pmu->num_counters)
                        return -EINVAL;
        }

        for_each_sibling_event(sibling, event->group_leader) {
                if (is_software_event(sibling))
                        continue;

                if (!smmu_pmu_events_compatible(sibling, event))
                        return -EINVAL;

                if (++group_num_events > smmu_pmu->num_counters)
                        return -EINVAL;
        }

        hwc->idx = -1;

        /*
         * Ensure all events are on the same cpu so all events are in the
         * same cpu context, to avoid races on pmu_enable etc.
         */
        event->cpu = smmu_pmu->on_cpu;

        return 0;
}

static void smmu_pmu_event_start(struct perf_event *event, int flags)
{
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        hwc->state = 0;

        smmu_pmu_set_period(smmu_pmu, hwc);

        smmu_pmu_counter_enable(smmu_pmu, idx);
}

static void smmu_pmu_event_stop(struct perf_event *event, int flags)
{
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        if (hwc->state & PERF_HES_STOPPED)
                return;

        smmu_pmu_counter_disable(smmu_pmu, idx);
        /* As the counter gets updated on _start, ignore PERF_EF_UPDATE */
        smmu_pmu_event_update(event);
        hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
}

static int smmu_pmu_event_add(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        int idx;
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);

        idx = smmu_pmu_get_event_idx(smmu_pmu, event);
        if (idx < 0)
                return idx;

        hwc->idx = idx;
        hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
        smmu_pmu->events[idx] = event;
        local64_set(&hwc->prev_count, 0);

        smmu_pmu_interrupt_enable(smmu_pmu, idx);

        if (flags & PERF_EF_START)
                smmu_pmu_event_start(event, flags);

        /* Propagate changes to the userspace mapping. */
        perf_event_update_userpage(event);

        return 0;
}

static void smmu_pmu_event_del(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
        int idx = hwc->idx;

        smmu_pmu_event_stop(event, flags | PERF_EF_UPDATE);
        smmu_pmu_interrupt_disable(smmu_pmu, idx);
        smmu_pmu->events[idx] = NULL;
        clear_bit(idx, smmu_pmu->used_counters);

        perf_event_update_userpage(event);
}

static void smmu_pmu_event_read(struct perf_event *event)
{
        smmu_pmu_event_update(event);
}

/* cpumask */

static ssize_t smmu_pmu_cpumask_show(struct device *dev,
                                     struct device_attribute *attr,
                                     char *buf)
{
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev));

        return cpumap_print_to_pagebuf(true, buf, cpumask_of(smmu_pmu->on_cpu));
}

static struct device_attribute smmu_pmu_cpumask_attr =
                __ATTR(cpumask, 0444, smmu_pmu_cpumask_show, NULL);

static struct attribute *smmu_pmu_cpumask_attrs[] = {
        &smmu_pmu_cpumask_attr.attr,
        NULL
};

static const struct attribute_group smmu_pmu_cpumask_group = {
        .attrs = smmu_pmu_cpumask_attrs,
};

/* Events */

static ssize_t smmu_pmu_event_show(struct device *dev,
                                   struct device_attribute *attr, char *page)
{
        struct perf_pmu_events_attr *pmu_attr;

        pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);

        return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
}

#define SMMU_EVENT_ATTR(name, config)                   \
        PMU_EVENT_ATTR_ID(name, smmu_pmu_event_show, config)

static struct attribute *smmu_pmu_events[] = {
        SMMU_EVENT_ATTR(cycles, 0),
        SMMU_EVENT_ATTR(transaction, 1),
        SMMU_EVENT_ATTR(tlb_miss, 2),
        SMMU_EVENT_ATTR(config_cache_miss, 3),
        SMMU_EVENT_ATTR(trans_table_walk_access, 4),
        SMMU_EVENT_ATTR(config_struct_access, 5),
        SMMU_EVENT_ATTR(pcie_ats_trans_rq, 6),
        SMMU_EVENT_ATTR(pcie_ats_trans_passed, 7),
        NULL
};

static umode_t smmu_pmu_event_is_visible(struct kobject *kobj,
                                         struct attribute *attr, int unused)
{
        struct device *dev = kobj_to_dev(kobj);
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev));
        struct perf_pmu_events_attr *pmu_attr;

        pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr);

        if (test_bit(pmu_attr->id, smmu_pmu->supported_events))
                return attr->mode;

        return 0;
}

static const struct attribute_group smmu_pmu_events_group = {
        .name = "events",
        .attrs = smmu_pmu_events,
        .is_visible = smmu_pmu_event_is_visible,
};

static ssize_t smmu_pmu_identifier_attr_show(struct device *dev,
                                             struct device_attribute *attr,
                                             char *page)
{
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev));

        return sysfs_emit(page, "0x%08x\n", smmu_pmu->iidr);
}

static umode_t smmu_pmu_identifier_attr_visible(struct kobject *kobj,
                                                struct attribute *attr,
                                                int n)
{
        struct device *dev = kobj_to_dev(kobj);
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev));

        if (!smmu_pmu->iidr)
                return 0;
        return attr->mode;
}

static struct device_attribute smmu_pmu_identifier_attr =
        __ATTR(identifier, 0444, smmu_pmu_identifier_attr_show, NULL);

static struct attribute *smmu_pmu_identifier_attrs[] = {
        &smmu_pmu_identifier_attr.attr,
        NULL
};

static const struct attribute_group smmu_pmu_identifier_group = {
        .attrs = smmu_pmu_identifier_attrs,
        .is_visible = smmu_pmu_identifier_attr_visible,
};

/* Formats */
PMU_FORMAT_ATTR(event,             "config:0-15");
PMU_FORMAT_ATTR(filter_stream_id,  "config1:0-31");
PMU_FORMAT_ATTR(filter_span,       "config1:32");
PMU_FORMAT_ATTR(filter_enable,     "config1:33");

static struct attribute *smmu_pmu_formats[] = {
        &format_attr_event.attr,
        &format_attr_filter_stream_id.attr,
        &format_attr_filter_span.attr,
        &format_attr_filter_enable.attr,
        NULL
};

static const struct attribute_group smmu_pmu_format_group = {
        .name = "format",
        .attrs = smmu_pmu_formats,
};
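
/*
 * Together these groups are exposed under
 * /sys/bus/event_source/devices/smmuv3_pmcg_<phys_addr_page>/, where the
 * perf tool picks up the event names and the config/config1 field layout.
 */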
static const struct attribute_group *smmu_pmu_attr_grps[] = {
        &smmu_pmu_cpumask_group,
        &smmu_pmu_events_group,
        &smmu_pmu_format_group,
        &smmu_pmu_identifier_group,
        NULL
};

/*
 * Generic device handlers
 */
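
/*
 * Counting is always done from a single CPU (smmu_pmu->on_cpu); if that CPU
 * goes offline, migrate the perf context and the interrupt affinity to
 * another online CPU so the events keep counting.
 */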
static int smmu_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
        struct smmu_pmu *smmu_pmu;
        unsigned int target;

        smmu_pmu = hlist_entry_safe(node, struct smmu_pmu, node);
        if (cpu != smmu_pmu->on_cpu)
                return 0;

        target = cpumask_any_but(cpu_online_mask, cpu);
        if (target >= nr_cpu_ids)
                return 0;

        perf_pmu_migrate_context(&smmu_pmu->pmu, cpu, target);
        smmu_pmu->on_cpu = target;
        WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(target)));

        return 0;
}

static irqreturn_t smmu_pmu_handle_irq(int irq_num, void *data)
{
        struct smmu_pmu *smmu_pmu = data;
        u64 ovsr;
        unsigned int idx;

        ovsr = readq(smmu_pmu->reloc_base + SMMU_PMCG_OVSSET0);
        if (!ovsr)
                return IRQ_NONE;

        writeq(ovsr, smmu_pmu->reloc_base + SMMU_PMCG_OVSCLR0);

        for_each_set_bit(idx, (unsigned long *)&ovsr, smmu_pmu->num_counters) {
                struct perf_event *event = smmu_pmu->events[idx];
                struct hw_perf_event *hwc;

                if (WARN_ON_ONCE(!event))
                        continue;

                smmu_pmu_event_update(event);
                hwc = &event->hw;

                smmu_pmu_set_period(smmu_pmu, hwc);
        }

        return IRQ_HANDLED;
}

static void smmu_pmu_free_msis(void *data)
{
        struct device *dev = data;

        platform_msi_domain_free_irqs(dev);
}

static void smmu_pmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
{
        phys_addr_t doorbell;
        struct device *dev = msi_desc_to_dev(desc);
        struct smmu_pmu *pmu = dev_get_drvdata(dev);

        doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
        doorbell &= MSI_CFG0_ADDR_MASK;

        writeq_relaxed(doorbell, pmu->reg_base + SMMU_PMCG_IRQ_CFG0);
        writel_relaxed(msg->data, pmu->reg_base + SMMU_PMCG_IRQ_CFG1);
        writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE,
                       pmu->reg_base + SMMU_PMCG_IRQ_CFG2);
}

static void smmu_pmu_setup_msi(struct smmu_pmu *pmu)
{
        struct msi_desc *desc;
        struct device *dev = pmu->dev;
        int ret;

        /* Clear MSI address reg */
        writeq_relaxed(0, pmu->reg_base + SMMU_PMCG_IRQ_CFG0);

        /* MSI supported or not */
        if (!(readl(pmu->reg_base + SMMU_PMCG_CFGR) & SMMU_PMCG_CFGR_MSI))
                return;

        ret = platform_msi_domain_alloc_irqs(dev, 1, smmu_pmu_write_msi_msg);
        if (ret) {
                dev_warn(dev, "failed to allocate MSIs\n");
                return;
        }

        desc = first_msi_entry(dev);
        if (desc)
                pmu->irq = desc->irq;

        /* Add callback to free MSIs on teardown */
        devm_add_action(dev, smmu_pmu_free_msis, dev);
}

static int smmu_pmu_setup_irq(struct smmu_pmu *pmu)
{
        unsigned long flags = IRQF_NOBALANCING | IRQF_SHARED | IRQF_NO_THREAD;
        int irq, ret = -ENXIO;

        smmu_pmu_setup_msi(pmu);

        irq = pmu->irq;
        if (irq)
                ret = devm_request_irq(pmu->dev, irq, smmu_pmu_handle_irq,
                                       flags, "smmuv3-pmu", pmu);
        return ret;
}

static void smmu_pmu_reset(struct smmu_pmu *smmu_pmu)
{
        u64 counter_present_mask = GENMASK_ULL(smmu_pmu->num_counters - 1, 0);

        smmu_pmu_disable(&smmu_pmu->pmu);

        /* Disable counter and interrupt */
        writeq_relaxed(counter_present_mask,
                       smmu_pmu->reg_base + SMMU_PMCG_CNTENCLR0);
        writeq_relaxed(counter_present_mask,
                       smmu_pmu->reg_base + SMMU_PMCG_INTENCLR0);
        writeq_relaxed(counter_present_mask,
                       smmu_pmu->reloc_base + SMMU_PMCG_OVSCLR0);
}

static void smmu_pmu_get_acpi_options(struct smmu_pmu *smmu_pmu)
{
        u32 model;

        model = *(u32 *)dev_get_platdata(smmu_pmu->dev);

        switch (model) {
        case IORT_SMMU_V3_PMCG_HISI_HIP08:
                /* HiSilicon Erratum 162001800 */
                smmu_pmu->options |= SMMU_PMCG_EVCNTR_RDONLY;
                break;
        }

        dev_notice(smmu_pmu->dev, "option mask 0x%x\n", smmu_pmu->options);
}

static int smmu_pmu_probe(struct platform_device *pdev)
{
        struct smmu_pmu *smmu_pmu;
        struct resource *res_0;
        u32 cfgr, reg_size;
        u64 ceid_64[2];
        int irq, err;
        char *name;
        struct device *dev = &pdev->dev;

        smmu_pmu = devm_kzalloc(dev, sizeof(*smmu_pmu), GFP_KERNEL);
        if (!smmu_pmu)
                return -ENOMEM;

        smmu_pmu->dev = dev;
        platform_set_drvdata(pdev, smmu_pmu);

        smmu_pmu->pmu = (struct pmu) {
                .module         = THIS_MODULE,
                .task_ctx_nr    = perf_invalid_context,
                .pmu_enable     = smmu_pmu_enable,
                .pmu_disable    = smmu_pmu_disable,
                .event_init     = smmu_pmu_event_init,
                .add            = smmu_pmu_event_add,
                .del            = smmu_pmu_event_del,
                .start          = smmu_pmu_event_start,
                .stop           = smmu_pmu_event_stop,
                .read           = smmu_pmu_event_read,
                .attr_groups    = smmu_pmu_attr_grps,
                .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
        };

        smmu_pmu->reg_base = devm_platform_get_and_ioremap_resource(pdev, 0, &res_0);
        if (IS_ERR(smmu_pmu->reg_base))
                return PTR_ERR(smmu_pmu->reg_base);

        cfgr = readl_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CFGR);

        /* Determine if page 1 is present */
        if (cfgr & SMMU_PMCG_CFGR_RELOC_CTRS) {
                smmu_pmu->reloc_base = devm_platform_ioremap_resource(pdev, 1);
                if (IS_ERR(smmu_pmu->reloc_base))
                        return PTR_ERR(smmu_pmu->reloc_base);
        } else {
                smmu_pmu->reloc_base = smmu_pmu->reg_base;
        }

        irq = platform_get_irq_optional(pdev, 0);
        if (irq > 0)
                smmu_pmu->irq = irq;

        ceid_64[0] = readq_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CEID0);
        ceid_64[1] = readq_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CEID1);
        bitmap_from_arr32(smmu_pmu->supported_events, (u32 *)ceid_64,
                          SMMU_PMCG_ARCH_MAX_EVENTS);

        smmu_pmu->num_counters = FIELD_GET(SMMU_PMCG_CFGR_NCTR, cfgr) + 1;

        smmu_pmu->global_filter = !!(cfgr & SMMU_PMCG_CFGR_SID_FILTER_TYPE);

        reg_size = FIELD_GET(SMMU_PMCG_CFGR_SIZE, cfgr);
        smmu_pmu->counter_mask = GENMASK_ULL(reg_size, 0);

        smmu_pmu_reset(smmu_pmu);

        err = smmu_pmu_setup_irq(smmu_pmu);
        if (err) {
                dev_err(dev, "Setup irq failed, PMU @%pa\n", &res_0->start);
                return err;
        }

        smmu_pmu->iidr = readl_relaxed(smmu_pmu->reg_base + SMMU_PMCG_IIDR);

        name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "smmuv3_pmcg_%llx",
                              (res_0->start) >> SMMU_PMCG_PA_SHIFT);
        if (!name) {
                dev_err(dev, "Create name failed, PMU @%pa\n", &res_0->start);
                return -EINVAL;
        }

        smmu_pmu_get_acpi_options(smmu_pmu);

        /* Pick one CPU to be the preferred one to use */
        smmu_pmu->on_cpu = raw_smp_processor_id();
        WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu)));

        err = cpuhp_state_add_instance_nocalls(cpuhp_state_num,
                                               &smmu_pmu->node);
        if (err) {
                dev_err(dev, "Error %d registering hotplug, PMU @%pa\n",
                        err, &res_0->start);
                return err;
        }

        err = perf_pmu_register(&smmu_pmu->pmu, name, -1);
        if (err) {
                dev_err(dev, "Error %d registering PMU @%pa\n",
                        err, &res_0->start);
                goto out_unregister;
        }

        dev_info(dev, "Registered PMU @ %pa using %d counters with %s filter settings\n",
                 &res_0->start, smmu_pmu->num_counters,
                 smmu_pmu->global_filter ? "Global(Counter0)" :
                                           "Individual");

        return 0;

out_unregister:
        cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node);
        return err;
}

static int smmu_pmu_remove(struct platform_device *pdev)
{
        struct smmu_pmu *smmu_pmu = platform_get_drvdata(pdev);

        perf_pmu_unregister(&smmu_pmu->pmu);
        cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node);

        return 0;
}

static void smmu_pmu_shutdown(struct platform_device *pdev)
{
        struct smmu_pmu *smmu_pmu = platform_get_drvdata(pdev);

        smmu_pmu_disable(&smmu_pmu->pmu);
}

static struct platform_driver smmu_pmu_driver = {
        .driver = {
                .name = "arm-smmu-v3-pmcg",
                .suppress_bind_attrs = true,
        },
        .probe = smmu_pmu_probe,
        .remove = smmu_pmu_remove,
        .shutdown = smmu_pmu_shutdown,
};

static int __init arm_smmu_pmu_init(void)
{
        cpuhp_state_num = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
                                                  "perf/arm/pmcg:online",
                                                  NULL,
                                                  smmu_pmu_offline_cpu);
        if (cpuhp_state_num < 0)
                return cpuhp_state_num;

        return platform_driver_register(&smmu_pmu_driver);
}
module_init(arm_smmu_pmu_init);

static void __exit arm_smmu_pmu_exit(void)
{
        platform_driver_unregister(&smmu_pmu_driver);
        cpuhp_remove_multi_state(cpuhp_state_num);
}

module_exit(arm_smmu_pmu_exit);

MODULE_DESCRIPTION("PMU driver for ARM SMMUv3 Performance Monitors Extension");
MODULE_AUTHOR("Neil Leeder <nleeder@codeaurora.org>");
MODULE_AUTHOR("Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>");
MODULE_LICENSE("GPL v2");