1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * CAVIUM THUNDERX2 SoC PMU UNCORE 4 * Copyright (C) 2018 Cavium Inc. 5 * Author: Ganapatrao Kulkarni <gkulkarni@cavium.com> 6 */ 7 8 #include <linux/acpi.h> 9 #include <linux/cpuhotplug.h> 10 #include <linux/perf_event.h> 11 #include <linux/platform_device.h> 12 13 /* Each ThunderX2(TX2) Socket has a L3C and DMC UNCORE PMU device. 14 * Each UNCORE PMU device consists of 4 independent programmable counters. 15 * Counters are 32 bit and do not support overflow interrupt, 16 * they need to be sampled before overflow(i.e, at every 2 seconds). 17 */ 18 19 #define TX2_PMU_DMC_L3C_MAX_COUNTERS 4 20 #define TX2_PMU_CCPI2_MAX_COUNTERS 8 21 #define TX2_PMU_MAX_COUNTERS TX2_PMU_CCPI2_MAX_COUNTERS 22 23 24 #define TX2_PMU_DMC_CHANNELS 8 25 #define TX2_PMU_L3_TILES 16 26 27 #define TX2_PMU_HRTIMER_INTERVAL (2 * NSEC_PER_SEC) 28 #define GET_EVENTID(ev, mask) ((ev->hw.config) & mask) 29 #define GET_COUNTERID(ev, mask) ((ev->hw.idx) & mask) 30 /* 1 byte per counter(4 counters). 31 * Event id is encoded in bits [5:1] of a byte, 32 */ 33 #define DMC_EVENT_CFG(idx, val) ((val) << (((idx) * 8) + 1)) 34 35 /* bits[3:0] to select counters, are indexed from 8 to 15. */ 36 #define CCPI2_COUNTER_OFFSET 8 37 38 #define L3C_COUNTER_CTL 0xA8 39 #define L3C_COUNTER_DATA 0xAC 40 #define DMC_COUNTER_CTL 0x234 41 #define DMC_COUNTER_DATA 0x240 42 43 #define CCPI2_PERF_CTL 0x108 44 #define CCPI2_COUNTER_CTL 0x10C 45 #define CCPI2_COUNTER_SEL 0x12c 46 #define CCPI2_COUNTER_DATA_L 0x130 47 #define CCPI2_COUNTER_DATA_H 0x134 48 49 /* L3C event IDs */ 50 #define L3_EVENT_READ_REQ 0xD 51 #define L3_EVENT_WRITEBACK_REQ 0xE 52 #define L3_EVENT_INV_N_WRITE_REQ 0xF 53 #define L3_EVENT_INV_REQ 0x10 54 #define L3_EVENT_EVICT_REQ 0x13 55 #define L3_EVENT_INV_N_WRITE_HIT 0x14 56 #define L3_EVENT_INV_HIT 0x15 57 #define L3_EVENT_READ_HIT 0x17 58 #define L3_EVENT_MAX 0x18 59 60 /* DMC event IDs */ 61 #define DMC_EVENT_COUNT_CYCLES 0x1 62 #define DMC_EVENT_WRITE_TXNS 0xB 63 #define DMC_EVENT_DATA_TRANSFERS 0xD 64 #define DMC_EVENT_READ_TXNS 0xF 65 #define DMC_EVENT_MAX 0x10 66 67 #define CCPI2_EVENT_REQ_PKT_SENT 0x3D 68 #define CCPI2_EVENT_SNOOP_PKT_SENT 0x65 69 #define CCPI2_EVENT_DATA_PKT_SENT 0x105 70 #define CCPI2_EVENT_GIC_PKT_SENT 0x12D 71 #define CCPI2_EVENT_MAX 0x200 72 73 #define CCPI2_PERF_CTL_ENABLE BIT(0) 74 #define CCPI2_PERF_CTL_START BIT(1) 75 #define CCPI2_PERF_CTL_RESET BIT(4) 76 #define CCPI2_EVENT_LEVEL_RISING_EDGE BIT(10) 77 #define CCPI2_EVENT_TYPE_EDGE_SENSITIVE BIT(11) 78 79 enum tx2_uncore_type { 80 PMU_TYPE_L3C, 81 PMU_TYPE_DMC, 82 PMU_TYPE_CCPI2, 83 PMU_TYPE_INVALID, 84 }; 85 86 /* 87 * Each socket has 3 uncore devices associated with a PMU. The DMC and 88 * L3C have 4 32-bit counters and the CCPI2 has 8 64-bit counters. 89 */ 90 struct tx2_uncore_pmu { 91 struct hlist_node hpnode; 92 struct list_head entry; 93 struct pmu pmu; 94 char *name; 95 int node; 96 int cpu; 97 u32 max_counters; 98 u32 counters_mask; 99 u32 prorate_factor; 100 u32 max_events; 101 u32 events_mask; 102 u64 hrtimer_interval; 103 void __iomem *base; 104 DECLARE_BITMAP(active_counters, TX2_PMU_MAX_COUNTERS); 105 struct perf_event *events[TX2_PMU_MAX_COUNTERS]; 106 struct device *dev; 107 struct hrtimer hrtimer; 108 const struct attribute_group **attr_groups; 109 enum tx2_uncore_type type; 110 enum hrtimer_restart (*hrtimer_callback)(struct hrtimer *cb); 111 void (*init_cntr_base)(struct perf_event *event, 112 struct tx2_uncore_pmu *tx2_pmu); 113 void (*stop_event)(struct perf_event *event); 114 void (*start_event)(struct perf_event *event, int flags); 115 }; 116 117 static LIST_HEAD(tx2_pmus); 118 119 static inline struct tx2_uncore_pmu *pmu_to_tx2_pmu(struct pmu *pmu) 120 { 121 return container_of(pmu, struct tx2_uncore_pmu, pmu); 122 } 123 124 #define TX2_PMU_FORMAT_ATTR(_var, _name, _format) \ 125 static ssize_t \ 126 __tx2_pmu_##_var##_show(struct device *dev, \ 127 struct device_attribute *attr, \ 128 char *page) \ 129 { \ 130 BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ 131 return sprintf(page, _format "\n"); \ 132 } \ 133 \ 134 static struct device_attribute format_attr_##_var = \ 135 __ATTR(_name, 0444, __tx2_pmu_##_var##_show, NULL) 136 137 TX2_PMU_FORMAT_ATTR(event, event, "config:0-4"); 138 TX2_PMU_FORMAT_ATTR(event_ccpi2, event, "config:0-9"); 139 140 static struct attribute *l3c_pmu_format_attrs[] = { 141 &format_attr_event.attr, 142 NULL, 143 }; 144 145 static struct attribute *dmc_pmu_format_attrs[] = { 146 &format_attr_event.attr, 147 NULL, 148 }; 149 150 static struct attribute *ccpi2_pmu_format_attrs[] = { 151 &format_attr_event_ccpi2.attr, 152 NULL, 153 }; 154 155 static const struct attribute_group l3c_pmu_format_attr_group = { 156 .name = "format", 157 .attrs = l3c_pmu_format_attrs, 158 }; 159 160 static const struct attribute_group dmc_pmu_format_attr_group = { 161 .name = "format", 162 .attrs = dmc_pmu_format_attrs, 163 }; 164 165 static const struct attribute_group ccpi2_pmu_format_attr_group = { 166 .name = "format", 167 .attrs = ccpi2_pmu_format_attrs, 168 }; 169 170 /* 171 * sysfs event attributes 172 */ 173 static ssize_t tx2_pmu_event_show(struct device *dev, 174 struct device_attribute *attr, char *buf) 175 { 176 struct dev_ext_attribute *eattr; 177 178 eattr = container_of(attr, struct dev_ext_attribute, attr); 179 return sprintf(buf, "event=0x%lx\n", (unsigned long) eattr->var); 180 } 181 182 #define TX2_EVENT_ATTR(name, config) \ 183 PMU_EVENT_ATTR(name, tx2_pmu_event_attr_##name, \ 184 config, tx2_pmu_event_show) 185 186 TX2_EVENT_ATTR(read_request, L3_EVENT_READ_REQ); 187 TX2_EVENT_ATTR(writeback_request, L3_EVENT_WRITEBACK_REQ); 188 TX2_EVENT_ATTR(inv_nwrite_request, L3_EVENT_INV_N_WRITE_REQ); 189 TX2_EVENT_ATTR(inv_request, L3_EVENT_INV_REQ); 190 TX2_EVENT_ATTR(evict_request, L3_EVENT_EVICT_REQ); 191 TX2_EVENT_ATTR(inv_nwrite_hit, L3_EVENT_INV_N_WRITE_HIT); 192 TX2_EVENT_ATTR(inv_hit, L3_EVENT_INV_HIT); 193 TX2_EVENT_ATTR(read_hit, L3_EVENT_READ_HIT); 194 195 static struct attribute *l3c_pmu_events_attrs[] = { 196 &tx2_pmu_event_attr_read_request.attr.attr, 197 &tx2_pmu_event_attr_writeback_request.attr.attr, 198 &tx2_pmu_event_attr_inv_nwrite_request.attr.attr, 199 &tx2_pmu_event_attr_inv_request.attr.attr, 200 &tx2_pmu_event_attr_evict_request.attr.attr, 201 &tx2_pmu_event_attr_inv_nwrite_hit.attr.attr, 202 &tx2_pmu_event_attr_inv_hit.attr.attr, 203 &tx2_pmu_event_attr_read_hit.attr.attr, 204 NULL, 205 }; 206 207 TX2_EVENT_ATTR(cnt_cycles, DMC_EVENT_COUNT_CYCLES); 208 TX2_EVENT_ATTR(write_txns, DMC_EVENT_WRITE_TXNS); 209 TX2_EVENT_ATTR(data_transfers, DMC_EVENT_DATA_TRANSFERS); 210 TX2_EVENT_ATTR(read_txns, DMC_EVENT_READ_TXNS); 211 212 static struct attribute *dmc_pmu_events_attrs[] = { 213 &tx2_pmu_event_attr_cnt_cycles.attr.attr, 214 &tx2_pmu_event_attr_write_txns.attr.attr, 215 &tx2_pmu_event_attr_data_transfers.attr.attr, 216 &tx2_pmu_event_attr_read_txns.attr.attr, 217 NULL, 218 }; 219 220 TX2_EVENT_ATTR(req_pktsent, CCPI2_EVENT_REQ_PKT_SENT); 221 TX2_EVENT_ATTR(snoop_pktsent, CCPI2_EVENT_SNOOP_PKT_SENT); 222 TX2_EVENT_ATTR(data_pktsent, CCPI2_EVENT_DATA_PKT_SENT); 223 TX2_EVENT_ATTR(gic_pktsent, CCPI2_EVENT_GIC_PKT_SENT); 224 225 static struct attribute *ccpi2_pmu_events_attrs[] = { 226 &tx2_pmu_event_attr_req_pktsent.attr.attr, 227 &tx2_pmu_event_attr_snoop_pktsent.attr.attr, 228 &tx2_pmu_event_attr_data_pktsent.attr.attr, 229 &tx2_pmu_event_attr_gic_pktsent.attr.attr, 230 NULL, 231 }; 232 233 static const struct attribute_group l3c_pmu_events_attr_group = { 234 .name = "events", 235 .attrs = l3c_pmu_events_attrs, 236 }; 237 238 static const struct attribute_group dmc_pmu_events_attr_group = { 239 .name = "events", 240 .attrs = dmc_pmu_events_attrs, 241 }; 242 243 static const struct attribute_group ccpi2_pmu_events_attr_group = { 244 .name = "events", 245 .attrs = ccpi2_pmu_events_attrs, 246 }; 247 248 /* 249 * sysfs cpumask attributes 250 */ 251 static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, 252 char *buf) 253 { 254 struct tx2_uncore_pmu *tx2_pmu; 255 256 tx2_pmu = pmu_to_tx2_pmu(dev_get_drvdata(dev)); 257 return cpumap_print_to_pagebuf(true, buf, cpumask_of(tx2_pmu->cpu)); 258 } 259 static DEVICE_ATTR_RO(cpumask); 260 261 static struct attribute *tx2_pmu_cpumask_attrs[] = { 262 &dev_attr_cpumask.attr, 263 NULL, 264 }; 265 266 static const struct attribute_group pmu_cpumask_attr_group = { 267 .attrs = tx2_pmu_cpumask_attrs, 268 }; 269 270 /* 271 * Per PMU device attribute groups 272 */ 273 static const struct attribute_group *l3c_pmu_attr_groups[] = { 274 &l3c_pmu_format_attr_group, 275 &pmu_cpumask_attr_group, 276 &l3c_pmu_events_attr_group, 277 NULL 278 }; 279 280 static const struct attribute_group *dmc_pmu_attr_groups[] = { 281 &dmc_pmu_format_attr_group, 282 &pmu_cpumask_attr_group, 283 &dmc_pmu_events_attr_group, 284 NULL 285 }; 286 287 static const struct attribute_group *ccpi2_pmu_attr_groups[] = { 288 &ccpi2_pmu_format_attr_group, 289 &pmu_cpumask_attr_group, 290 &ccpi2_pmu_events_attr_group, 291 NULL 292 }; 293 294 static inline u32 reg_readl(unsigned long addr) 295 { 296 return readl((void __iomem *)addr); 297 } 298 299 static inline void reg_writel(u32 val, unsigned long addr) 300 { 301 writel(val, (void __iomem *)addr); 302 } 303 304 static int alloc_counter(struct tx2_uncore_pmu *tx2_pmu) 305 { 306 int counter; 307 308 counter = find_first_zero_bit(tx2_pmu->active_counters, 309 tx2_pmu->max_counters); 310 if (counter == tx2_pmu->max_counters) 311 return -ENOSPC; 312 313 set_bit(counter, tx2_pmu->active_counters); 314 return counter; 315 } 316 317 static inline void free_counter(struct tx2_uncore_pmu *tx2_pmu, int counter) 318 { 319 clear_bit(counter, tx2_pmu->active_counters); 320 } 321 322 static void init_cntr_base_l3c(struct perf_event *event, 323 struct tx2_uncore_pmu *tx2_pmu) 324 { 325 struct hw_perf_event *hwc = &event->hw; 326 u32 cmask; 327 328 tx2_pmu = pmu_to_tx2_pmu(event->pmu); 329 cmask = tx2_pmu->counters_mask; 330 331 /* counter ctrl/data reg offset at 8 */ 332 hwc->config_base = (unsigned long)tx2_pmu->base 333 + L3C_COUNTER_CTL + (8 * GET_COUNTERID(event, cmask)); 334 hwc->event_base = (unsigned long)tx2_pmu->base 335 + L3C_COUNTER_DATA + (8 * GET_COUNTERID(event, cmask)); 336 } 337 338 static void init_cntr_base_dmc(struct perf_event *event, 339 struct tx2_uncore_pmu *tx2_pmu) 340 { 341 struct hw_perf_event *hwc = &event->hw; 342 u32 cmask; 343 344 tx2_pmu = pmu_to_tx2_pmu(event->pmu); 345 cmask = tx2_pmu->counters_mask; 346 347 hwc->config_base = (unsigned long)tx2_pmu->base 348 + DMC_COUNTER_CTL; 349 /* counter data reg offset at 0xc */ 350 hwc->event_base = (unsigned long)tx2_pmu->base 351 + DMC_COUNTER_DATA + (0xc * GET_COUNTERID(event, cmask)); 352 } 353 354 static void init_cntr_base_ccpi2(struct perf_event *event, 355 struct tx2_uncore_pmu *tx2_pmu) 356 { 357 struct hw_perf_event *hwc = &event->hw; 358 u32 cmask; 359 360 cmask = tx2_pmu->counters_mask; 361 362 hwc->config_base = (unsigned long)tx2_pmu->base 363 + CCPI2_COUNTER_CTL + (4 * GET_COUNTERID(event, cmask)); 364 hwc->event_base = (unsigned long)tx2_pmu->base; 365 } 366 367 static void uncore_start_event_l3c(struct perf_event *event, int flags) 368 { 369 u32 val, emask; 370 struct hw_perf_event *hwc = &event->hw; 371 struct tx2_uncore_pmu *tx2_pmu; 372 373 tx2_pmu = pmu_to_tx2_pmu(event->pmu); 374 emask = tx2_pmu->events_mask; 375 376 /* event id encoded in bits [07:03] */ 377 val = GET_EVENTID(event, emask) << 3; 378 reg_writel(val, hwc->config_base); 379 local64_set(&hwc->prev_count, 0); 380 reg_writel(0, hwc->event_base); 381 } 382 383 static inline void uncore_stop_event_l3c(struct perf_event *event) 384 { 385 reg_writel(0, event->hw.config_base); 386 } 387 388 static void uncore_start_event_dmc(struct perf_event *event, int flags) 389 { 390 u32 val, cmask, emask; 391 struct hw_perf_event *hwc = &event->hw; 392 struct tx2_uncore_pmu *tx2_pmu; 393 int idx, event_id; 394 395 tx2_pmu = pmu_to_tx2_pmu(event->pmu); 396 cmask = tx2_pmu->counters_mask; 397 emask = tx2_pmu->events_mask; 398 399 idx = GET_COUNTERID(event, cmask); 400 event_id = GET_EVENTID(event, emask); 401 402 /* enable and start counters. 403 * 8 bits for each counter, bits[05:01] of a counter to set event type. 404 */ 405 val = reg_readl(hwc->config_base); 406 val &= ~DMC_EVENT_CFG(idx, 0x1f); 407 val |= DMC_EVENT_CFG(idx, event_id); 408 reg_writel(val, hwc->config_base); 409 local64_set(&hwc->prev_count, 0); 410 reg_writel(0, hwc->event_base); 411 } 412 413 static void uncore_stop_event_dmc(struct perf_event *event) 414 { 415 u32 val, cmask; 416 struct hw_perf_event *hwc = &event->hw; 417 struct tx2_uncore_pmu *tx2_pmu; 418 int idx; 419 420 tx2_pmu = pmu_to_tx2_pmu(event->pmu); 421 cmask = tx2_pmu->counters_mask; 422 idx = GET_COUNTERID(event, cmask); 423 424 /* clear event type(bits[05:01]) to stop counter */ 425 val = reg_readl(hwc->config_base); 426 val &= ~DMC_EVENT_CFG(idx, 0x1f); 427 reg_writel(val, hwc->config_base); 428 } 429 430 static void uncore_start_event_ccpi2(struct perf_event *event, int flags) 431 { 432 u32 emask; 433 struct hw_perf_event *hwc = &event->hw; 434 struct tx2_uncore_pmu *tx2_pmu; 435 436 tx2_pmu = pmu_to_tx2_pmu(event->pmu); 437 emask = tx2_pmu->events_mask; 438 439 /* Bit [09:00] to set event id. 440 * Bits [10], set level to rising edge. 441 * Bits [11], set type to edge sensitive. 442 */ 443 reg_writel((CCPI2_EVENT_TYPE_EDGE_SENSITIVE | 444 CCPI2_EVENT_LEVEL_RISING_EDGE | 445 GET_EVENTID(event, emask)), hwc->config_base); 446 447 /* reset[4], enable[0] and start[1] counters */ 448 reg_writel(CCPI2_PERF_CTL_RESET | 449 CCPI2_PERF_CTL_START | 450 CCPI2_PERF_CTL_ENABLE, 451 hwc->event_base + CCPI2_PERF_CTL); 452 local64_set(&event->hw.prev_count, 0ULL); 453 } 454 455 static void uncore_stop_event_ccpi2(struct perf_event *event) 456 { 457 struct hw_perf_event *hwc = &event->hw; 458 459 /* disable and stop counter */ 460 reg_writel(0, hwc->event_base + CCPI2_PERF_CTL); 461 } 462 463 static void tx2_uncore_event_update(struct perf_event *event) 464 { 465 u64 prev, delta, new = 0; 466 struct hw_perf_event *hwc = &event->hw; 467 struct tx2_uncore_pmu *tx2_pmu; 468 enum tx2_uncore_type type; 469 u32 prorate_factor; 470 u32 cmask, emask; 471 472 tx2_pmu = pmu_to_tx2_pmu(event->pmu); 473 type = tx2_pmu->type; 474 cmask = tx2_pmu->counters_mask; 475 emask = tx2_pmu->events_mask; 476 prorate_factor = tx2_pmu->prorate_factor; 477 if (type == PMU_TYPE_CCPI2) { 478 reg_writel(CCPI2_COUNTER_OFFSET + 479 GET_COUNTERID(event, cmask), 480 hwc->event_base + CCPI2_COUNTER_SEL); 481 new = reg_readl(hwc->event_base + CCPI2_COUNTER_DATA_H); 482 new = (new << 32) + 483 reg_readl(hwc->event_base + CCPI2_COUNTER_DATA_L); 484 prev = local64_xchg(&hwc->prev_count, new); 485 delta = new - prev; 486 } else { 487 new = reg_readl(hwc->event_base); 488 prev = local64_xchg(&hwc->prev_count, new); 489 /* handles rollover of 32 bit counter */ 490 delta = (u32)(((1UL << 32) - prev) + new); 491 } 492 493 /* DMC event data_transfers granularity is 16 Bytes, convert it to 64 */ 494 if (type == PMU_TYPE_DMC && 495 GET_EVENTID(event, emask) == DMC_EVENT_DATA_TRANSFERS) 496 delta = delta/4; 497 498 /* L3C and DMC has 16 and 8 interleave channels respectively. 499 * The sampled value is for channel 0 and multiplied with 500 * prorate_factor to get the count for a device. 501 */ 502 local64_add(delta * prorate_factor, &event->count); 503 } 504 505 static enum tx2_uncore_type get_tx2_pmu_type(struct acpi_device *adev) 506 { 507 int i = 0; 508 struct acpi_tx2_pmu_device { 509 __u8 id[ACPI_ID_LEN]; 510 enum tx2_uncore_type type; 511 } devices[] = { 512 {"CAV901D", PMU_TYPE_L3C}, 513 {"CAV901F", PMU_TYPE_DMC}, 514 {"CAV901E", PMU_TYPE_CCPI2}, 515 {"", PMU_TYPE_INVALID} 516 }; 517 518 while (devices[i].type != PMU_TYPE_INVALID) { 519 if (!strcmp(acpi_device_hid(adev), devices[i].id)) 520 break; 521 i++; 522 } 523 524 return devices[i].type; 525 } 526 527 static bool tx2_uncore_validate_event(struct pmu *pmu, 528 struct perf_event *event, int *counters) 529 { 530 if (is_software_event(event)) 531 return true; 532 /* Reject groups spanning multiple HW PMUs. */ 533 if (event->pmu != pmu) 534 return false; 535 536 *counters = *counters + 1; 537 return true; 538 } 539 540 /* 541 * Make sure the group of events can be scheduled at once 542 * on the PMU. 543 */ 544 static bool tx2_uncore_validate_event_group(struct perf_event *event, 545 int max_counters) 546 { 547 struct perf_event *sibling, *leader = event->group_leader; 548 int counters = 0; 549 550 if (event->group_leader == event) 551 return true; 552 553 if (!tx2_uncore_validate_event(event->pmu, leader, &counters)) 554 return false; 555 556 for_each_sibling_event(sibling, leader) { 557 if (!tx2_uncore_validate_event(event->pmu, sibling, &counters)) 558 return false; 559 } 560 561 if (!tx2_uncore_validate_event(event->pmu, event, &counters)) 562 return false; 563 564 /* 565 * If the group requires more counters than the HW has, 566 * it cannot ever be scheduled. 567 */ 568 return counters <= max_counters; 569 } 570 571 572 static int tx2_uncore_event_init(struct perf_event *event) 573 { 574 struct hw_perf_event *hwc = &event->hw; 575 struct tx2_uncore_pmu *tx2_pmu; 576 577 /* Test the event attr type check for PMU enumeration */ 578 if (event->attr.type != event->pmu->type) 579 return -ENOENT; 580 581 /* 582 * SOC PMU counters are shared across all cores. 583 * Therefore, it does not support per-process mode. 584 * Also, it does not support event sampling mode. 585 */ 586 if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) 587 return -EINVAL; 588 589 if (event->cpu < 0) 590 return -EINVAL; 591 592 tx2_pmu = pmu_to_tx2_pmu(event->pmu); 593 if (tx2_pmu->cpu >= nr_cpu_ids) 594 return -EINVAL; 595 event->cpu = tx2_pmu->cpu; 596 597 if (event->attr.config >= tx2_pmu->max_events) 598 return -EINVAL; 599 600 /* store event id */ 601 hwc->config = event->attr.config; 602 603 /* Validate the group */ 604 if (!tx2_uncore_validate_event_group(event, tx2_pmu->max_counters)) 605 return -EINVAL; 606 607 return 0; 608 } 609 610 static void tx2_uncore_event_start(struct perf_event *event, int flags) 611 { 612 struct hw_perf_event *hwc = &event->hw; 613 struct tx2_uncore_pmu *tx2_pmu; 614 615 hwc->state = 0; 616 tx2_pmu = pmu_to_tx2_pmu(event->pmu); 617 618 tx2_pmu->start_event(event, flags); 619 perf_event_update_userpage(event); 620 621 /* No hrtimer needed for CCPI2, 64-bit counters */ 622 if (!tx2_pmu->hrtimer_callback) 623 return; 624 625 /* Start timer for first event */ 626 if (bitmap_weight(tx2_pmu->active_counters, 627 tx2_pmu->max_counters) == 1) { 628 hrtimer_start(&tx2_pmu->hrtimer, 629 ns_to_ktime(tx2_pmu->hrtimer_interval), 630 HRTIMER_MODE_REL_PINNED); 631 } 632 } 633 634 static void tx2_uncore_event_stop(struct perf_event *event, int flags) 635 { 636 struct hw_perf_event *hwc = &event->hw; 637 struct tx2_uncore_pmu *tx2_pmu; 638 639 if (hwc->state & PERF_HES_UPTODATE) 640 return; 641 642 tx2_pmu = pmu_to_tx2_pmu(event->pmu); 643 tx2_pmu->stop_event(event); 644 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); 645 hwc->state |= PERF_HES_STOPPED; 646 if (flags & PERF_EF_UPDATE) { 647 tx2_uncore_event_update(event); 648 hwc->state |= PERF_HES_UPTODATE; 649 } 650 } 651 652 static int tx2_uncore_event_add(struct perf_event *event, int flags) 653 { 654 struct hw_perf_event *hwc = &event->hw; 655 struct tx2_uncore_pmu *tx2_pmu; 656 657 tx2_pmu = pmu_to_tx2_pmu(event->pmu); 658 659 /* Allocate a free counter */ 660 hwc->idx = alloc_counter(tx2_pmu); 661 if (hwc->idx < 0) 662 return -EAGAIN; 663 664 tx2_pmu->events[hwc->idx] = event; 665 /* set counter control and data registers base address */ 666 tx2_pmu->init_cntr_base(event, tx2_pmu); 667 668 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; 669 if (flags & PERF_EF_START) 670 tx2_uncore_event_start(event, flags); 671 672 return 0; 673 } 674 675 static void tx2_uncore_event_del(struct perf_event *event, int flags) 676 { 677 struct tx2_uncore_pmu *tx2_pmu = pmu_to_tx2_pmu(event->pmu); 678 struct hw_perf_event *hwc = &event->hw; 679 u32 cmask; 680 681 cmask = tx2_pmu->counters_mask; 682 tx2_uncore_event_stop(event, PERF_EF_UPDATE); 683 684 /* clear the assigned counter */ 685 free_counter(tx2_pmu, GET_COUNTERID(event, cmask)); 686 687 perf_event_update_userpage(event); 688 tx2_pmu->events[hwc->idx] = NULL; 689 hwc->idx = -1; 690 691 if (!tx2_pmu->hrtimer_callback) 692 return; 693 694 if (bitmap_empty(tx2_pmu->active_counters, tx2_pmu->max_counters)) 695 hrtimer_cancel(&tx2_pmu->hrtimer); 696 } 697 698 static void tx2_uncore_event_read(struct perf_event *event) 699 { 700 tx2_uncore_event_update(event); 701 } 702 703 static enum hrtimer_restart tx2_hrtimer_callback(struct hrtimer *timer) 704 { 705 struct tx2_uncore_pmu *tx2_pmu; 706 int max_counters, idx; 707 708 tx2_pmu = container_of(timer, struct tx2_uncore_pmu, hrtimer); 709 max_counters = tx2_pmu->max_counters; 710 711 if (bitmap_empty(tx2_pmu->active_counters, max_counters)) 712 return HRTIMER_NORESTART; 713 714 for_each_set_bit(idx, tx2_pmu->active_counters, max_counters) { 715 struct perf_event *event = tx2_pmu->events[idx]; 716 717 tx2_uncore_event_update(event); 718 } 719 hrtimer_forward_now(timer, ns_to_ktime(tx2_pmu->hrtimer_interval)); 720 return HRTIMER_RESTART; 721 } 722 723 static int tx2_uncore_pmu_register( 724 struct tx2_uncore_pmu *tx2_pmu) 725 { 726 struct device *dev = tx2_pmu->dev; 727 char *name = tx2_pmu->name; 728 729 /* Perf event registration */ 730 tx2_pmu->pmu = (struct pmu) { 731 .module = THIS_MODULE, 732 .attr_groups = tx2_pmu->attr_groups, 733 .task_ctx_nr = perf_invalid_context, 734 .event_init = tx2_uncore_event_init, 735 .add = tx2_uncore_event_add, 736 .del = tx2_uncore_event_del, 737 .start = tx2_uncore_event_start, 738 .stop = tx2_uncore_event_stop, 739 .read = tx2_uncore_event_read, 740 .capabilities = PERF_PMU_CAP_NO_EXCLUDE, 741 }; 742 743 tx2_pmu->pmu.name = devm_kasprintf(dev, GFP_KERNEL, 744 "%s", name); 745 746 return perf_pmu_register(&tx2_pmu->pmu, tx2_pmu->pmu.name, -1); 747 } 748 749 static int tx2_uncore_pmu_add_dev(struct tx2_uncore_pmu *tx2_pmu) 750 { 751 int ret, cpu; 752 753 cpu = cpumask_any_and(cpumask_of_node(tx2_pmu->node), 754 cpu_online_mask); 755 756 tx2_pmu->cpu = cpu; 757 758 if (tx2_pmu->hrtimer_callback) { 759 hrtimer_init(&tx2_pmu->hrtimer, 760 CLOCK_MONOTONIC, HRTIMER_MODE_REL); 761 tx2_pmu->hrtimer.function = tx2_pmu->hrtimer_callback; 762 } 763 764 ret = tx2_uncore_pmu_register(tx2_pmu); 765 if (ret) { 766 dev_err(tx2_pmu->dev, "%s PMU: Failed to init driver\n", 767 tx2_pmu->name); 768 return -ENODEV; 769 } 770 771 /* register hotplug callback for the pmu */ 772 ret = cpuhp_state_add_instance( 773 CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE, 774 &tx2_pmu->hpnode); 775 if (ret) { 776 dev_err(tx2_pmu->dev, "Error %d registering hotplug", ret); 777 return ret; 778 } 779 780 /* Add to list */ 781 list_add(&tx2_pmu->entry, &tx2_pmus); 782 783 dev_dbg(tx2_pmu->dev, "%s PMU UNCORE registered\n", 784 tx2_pmu->pmu.name); 785 return ret; 786 } 787 788 static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev, 789 acpi_handle handle, struct acpi_device *adev, u32 type) 790 { 791 struct tx2_uncore_pmu *tx2_pmu; 792 void __iomem *base; 793 struct resource res; 794 struct resource_entry *rentry; 795 struct list_head list; 796 int ret; 797 798 INIT_LIST_HEAD(&list); 799 ret = acpi_dev_get_resources(adev, &list, NULL, NULL); 800 if (ret <= 0) { 801 dev_err(dev, "failed to parse _CRS method, error %d\n", ret); 802 return NULL; 803 } 804 805 list_for_each_entry(rentry, &list, node) { 806 if (resource_type(rentry->res) == IORESOURCE_MEM) { 807 res = *rentry->res; 808 break; 809 } 810 } 811 812 if (!rentry->res) 813 return NULL; 814 815 acpi_dev_free_resource_list(&list); 816 base = devm_ioremap_resource(dev, &res); 817 if (IS_ERR(base)) { 818 dev_err(dev, "PMU type %d: Fail to map resource\n", type); 819 return NULL; 820 } 821 822 tx2_pmu = devm_kzalloc(dev, sizeof(*tx2_pmu), GFP_KERNEL); 823 if (!tx2_pmu) 824 return NULL; 825 826 tx2_pmu->dev = dev; 827 tx2_pmu->type = type; 828 tx2_pmu->base = base; 829 tx2_pmu->node = dev_to_node(dev); 830 INIT_LIST_HEAD(&tx2_pmu->entry); 831 832 switch (tx2_pmu->type) { 833 case PMU_TYPE_L3C: 834 tx2_pmu->max_counters = TX2_PMU_DMC_L3C_MAX_COUNTERS; 835 tx2_pmu->counters_mask = 0x3; 836 tx2_pmu->prorate_factor = TX2_PMU_L3_TILES; 837 tx2_pmu->max_events = L3_EVENT_MAX; 838 tx2_pmu->events_mask = 0x1f; 839 tx2_pmu->hrtimer_interval = TX2_PMU_HRTIMER_INTERVAL; 840 tx2_pmu->hrtimer_callback = tx2_hrtimer_callback; 841 tx2_pmu->attr_groups = l3c_pmu_attr_groups; 842 tx2_pmu->name = devm_kasprintf(dev, GFP_KERNEL, 843 "uncore_l3c_%d", tx2_pmu->node); 844 tx2_pmu->init_cntr_base = init_cntr_base_l3c; 845 tx2_pmu->start_event = uncore_start_event_l3c; 846 tx2_pmu->stop_event = uncore_stop_event_l3c; 847 break; 848 case PMU_TYPE_DMC: 849 tx2_pmu->max_counters = TX2_PMU_DMC_L3C_MAX_COUNTERS; 850 tx2_pmu->counters_mask = 0x3; 851 tx2_pmu->prorate_factor = TX2_PMU_DMC_CHANNELS; 852 tx2_pmu->max_events = DMC_EVENT_MAX; 853 tx2_pmu->events_mask = 0x1f; 854 tx2_pmu->hrtimer_interval = TX2_PMU_HRTIMER_INTERVAL; 855 tx2_pmu->hrtimer_callback = tx2_hrtimer_callback; 856 tx2_pmu->attr_groups = dmc_pmu_attr_groups; 857 tx2_pmu->name = devm_kasprintf(dev, GFP_KERNEL, 858 "uncore_dmc_%d", tx2_pmu->node); 859 tx2_pmu->init_cntr_base = init_cntr_base_dmc; 860 tx2_pmu->start_event = uncore_start_event_dmc; 861 tx2_pmu->stop_event = uncore_stop_event_dmc; 862 break; 863 case PMU_TYPE_CCPI2: 864 /* CCPI2 has 8 counters */ 865 tx2_pmu->max_counters = TX2_PMU_CCPI2_MAX_COUNTERS; 866 tx2_pmu->counters_mask = 0x7; 867 tx2_pmu->prorate_factor = 1; 868 tx2_pmu->max_events = CCPI2_EVENT_MAX; 869 tx2_pmu->events_mask = 0x1ff; 870 tx2_pmu->attr_groups = ccpi2_pmu_attr_groups; 871 tx2_pmu->name = devm_kasprintf(dev, GFP_KERNEL, 872 "uncore_ccpi2_%d", tx2_pmu->node); 873 tx2_pmu->init_cntr_base = init_cntr_base_ccpi2; 874 tx2_pmu->start_event = uncore_start_event_ccpi2; 875 tx2_pmu->stop_event = uncore_stop_event_ccpi2; 876 tx2_pmu->hrtimer_callback = NULL; 877 break; 878 case PMU_TYPE_INVALID: 879 devm_kfree(dev, tx2_pmu); 880 return NULL; 881 } 882 883 return tx2_pmu; 884 } 885 886 static acpi_status tx2_uncore_pmu_add(acpi_handle handle, u32 level, 887 void *data, void **return_value) 888 { 889 struct tx2_uncore_pmu *tx2_pmu; 890 struct acpi_device *adev; 891 enum tx2_uncore_type type; 892 893 if (acpi_bus_get_device(handle, &adev)) 894 return AE_OK; 895 if (acpi_bus_get_status(adev) || !adev->status.present) 896 return AE_OK; 897 898 type = get_tx2_pmu_type(adev); 899 if (type == PMU_TYPE_INVALID) 900 return AE_OK; 901 902 tx2_pmu = tx2_uncore_pmu_init_dev((struct device *)data, 903 handle, adev, type); 904 905 if (!tx2_pmu) 906 return AE_ERROR; 907 908 if (tx2_uncore_pmu_add_dev(tx2_pmu)) { 909 /* Can't add the PMU device, abort */ 910 return AE_ERROR; 911 } 912 return AE_OK; 913 } 914 915 static int tx2_uncore_pmu_online_cpu(unsigned int cpu, 916 struct hlist_node *hpnode) 917 { 918 struct tx2_uncore_pmu *tx2_pmu; 919 920 tx2_pmu = hlist_entry_safe(hpnode, 921 struct tx2_uncore_pmu, hpnode); 922 923 /* Pick this CPU, If there is no CPU/PMU association and both are 924 * from same node. 925 */ 926 if ((tx2_pmu->cpu >= nr_cpu_ids) && 927 (tx2_pmu->node == cpu_to_node(cpu))) 928 tx2_pmu->cpu = cpu; 929 930 return 0; 931 } 932 933 static int tx2_uncore_pmu_offline_cpu(unsigned int cpu, 934 struct hlist_node *hpnode) 935 { 936 int new_cpu; 937 struct tx2_uncore_pmu *tx2_pmu; 938 struct cpumask cpu_online_mask_temp; 939 940 tx2_pmu = hlist_entry_safe(hpnode, 941 struct tx2_uncore_pmu, hpnode); 942 943 if (cpu != tx2_pmu->cpu) 944 return 0; 945 946 if (tx2_pmu->hrtimer_callback) 947 hrtimer_cancel(&tx2_pmu->hrtimer); 948 949 cpumask_copy(&cpu_online_mask_temp, cpu_online_mask); 950 cpumask_clear_cpu(cpu, &cpu_online_mask_temp); 951 new_cpu = cpumask_any_and( 952 cpumask_of_node(tx2_pmu->node), 953 &cpu_online_mask_temp); 954 955 tx2_pmu->cpu = new_cpu; 956 if (new_cpu >= nr_cpu_ids) 957 return 0; 958 perf_pmu_migrate_context(&tx2_pmu->pmu, cpu, new_cpu); 959 960 return 0; 961 } 962 963 static const struct acpi_device_id tx2_uncore_acpi_match[] = { 964 {"CAV901C", 0}, 965 {}, 966 }; 967 MODULE_DEVICE_TABLE(acpi, tx2_uncore_acpi_match); 968 969 static int tx2_uncore_probe(struct platform_device *pdev) 970 { 971 struct device *dev = &pdev->dev; 972 acpi_handle handle; 973 acpi_status status; 974 975 set_dev_node(dev, acpi_get_node(ACPI_HANDLE(dev))); 976 977 if (!has_acpi_companion(dev)) 978 return -ENODEV; 979 980 handle = ACPI_HANDLE(dev); 981 if (!handle) 982 return -EINVAL; 983 984 /* Walk through the tree for all PMU UNCORE devices */ 985 status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1, 986 tx2_uncore_pmu_add, 987 NULL, dev, NULL); 988 if (ACPI_FAILURE(status)) { 989 dev_err(dev, "failed to probe PMU devices\n"); 990 return_ACPI_STATUS(status); 991 } 992 993 dev_info(dev, "node%d: pmu uncore registered\n", dev_to_node(dev)); 994 return 0; 995 } 996 997 static int tx2_uncore_remove(struct platform_device *pdev) 998 { 999 struct tx2_uncore_pmu *tx2_pmu, *temp; 1000 struct device *dev = &pdev->dev; 1001 1002 if (!list_empty(&tx2_pmus)) { 1003 list_for_each_entry_safe(tx2_pmu, temp, &tx2_pmus, entry) { 1004 if (tx2_pmu->node == dev_to_node(dev)) { 1005 cpuhp_state_remove_instance_nocalls( 1006 CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE, 1007 &tx2_pmu->hpnode); 1008 perf_pmu_unregister(&tx2_pmu->pmu); 1009 list_del(&tx2_pmu->entry); 1010 } 1011 } 1012 } 1013 return 0; 1014 } 1015 1016 static struct platform_driver tx2_uncore_driver = { 1017 .driver = { 1018 .name = "tx2-uncore-pmu", 1019 .acpi_match_table = ACPI_PTR(tx2_uncore_acpi_match), 1020 }, 1021 .probe = tx2_uncore_probe, 1022 .remove = tx2_uncore_remove, 1023 }; 1024 1025 static int __init tx2_uncore_driver_init(void) 1026 { 1027 int ret; 1028 1029 ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE, 1030 "perf/tx2/uncore:online", 1031 tx2_uncore_pmu_online_cpu, 1032 tx2_uncore_pmu_offline_cpu); 1033 if (ret) { 1034 pr_err("TX2 PMU: setup hotplug failed(%d)\n", ret); 1035 return ret; 1036 } 1037 ret = platform_driver_register(&tx2_uncore_driver); 1038 if (ret) 1039 cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE); 1040 1041 return ret; 1042 } 1043 module_init(tx2_uncore_driver_init); 1044 1045 static void __exit tx2_uncore_driver_exit(void) 1046 { 1047 platform_driver_unregister(&tx2_uncore_driver); 1048 cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE); 1049 } 1050 module_exit(tx2_uncore_driver_exit); 1051 1052 MODULE_DESCRIPTION("ThunderX2 UNCORE PMU driver"); 1053 MODULE_LICENSE("GPL v2"); 1054 MODULE_AUTHOR("Ganapatrao Kulkarni <gkulkarni@cavium.com>"); 1055