/*
 * CCI cache coherent interconnect driver
 *
 * Copyright (C) 2013 ARM Ltd.
 * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed "as is" WITHOUT ANY WARRANTY of any
 * kind, whether express or implied; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */

#include <linux/arm-cci.h>
#include <linux/io.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

#include <asm/cacheflush.h>
#include <asm/smp_plat.h>

#define DRIVER_NAME		"CCI-400"
#define DRIVER_NAME_PMU		DRIVER_NAME " PMU"

/* Offset of the control register, relative to a port's base address. */
#define CCI_PORT_CTRL		0x0
/* Offset of the status register, relative to the CCI control base. */
#define CCI_CTRL_STATUS		0xc

/* Values written to the port control register. */
#define CCI_ENABLE_SNOOP_REQ	0x1
#define CCI_ENABLE_DVM_REQ	0x2
#define CCI_ENABLE_REQ		(CCI_ENABLE_SNOOP_REQ | CCI_ENABLE_DVM_REQ)

/* Number of ports of each kind. */
struct cci_nb_ports {
	unsigned int nb_ace;
	unsigned int nb_ace_lite;
};

/* Kind of a CCI port; zero means the entry is not a usable port. */
enum cci_ace_port_type {
	ACE_INVALID_PORT = 0x0,
	ACE_PORT,
	ACE_LITE_PORT,
};

/*
 * One CCI port.
 *
 * The size of this structure and the offset of @phys are passed to the
 * assembly in cci_enable_port_for_self(), so the layout must not be changed
 * without checking that code.
 */
struct cci_ace_port {
	void __iomem *base;		/* mapped port registers */
	unsigned long phys;		/* physical address of the port registers */
	enum cci_ace_port_type type;
	struct device_node *dn;		/* node compared against "cci-control-port" */
};

/* Port table and its number of entries. */
static struct cci_ace_port *ports;
static unsigned int nb_cci_ports;

/* CCI control registers: mapped address and physical address. */
static void __iomem *cci_ctrl_base;
static unsigned long cci_ctrl_phys;

#ifdef CONFIG_HW_PERF_EVENTS

/* Offsets relative to the CCI control base. */
#define CCI_PMCR		0x0100
#define CCI_PID2		0x0fe8

/* PMCR fields: global counter enable and number of event counters. */
#define CCI_PMCR_CEN		0x00000001
#define CCI_PMCR_NCNT_MASK	0x0000f800
#define CCI_PMCR_NCNT_SHIFT	11

#define CCI_PID2_REV_MASK 0xf0 76 #define CCI_PID2_REV_SHIFT 4 77 78 /* Port ids */ 79 #define CCI_PORT_S0 0 80 #define CCI_PORT_S1 1 81 #define CCI_PORT_S2 2 82 #define CCI_PORT_S3 3 83 #define CCI_PORT_S4 4 84 #define CCI_PORT_M0 5 85 #define CCI_PORT_M1 6 86 #define CCI_PORT_M2 7 87 88 #define CCI_REV_R0 0 89 #define CCI_REV_R1 1 90 #define CCI_REV_R1_PX 5 91 92 #define CCI_PMU_EVT_SEL 0x000 93 #define CCI_PMU_CNTR 0x004 94 #define CCI_PMU_CNTR_CTRL 0x008 95 #define CCI_PMU_OVRFLW 0x00c 96 97 #define CCI_PMU_OVRFLW_FLAG 1 98 99 #define CCI_PMU_CNTR_BASE(idx) ((idx) * SZ_4K) 100 101 #define CCI_PMU_CNTR_MASK ((1ULL << 32) -1) 102 103 /* 104 * Instead of an event id to monitor CCI cycles, a dedicated counter is 105 * provided. Use 0xff to represent CCI cycles and hope that no future revisions 106 * make use of this event in hardware. 107 */ 108 enum cci400_perf_events { 109 CCI_PMU_CYCLES = 0xff 110 }; 111 112 #define CCI_PMU_EVENT_MASK 0xff 113 #define CCI_PMU_EVENT_SOURCE(event) ((event >> 5) & 0x7) 114 #define CCI_PMU_EVENT_CODE(event) (event & 0x1f) 115 116 #define CCI_PMU_MAX_HW_EVENTS 5 /* CCI PMU has 4 counters + 1 cycle counter */ 117 118 #define CCI_PMU_CYCLE_CNTR_IDX 0 119 #define CCI_PMU_CNTR0_IDX 1 120 #define CCI_PMU_CNTR_LAST(cci_pmu) (CCI_PMU_CYCLE_CNTR_IDX + cci_pmu->num_events - 1) 121 122 /* 123 * CCI PMU event id is an 8-bit value made of two parts - bits 7:5 for one of 8 124 * ports and bits 4:0 are event codes. There are different event codes 125 * associated with each port type. 126 * 127 * Additionally, the range of events associated with the port types changed 128 * between Rev0 and Rev1. 129 * 130 * The constants below define the range of valid codes for each port type for 131 * the different revisions and are used to validate the event to be monitored. 
132 */ 133 134 #define CCI_REV_R0_SLAVE_PORT_MIN_EV 0x00 135 #define CCI_REV_R0_SLAVE_PORT_MAX_EV 0x13 136 #define CCI_REV_R0_MASTER_PORT_MIN_EV 0x14 137 #define CCI_REV_R0_MASTER_PORT_MAX_EV 0x1a 138 139 #define CCI_REV_R1_SLAVE_PORT_MIN_EV 0x00 140 #define CCI_REV_R1_SLAVE_PORT_MAX_EV 0x14 141 #define CCI_REV_R1_MASTER_PORT_MIN_EV 0x00 142 #define CCI_REV_R1_MASTER_PORT_MAX_EV 0x11 143 144 struct pmu_port_event_ranges { 145 u8 slave_min; 146 u8 slave_max; 147 u8 master_min; 148 u8 master_max; 149 }; 150 151 static struct pmu_port_event_ranges port_event_range[] = { 152 [CCI_REV_R0] = { 153 .slave_min = CCI_REV_R0_SLAVE_PORT_MIN_EV, 154 .slave_max = CCI_REV_R0_SLAVE_PORT_MAX_EV, 155 .master_min = CCI_REV_R0_MASTER_PORT_MIN_EV, 156 .master_max = CCI_REV_R0_MASTER_PORT_MAX_EV, 157 }, 158 [CCI_REV_R1] = { 159 .slave_min = CCI_REV_R1_SLAVE_PORT_MIN_EV, 160 .slave_max = CCI_REV_R1_SLAVE_PORT_MAX_EV, 161 .master_min = CCI_REV_R1_MASTER_PORT_MIN_EV, 162 .master_max = CCI_REV_R1_MASTER_PORT_MAX_EV, 163 }, 164 }; 165 166 /* 167 * Export different PMU names for the different revisions so userspace knows 168 * because the event ids are different 169 */ 170 static char *const pmu_names[] = { 171 [CCI_REV_R0] = "CCI_400", 172 [CCI_REV_R1] = "CCI_400_r1", 173 }; 174 175 struct cci_pmu_hw_events { 176 struct perf_event *events[CCI_PMU_MAX_HW_EVENTS]; 177 unsigned long used_mask[BITS_TO_LONGS(CCI_PMU_MAX_HW_EVENTS)]; 178 raw_spinlock_t pmu_lock; 179 }; 180 181 struct cci_pmu { 182 void __iomem *base; 183 struct pmu pmu; 184 int nr_irqs; 185 int irqs[CCI_PMU_MAX_HW_EVENTS]; 186 unsigned long active_irqs; 187 struct pmu_port_event_ranges *port_ranges; 188 struct cci_pmu_hw_events hw_events; 189 struct platform_device *plat_device; 190 int num_events; 191 atomic_t active_events; 192 struct mutex reserve_mutex; 193 cpumask_t cpus; 194 }; 195 static struct cci_pmu *pmu; 196 197 #define to_cci_pmu(c) (container_of(c, struct cci_pmu, pmu)) 198 199 static bool is_duplicate_irq(int irq, 
int *irqs, int nr_irqs) 200 { 201 int i; 202 203 for (i = 0; i < nr_irqs; i++) 204 if (irq == irqs[i]) 205 return true; 206 207 return false; 208 } 209 210 static int probe_cci_revision(void) 211 { 212 int rev; 213 rev = readl_relaxed(cci_ctrl_base + CCI_PID2) & CCI_PID2_REV_MASK; 214 rev >>= CCI_PID2_REV_SHIFT; 215 216 if (rev < CCI_REV_R1_PX) 217 return CCI_REV_R0; 218 else 219 return CCI_REV_R1; 220 } 221 222 static struct pmu_port_event_ranges *port_range_by_rev(void) 223 { 224 int rev = probe_cci_revision(); 225 226 return &port_event_range[rev]; 227 } 228 229 static int pmu_is_valid_slave_event(u8 ev_code) 230 { 231 return pmu->port_ranges->slave_min <= ev_code && 232 ev_code <= pmu->port_ranges->slave_max; 233 } 234 235 static int pmu_is_valid_master_event(u8 ev_code) 236 { 237 return pmu->port_ranges->master_min <= ev_code && 238 ev_code <= pmu->port_ranges->master_max; 239 } 240 241 static int pmu_validate_hw_event(u8 hw_event) 242 { 243 u8 ev_source = CCI_PMU_EVENT_SOURCE(hw_event); 244 u8 ev_code = CCI_PMU_EVENT_CODE(hw_event); 245 246 switch (ev_source) { 247 case CCI_PORT_S0: 248 case CCI_PORT_S1: 249 case CCI_PORT_S2: 250 case CCI_PORT_S3: 251 case CCI_PORT_S4: 252 /* Slave Interface */ 253 if (pmu_is_valid_slave_event(ev_code)) 254 return hw_event; 255 break; 256 case CCI_PORT_M0: 257 case CCI_PORT_M1: 258 case CCI_PORT_M2: 259 /* Master Interface */ 260 if (pmu_is_valid_master_event(ev_code)) 261 return hw_event; 262 break; 263 } 264 265 return -ENOENT; 266 } 267 268 static int pmu_is_valid_counter(struct cci_pmu *cci_pmu, int idx) 269 { 270 return CCI_PMU_CYCLE_CNTR_IDX <= idx && 271 idx <= CCI_PMU_CNTR_LAST(cci_pmu); 272 } 273 274 static u32 pmu_read_register(int idx, unsigned int offset) 275 { 276 return readl_relaxed(pmu->base + CCI_PMU_CNTR_BASE(idx) + offset); 277 } 278 279 static void pmu_write_register(u32 value, int idx, unsigned int offset) 280 { 281 return writel_relaxed(value, pmu->base + CCI_PMU_CNTR_BASE(idx) + offset); 282 } 283 284 
static void pmu_disable_counter(int idx) 285 { 286 pmu_write_register(0, idx, CCI_PMU_CNTR_CTRL); 287 } 288 289 static void pmu_enable_counter(int idx) 290 { 291 pmu_write_register(1, idx, CCI_PMU_CNTR_CTRL); 292 } 293 294 static void pmu_set_event(int idx, unsigned long event) 295 { 296 event &= CCI_PMU_EVENT_MASK; 297 pmu_write_register(event, idx, CCI_PMU_EVT_SEL); 298 } 299 300 static u32 pmu_get_max_counters(void) 301 { 302 u32 n_cnts = (readl_relaxed(cci_ctrl_base + CCI_PMCR) & 303 CCI_PMCR_NCNT_MASK) >> CCI_PMCR_NCNT_SHIFT; 304 305 /* add 1 for cycle counter */ 306 return n_cnts + 1; 307 } 308 309 static int pmu_get_event_idx(struct cci_pmu_hw_events *hw, struct perf_event *event) 310 { 311 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); 312 struct hw_perf_event *hw_event = &event->hw; 313 unsigned long cci_event = hw_event->config_base & CCI_PMU_EVENT_MASK; 314 int idx; 315 316 if (cci_event == CCI_PMU_CYCLES) { 317 if (test_and_set_bit(CCI_PMU_CYCLE_CNTR_IDX, hw->used_mask)) 318 return -EAGAIN; 319 320 return CCI_PMU_CYCLE_CNTR_IDX; 321 } 322 323 for (idx = CCI_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx) 324 if (!test_and_set_bit(idx, hw->used_mask)) 325 return idx; 326 327 /* No counters available */ 328 return -EAGAIN; 329 } 330 331 static int pmu_map_event(struct perf_event *event) 332 { 333 int mapping; 334 u8 config = event->attr.config & CCI_PMU_EVENT_MASK; 335 336 if (event->attr.type < PERF_TYPE_MAX) 337 return -ENOENT; 338 339 if (config == CCI_PMU_CYCLES) 340 mapping = config; 341 else 342 mapping = pmu_validate_hw_event(config); 343 344 return mapping; 345 } 346 347 static int pmu_request_irq(struct cci_pmu *cci_pmu, irq_handler_t handler) 348 { 349 int i; 350 struct platform_device *pmu_device = cci_pmu->plat_device; 351 352 if (unlikely(!pmu_device)) 353 return -ENODEV; 354 355 if (pmu->nr_irqs < 1) { 356 dev_err(&pmu_device->dev, "no irqs for CCI PMUs defined\n"); 357 return -ENODEV; 358 } 359 360 /* 361 * Register all 
available CCI PMU interrupts. In the interrupt handler 362 * we iterate over the counters checking for interrupt source (the 363 * overflowing counter) and clear it. 364 * 365 * This should allow handling of non-unique interrupt for the counters. 366 */ 367 for (i = 0; i < pmu->nr_irqs; i++) { 368 int err = request_irq(pmu->irqs[i], handler, IRQF_SHARED, 369 "arm-cci-pmu", cci_pmu); 370 if (err) { 371 dev_err(&pmu_device->dev, "unable to request IRQ%d for ARM CCI PMU counters\n", 372 pmu->irqs[i]); 373 return err; 374 } 375 376 set_bit(i, &pmu->active_irqs); 377 } 378 379 return 0; 380 } 381 382 static void pmu_free_irq(struct cci_pmu *cci_pmu) 383 { 384 int i; 385 386 for (i = 0; i < pmu->nr_irqs; i++) { 387 if (!test_and_clear_bit(i, &pmu->active_irqs)) 388 continue; 389 390 free_irq(pmu->irqs[i], cci_pmu); 391 } 392 } 393 394 static u32 pmu_read_counter(struct perf_event *event) 395 { 396 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); 397 struct hw_perf_event *hw_counter = &event->hw; 398 int idx = hw_counter->idx; 399 u32 value; 400 401 if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) { 402 dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); 403 return 0; 404 } 405 value = pmu_read_register(idx, CCI_PMU_CNTR); 406 407 return value; 408 } 409 410 static void pmu_write_counter(struct perf_event *event, u32 value) 411 { 412 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); 413 struct hw_perf_event *hw_counter = &event->hw; 414 int idx = hw_counter->idx; 415 416 if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) 417 dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); 418 else 419 pmu_write_register(value, idx, CCI_PMU_CNTR); 420 } 421 422 static u64 pmu_event_update(struct perf_event *event) 423 { 424 struct hw_perf_event *hwc = &event->hw; 425 u64 delta, prev_raw_count, new_raw_count; 426 427 do { 428 prev_raw_count = local64_read(&hwc->prev_count); 429 new_raw_count = pmu_read_counter(event); 430 } while 
(local64_cmpxchg(&hwc->prev_count, prev_raw_count, 431 new_raw_count) != prev_raw_count); 432 433 delta = (new_raw_count - prev_raw_count) & CCI_PMU_CNTR_MASK; 434 435 local64_add(delta, &event->count); 436 437 return new_raw_count; 438 } 439 440 static void pmu_read(struct perf_event *event) 441 { 442 pmu_event_update(event); 443 } 444 445 void pmu_event_set_period(struct perf_event *event) 446 { 447 struct hw_perf_event *hwc = &event->hw; 448 /* 449 * The CCI PMU counters have a period of 2^32. To account for the 450 * possiblity of extreme interrupt latency we program for a period of 451 * half that. Hopefully we can handle the interrupt before another 2^31 452 * events occur and the counter overtakes its previous value. 453 */ 454 u64 val = 1ULL << 31; 455 local64_set(&hwc->prev_count, val); 456 pmu_write_counter(event, val); 457 } 458 459 static irqreturn_t pmu_handle_irq(int irq_num, void *dev) 460 { 461 unsigned long flags; 462 struct cci_pmu *cci_pmu = dev; 463 struct cci_pmu_hw_events *events = &pmu->hw_events; 464 int idx, handled = IRQ_NONE; 465 466 raw_spin_lock_irqsave(&events->pmu_lock, flags); 467 /* 468 * Iterate over counters and update the corresponding perf events. 469 * This should work regardless of whether we have per-counter overflow 470 * interrupt or a combined overflow interrupt. 471 */ 472 for (idx = CCI_PMU_CYCLE_CNTR_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++) { 473 struct perf_event *event = events->events[idx]; 474 struct hw_perf_event *hw_counter; 475 476 if (!event) 477 continue; 478 479 hw_counter = &event->hw; 480 481 /* Did this counter overflow? 
*/ 482 if (!(pmu_read_register(idx, CCI_PMU_OVRFLW) & 483 CCI_PMU_OVRFLW_FLAG)) 484 continue; 485 486 pmu_write_register(CCI_PMU_OVRFLW_FLAG, idx, CCI_PMU_OVRFLW); 487 488 pmu_event_update(event); 489 pmu_event_set_period(event); 490 handled = IRQ_HANDLED; 491 } 492 raw_spin_unlock_irqrestore(&events->pmu_lock, flags); 493 494 return IRQ_RETVAL(handled); 495 } 496 497 static int cci_pmu_get_hw(struct cci_pmu *cci_pmu) 498 { 499 int ret = pmu_request_irq(cci_pmu, pmu_handle_irq); 500 if (ret) { 501 pmu_free_irq(cci_pmu); 502 return ret; 503 } 504 return 0; 505 } 506 507 static void cci_pmu_put_hw(struct cci_pmu *cci_pmu) 508 { 509 pmu_free_irq(cci_pmu); 510 } 511 512 static void hw_perf_event_destroy(struct perf_event *event) 513 { 514 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); 515 atomic_t *active_events = &cci_pmu->active_events; 516 struct mutex *reserve_mutex = &cci_pmu->reserve_mutex; 517 518 if (atomic_dec_and_mutex_lock(active_events, reserve_mutex)) { 519 cci_pmu_put_hw(cci_pmu); 520 mutex_unlock(reserve_mutex); 521 } 522 } 523 524 static void cci_pmu_enable(struct pmu *pmu) 525 { 526 struct cci_pmu *cci_pmu = to_cci_pmu(pmu); 527 struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; 528 int enabled = bitmap_weight(hw_events->used_mask, cci_pmu->num_events); 529 unsigned long flags; 530 u32 val; 531 532 if (!enabled) 533 return; 534 535 raw_spin_lock_irqsave(&hw_events->pmu_lock, flags); 536 537 /* Enable all the PMU counters. */ 538 val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN; 539 writel(val, cci_ctrl_base + CCI_PMCR); 540 raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags); 541 542 } 543 544 static void cci_pmu_disable(struct pmu *pmu) 545 { 546 struct cci_pmu *cci_pmu = to_cci_pmu(pmu); 547 struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; 548 unsigned long flags; 549 u32 val; 550 551 raw_spin_lock_irqsave(&hw_events->pmu_lock, flags); 552 553 /* Disable all the PMU counters. 
*/ 554 val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN; 555 writel(val, cci_ctrl_base + CCI_PMCR); 556 raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags); 557 } 558 559 static void cci_pmu_start(struct perf_event *event, int pmu_flags) 560 { 561 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); 562 struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; 563 struct hw_perf_event *hwc = &event->hw; 564 int idx = hwc->idx; 565 unsigned long flags; 566 567 /* 568 * To handle interrupt latency, we always reprogram the period 569 * regardlesss of PERF_EF_RELOAD. 570 */ 571 if (pmu_flags & PERF_EF_RELOAD) 572 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); 573 574 hwc->state = 0; 575 576 if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) { 577 dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); 578 return; 579 } 580 581 raw_spin_lock_irqsave(&hw_events->pmu_lock, flags); 582 583 /* Configure the event to count, unless you are counting cycles */ 584 if (idx != CCI_PMU_CYCLE_CNTR_IDX) 585 pmu_set_event(idx, hwc->config_base); 586 587 pmu_event_set_period(event); 588 pmu_enable_counter(idx); 589 590 raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags); 591 } 592 593 static void cci_pmu_stop(struct perf_event *event, int pmu_flags) 594 { 595 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); 596 struct hw_perf_event *hwc = &event->hw; 597 int idx = hwc->idx; 598 599 if (hwc->state & PERF_HES_STOPPED) 600 return; 601 602 if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) { 603 dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); 604 return; 605 } 606 607 /* 608 * We always reprogram the counter, so ignore PERF_EF_UPDATE. 
See 609 * cci_pmu_start() 610 */ 611 pmu_disable_counter(idx); 612 pmu_event_update(event); 613 hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; 614 } 615 616 static int cci_pmu_add(struct perf_event *event, int flags) 617 { 618 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); 619 struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; 620 struct hw_perf_event *hwc = &event->hw; 621 int idx; 622 int err = 0; 623 624 perf_pmu_disable(event->pmu); 625 626 /* If we don't have a space for the counter then finish early. */ 627 idx = pmu_get_event_idx(hw_events, event); 628 if (idx < 0) { 629 err = idx; 630 goto out; 631 } 632 633 event->hw.idx = idx; 634 hw_events->events[idx] = event; 635 636 hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; 637 if (flags & PERF_EF_START) 638 cci_pmu_start(event, PERF_EF_RELOAD); 639 640 /* Propagate our changes to the userspace mapping. */ 641 perf_event_update_userpage(event); 642 643 out: 644 perf_pmu_enable(event->pmu); 645 return err; 646 } 647 648 static void cci_pmu_del(struct perf_event *event, int flags) 649 { 650 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); 651 struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; 652 struct hw_perf_event *hwc = &event->hw; 653 int idx = hwc->idx; 654 655 cci_pmu_stop(event, PERF_EF_UPDATE); 656 hw_events->events[idx] = NULL; 657 clear_bit(idx, hw_events->used_mask); 658 659 perf_event_update_userpage(event); 660 } 661 662 static int 663 validate_event(struct cci_pmu_hw_events *hw_events, 664 struct perf_event *event) 665 { 666 if (is_software_event(event)) 667 return 1; 668 669 if (event->state < PERF_EVENT_STATE_OFF) 670 return 1; 671 672 if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) 673 return 1; 674 675 return pmu_get_event_idx(hw_events, event) >= 0; 676 } 677 678 static int 679 validate_group(struct perf_event *event) 680 { 681 struct perf_event *sibling, *leader = event->group_leader; 682 struct cci_pmu_hw_events fake_pmu = { 683 /* 684 * 
Initialise the fake PMU. We only need to populate the 685 * used_mask for the purposes of validation. 686 */ 687 .used_mask = CPU_BITS_NONE, 688 }; 689 690 if (!validate_event(&fake_pmu, leader)) 691 return -EINVAL; 692 693 list_for_each_entry(sibling, &leader->sibling_list, group_entry) { 694 if (!validate_event(&fake_pmu, sibling)) 695 return -EINVAL; 696 } 697 698 if (!validate_event(&fake_pmu, event)) 699 return -EINVAL; 700 701 return 0; 702 } 703 704 static int 705 __hw_perf_event_init(struct perf_event *event) 706 { 707 struct hw_perf_event *hwc = &event->hw; 708 int mapping; 709 710 mapping = pmu_map_event(event); 711 712 if (mapping < 0) { 713 pr_debug("event %x:%llx not supported\n", event->attr.type, 714 event->attr.config); 715 return mapping; 716 } 717 718 /* 719 * We don't assign an index until we actually place the event onto 720 * hardware. Use -1 to signify that we haven't decided where to put it 721 * yet. 722 */ 723 hwc->idx = -1; 724 hwc->config_base = 0; 725 hwc->config = 0; 726 hwc->event_base = 0; 727 728 /* 729 * Store the event encoding into the config_base field. 730 */ 731 hwc->config_base |= (unsigned long)mapping; 732 733 /* 734 * Limit the sample_period to half of the counter width. That way, the 735 * new counter value is far less likely to overtake the previous one 736 * unless you have some serious IRQ latency issues. 
737 */ 738 hwc->sample_period = CCI_PMU_CNTR_MASK >> 1; 739 hwc->last_period = hwc->sample_period; 740 local64_set(&hwc->period_left, hwc->sample_period); 741 742 if (event->group_leader != event) { 743 if (validate_group(event) != 0) 744 return -EINVAL; 745 } 746 747 return 0; 748 } 749 750 static int cci_pmu_event_init(struct perf_event *event) 751 { 752 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); 753 atomic_t *active_events = &cci_pmu->active_events; 754 int err = 0; 755 int cpu; 756 757 if (event->attr.type != event->pmu->type) 758 return -ENOENT; 759 760 /* Shared by all CPUs, no meaningful state to sample */ 761 if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) 762 return -EOPNOTSUPP; 763 764 /* We have no filtering of any kind */ 765 if (event->attr.exclude_user || 766 event->attr.exclude_kernel || 767 event->attr.exclude_hv || 768 event->attr.exclude_idle || 769 event->attr.exclude_host || 770 event->attr.exclude_guest) 771 return -EINVAL; 772 773 /* 774 * Following the example set by other "uncore" PMUs, we accept any CPU 775 * and rewrite its affinity dynamically rather than having perf core 776 * handle cpu == -1 and pid == -1 for this case. 777 * 778 * The perf core will pin online CPUs for the duration of this call and 779 * the event being installed into its context, so the PMU's CPU can't 780 * change under our feet. 
781 */ 782 cpu = cpumask_first(&cci_pmu->cpus); 783 if (event->cpu < 0 || cpu < 0) 784 return -EINVAL; 785 event->cpu = cpu; 786 787 event->destroy = hw_perf_event_destroy; 788 if (!atomic_inc_not_zero(active_events)) { 789 mutex_lock(&cci_pmu->reserve_mutex); 790 if (atomic_read(active_events) == 0) 791 err = cci_pmu_get_hw(cci_pmu); 792 if (!err) 793 atomic_inc(active_events); 794 mutex_unlock(&cci_pmu->reserve_mutex); 795 } 796 if (err) 797 return err; 798 799 err = __hw_perf_event_init(event); 800 if (err) 801 hw_perf_event_destroy(event); 802 803 return err; 804 } 805 806 static ssize_t pmu_attr_cpumask_show(struct device *dev, 807 struct device_attribute *attr, char *buf) 808 { 809 int n = scnprintf(buf, PAGE_SIZE - 1, "%*pbl", 810 cpumask_pr_args(&pmu->cpus)); 811 buf[n++] = '\n'; 812 buf[n] = '\0'; 813 return n; 814 } 815 816 static DEVICE_ATTR(cpumask, S_IRUGO, pmu_attr_cpumask_show, NULL); 817 818 static struct attribute *pmu_attrs[] = { 819 &dev_attr_cpumask.attr, 820 NULL, 821 }; 822 823 static struct attribute_group pmu_attr_group = { 824 .attrs = pmu_attrs, 825 }; 826 827 static const struct attribute_group *pmu_attr_groups[] = { 828 &pmu_attr_group, 829 NULL 830 }; 831 832 static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev) 833 { 834 char *name = pmu_names[probe_cci_revision()]; 835 cci_pmu->pmu = (struct pmu) { 836 .name = pmu_names[probe_cci_revision()], 837 .task_ctx_nr = perf_invalid_context, 838 .pmu_enable = cci_pmu_enable, 839 .pmu_disable = cci_pmu_disable, 840 .event_init = cci_pmu_event_init, 841 .add = cci_pmu_add, 842 .del = cci_pmu_del, 843 .start = cci_pmu_start, 844 .stop = cci_pmu_stop, 845 .read = pmu_read, 846 .attr_groups = pmu_attr_groups, 847 }; 848 849 cci_pmu->plat_device = pdev; 850 cci_pmu->num_events = pmu_get_max_counters(); 851 852 return perf_pmu_register(&cci_pmu->pmu, name, -1); 853 } 854 855 static int cci_pmu_cpu_notifier(struct notifier_block *self, 856 unsigned long action, void *hcpu) 857 
{ 858 unsigned int cpu = (long)hcpu; 859 unsigned int target; 860 861 switch (action & ~CPU_TASKS_FROZEN) { 862 case CPU_DOWN_PREPARE: 863 if (!cpumask_test_and_clear_cpu(cpu, &pmu->cpus)) 864 break; 865 target = cpumask_any_but(cpu_online_mask, cpu); 866 if (target < 0) // UP, last CPU 867 break; 868 /* 869 * TODO: migrate context once core races on event->ctx have 870 * been fixed. 871 */ 872 cpumask_set_cpu(target, &pmu->cpus); 873 default: 874 break; 875 } 876 877 return NOTIFY_OK; 878 } 879 880 static struct notifier_block cci_pmu_cpu_nb = { 881 .notifier_call = cci_pmu_cpu_notifier, 882 /* 883 * to migrate uncore events, our notifier should be executed 884 * before perf core's notifier. 885 */ 886 .priority = CPU_PRI_PERF + 1, 887 }; 888 889 static const struct of_device_id arm_cci_pmu_matches[] = { 890 { 891 .compatible = "arm,cci-400-pmu", 892 }, 893 {}, 894 }; 895 896 static int cci_pmu_probe(struct platform_device *pdev) 897 { 898 struct resource *res; 899 int i, ret, irq; 900 901 pmu = devm_kzalloc(&pdev->dev, sizeof(*pmu), GFP_KERNEL); 902 if (!pmu) 903 return -ENOMEM; 904 905 res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 906 pmu->base = devm_ioremap_resource(&pdev->dev, res); 907 if (IS_ERR(pmu->base)) 908 return -ENOMEM; 909 910 /* 911 * CCI PMU has 5 overflow signals - one per counter; but some may be tied 912 * together to a common interrupt. 913 */ 914 pmu->nr_irqs = 0; 915 for (i = 0; i < CCI_PMU_MAX_HW_EVENTS; i++) { 916 irq = platform_get_irq(pdev, i); 917 if (irq < 0) 918 break; 919 920 if (is_duplicate_irq(irq, pmu->irqs, pmu->nr_irqs)) 921 continue; 922 923 pmu->irqs[pmu->nr_irqs++] = irq; 924 } 925 926 /* 927 * Ensure that the device tree has as many interrupts as the number 928 * of counters. 
929 */ 930 if (i < CCI_PMU_MAX_HW_EVENTS) { 931 dev_warn(&pdev->dev, "In-correct number of interrupts: %d, should be %d\n", 932 i, CCI_PMU_MAX_HW_EVENTS); 933 return -EINVAL; 934 } 935 936 pmu->port_ranges = port_range_by_rev(); 937 if (!pmu->port_ranges) { 938 dev_warn(&pdev->dev, "CCI PMU version not supported\n"); 939 return -EINVAL; 940 } 941 942 raw_spin_lock_init(&pmu->hw_events.pmu_lock); 943 mutex_init(&pmu->reserve_mutex); 944 atomic_set(&pmu->active_events, 0); 945 cpumask_set_cpu(smp_processor_id(), &pmu->cpus); 946 947 ret = register_cpu_notifier(&cci_pmu_cpu_nb); 948 if (ret) 949 return ret; 950 951 ret = cci_pmu_init(pmu, pdev); 952 if (ret) 953 return ret; 954 955 return 0; 956 } 957 958 static int cci_platform_probe(struct platform_device *pdev) 959 { 960 if (!cci_probed()) 961 return -ENODEV; 962 963 return of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev); 964 } 965 966 #endif /* CONFIG_HW_PERF_EVENTS */ 967 968 struct cpu_port { 969 u64 mpidr; 970 u32 port; 971 }; 972 973 /* 974 * Use the port MSB as valid flag, shift can be made dynamic 975 * by computing number of bits required for port indexes. 976 * Code disabling CCI cpu ports runs with D-cache invalidated 977 * and SCTLR bit clear so data accesses must be kept to a minimum 978 * to improve performance; for now shift is left static to 979 * avoid one more data access while disabling the CCI port. 
980 */ 981 #define PORT_VALID_SHIFT 31 982 #define PORT_VALID (0x1 << PORT_VALID_SHIFT) 983 984 static inline void init_cpu_port(struct cpu_port *port, u32 index, u64 mpidr) 985 { 986 port->port = PORT_VALID | index; 987 port->mpidr = mpidr; 988 } 989 990 static inline bool cpu_port_is_valid(struct cpu_port *port) 991 { 992 return !!(port->port & PORT_VALID); 993 } 994 995 static inline bool cpu_port_match(struct cpu_port *port, u64 mpidr) 996 { 997 return port->mpidr == (mpidr & MPIDR_HWID_BITMASK); 998 } 999 1000 static struct cpu_port cpu_port[NR_CPUS]; 1001 1002 /** 1003 * __cci_ace_get_port - Function to retrieve the port index connected to 1004 * a cpu or device. 1005 * 1006 * @dn: device node of the device to look-up 1007 * @type: port type 1008 * 1009 * Return value: 1010 * - CCI port index if success 1011 * - -ENODEV if failure 1012 */ 1013 static int __cci_ace_get_port(struct device_node *dn, int type) 1014 { 1015 int i; 1016 bool ace_match; 1017 struct device_node *cci_portn; 1018 1019 cci_portn = of_parse_phandle(dn, "cci-control-port", 0); 1020 for (i = 0; i < nb_cci_ports; i++) { 1021 ace_match = ports[i].type == type; 1022 if (ace_match && cci_portn == ports[i].dn) 1023 return i; 1024 } 1025 return -ENODEV; 1026 } 1027 1028 int cci_ace_get_port(struct device_node *dn) 1029 { 1030 return __cci_ace_get_port(dn, ACE_LITE_PORT); 1031 } 1032 EXPORT_SYMBOL_GPL(cci_ace_get_port); 1033 1034 static void cci_ace_init_ports(void) 1035 { 1036 int port, cpu; 1037 struct device_node *cpun; 1038 1039 /* 1040 * Port index look-up speeds up the function disabling ports by CPU, 1041 * since the logical to port index mapping is done once and does 1042 * not change after system boot. 1043 * The stashed index array is initialized for all possible CPUs 1044 * at probe time. 
1045 */ 1046 for_each_possible_cpu(cpu) { 1047 /* too early to use cpu->of_node */ 1048 cpun = of_get_cpu_node(cpu, NULL); 1049 1050 if (WARN(!cpun, "Missing cpu device node\n")) 1051 continue; 1052 1053 port = __cci_ace_get_port(cpun, ACE_PORT); 1054 if (port < 0) 1055 continue; 1056 1057 init_cpu_port(&cpu_port[cpu], port, cpu_logical_map(cpu)); 1058 } 1059 1060 for_each_possible_cpu(cpu) { 1061 WARN(!cpu_port_is_valid(&cpu_port[cpu]), 1062 "CPU %u does not have an associated CCI port\n", 1063 cpu); 1064 } 1065 } 1066 /* 1067 * Functions to enable/disable a CCI interconnect slave port 1068 * 1069 * They are called by low-level power management code to disable slave 1070 * interfaces snoops and DVM broadcast. 1071 * Since they may execute with cache data allocation disabled and 1072 * after the caches have been cleaned and invalidated the functions provide 1073 * no explicit locking since they may run with D-cache disabled, so normal 1074 * cacheable kernel locks based on ldrex/strex may not work. 1075 * Locking has to be provided by BSP implementations to ensure proper 1076 * operations. 1077 */ 1078 1079 /** 1080 * cci_port_control() - function to control a CCI port 1081 * 1082 * @port: index of the port to setup 1083 * @enable: if true enables the port, if false disables it 1084 */ 1085 static void notrace cci_port_control(unsigned int port, bool enable) 1086 { 1087 void __iomem *base = ports[port].base; 1088 1089 writel_relaxed(enable ? CCI_ENABLE_REQ : 0, base + CCI_PORT_CTRL); 1090 /* 1091 * This function is called from power down procedures 1092 * and must not execute any instruction that might 1093 * cause the processor to be put in a quiescent state 1094 * (eg wfi). Hence, cpu_relax() can not be added to this 1095 * read loop to optimize power, since it might hide possibly 1096 * disruptive operations. 
1097 */ 1098 while (readl_relaxed(cci_ctrl_base + CCI_CTRL_STATUS) & 0x1) 1099 ; 1100 } 1101 1102 /** 1103 * cci_disable_port_by_cpu() - function to disable a CCI port by CPU 1104 * reference 1105 * 1106 * @mpidr: mpidr of the CPU whose CCI port should be disabled 1107 * 1108 * Disabling a CCI port for a CPU implies disabling the CCI port 1109 * controlling that CPU cluster. Code disabling CPU CCI ports 1110 * must make sure that the CPU running the code is the last active CPU 1111 * in the cluster ie all other CPUs are quiescent in a low power state. 1112 * 1113 * Return: 1114 * 0 on success 1115 * -ENODEV on port look-up failure 1116 */ 1117 int notrace cci_disable_port_by_cpu(u64 mpidr) 1118 { 1119 int cpu; 1120 bool is_valid; 1121 for (cpu = 0; cpu < nr_cpu_ids; cpu++) { 1122 is_valid = cpu_port_is_valid(&cpu_port[cpu]); 1123 if (is_valid && cpu_port_match(&cpu_port[cpu], mpidr)) { 1124 cci_port_control(cpu_port[cpu].port, false); 1125 return 0; 1126 } 1127 } 1128 return -ENODEV; 1129 } 1130 EXPORT_SYMBOL_GPL(cci_disable_port_by_cpu); 1131 1132 /** 1133 * cci_enable_port_for_self() - enable a CCI port for calling CPU 1134 * 1135 * Enabling a CCI port for the calling CPU implies enabling the CCI 1136 * port controlling that CPU's cluster. Caller must make sure that the 1137 * CPU running the code is the first active CPU in the cluster and all 1138 * other CPUs are quiescent in a low power state or waiting for this CPU 1139 * to complete the CCI initialization. 1140 * 1141 * Because this is called when the MMU is still off and with no stack, 1142 * the code must be position independent and ideally rely on callee 1143 * clobbered registers only. To achieve this we must code this function 1144 * entirely in assembler. 1145 * 1146 * On success this returns with the proper CCI port enabled. In case of 1147 * any failure this never returns as the inability to enable the CCI is 1148 * fatal and there is no possible recovery at this stage. 
1149 */ 1150 asmlinkage void __naked cci_enable_port_for_self(void) 1151 { 1152 asm volatile ("\n" 1153 " .arch armv7-a\n" 1154 " mrc p15, 0, r0, c0, c0, 5 @ get MPIDR value \n" 1155 " and r0, r0, #"__stringify(MPIDR_HWID_BITMASK)" \n" 1156 " adr r1, 5f \n" 1157 " ldr r2, [r1] \n" 1158 " add r1, r1, r2 @ &cpu_port \n" 1159 " add ip, r1, %[sizeof_cpu_port] \n" 1160 1161 /* Loop over the cpu_port array looking for a matching MPIDR */ 1162 "1: ldr r2, [r1, %[offsetof_cpu_port_mpidr_lsb]] \n" 1163 " cmp r2, r0 @ compare MPIDR \n" 1164 " bne 2f \n" 1165 1166 /* Found a match, now test port validity */ 1167 " ldr r3, [r1, %[offsetof_cpu_port_port]] \n" 1168 " tst r3, #"__stringify(PORT_VALID)" \n" 1169 " bne 3f \n" 1170 1171 /* no match, loop with the next cpu_port entry */ 1172 "2: add r1, r1, %[sizeof_struct_cpu_port] \n" 1173 " cmp r1, ip @ done? \n" 1174 " blo 1b \n" 1175 1176 /* CCI port not found -- cheaply try to stall this CPU */ 1177 "cci_port_not_found: \n" 1178 " wfi \n" 1179 " wfe \n" 1180 " b cci_port_not_found \n" 1181 1182 /* Use matched port index to look up the corresponding ports entry */ 1183 "3: bic r3, r3, #"__stringify(PORT_VALID)" \n" 1184 " adr r0, 6f \n" 1185 " ldmia r0, {r1, r2} \n" 1186 " sub r1, r1, r0 @ virt - phys \n" 1187 " ldr r0, [r0, r2] @ *(&ports) \n" 1188 " mov r2, %[sizeof_struct_ace_port] \n" 1189 " mla r0, r2, r3, r0 @ &ports[index] \n" 1190 " sub r0, r0, r1 @ virt_to_phys() \n" 1191 1192 /* Enable the CCI port */ 1193 " ldr r0, [r0, %[offsetof_port_phys]] \n" 1194 " mov r3, %[cci_enable_req]\n" 1195 " str r3, [r0, #"__stringify(CCI_PORT_CTRL)"] \n" 1196 1197 /* poll the status reg for completion */ 1198 " adr r1, 7f \n" 1199 " ldr r0, [r1] \n" 1200 " ldr r0, [r0, r1] @ cci_ctrl_base \n" 1201 "4: ldr r1, [r0, #"__stringify(CCI_CTRL_STATUS)"] \n" 1202 " tst r1, %[cci_control_status_bits] \n" 1203 " bne 4b \n" 1204 1205 " mov r0, #0 \n" 1206 " bx lr \n" 1207 1208 " .align 2 \n" 1209 "5: .word cpu_port - . \n" 1210 "6: .word . 
\n" 1211 " .word ports - 6b \n" 1212 "7: .word cci_ctrl_phys - . \n" 1213 : : 1214 [sizeof_cpu_port] "i" (sizeof(cpu_port)), 1215 [cci_enable_req] "i" cpu_to_le32(CCI_ENABLE_REQ), 1216 [cci_control_status_bits] "i" cpu_to_le32(1), 1217 #ifndef __ARMEB__ 1218 [offsetof_cpu_port_mpidr_lsb] "i" (offsetof(struct cpu_port, mpidr)), 1219 #else 1220 [offsetof_cpu_port_mpidr_lsb] "i" (offsetof(struct cpu_port, mpidr)+4), 1221 #endif 1222 [offsetof_cpu_port_port] "i" (offsetof(struct cpu_port, port)), 1223 [sizeof_struct_cpu_port] "i" (sizeof(struct cpu_port)), 1224 [sizeof_struct_ace_port] "i" (sizeof(struct cci_ace_port)), 1225 [offsetof_port_phys] "i" (offsetof(struct cci_ace_port, phys)) ); 1226 1227 unreachable(); 1228 } 1229 1230 /** 1231 * __cci_control_port_by_device() - function to control a CCI port by device 1232 * reference 1233 * 1234 * @dn: device node pointer of the device whose CCI port should be 1235 * controlled 1236 * @enable: if true enables the port, if false disables it 1237 * 1238 * Return: 1239 * 0 on success 1240 * -ENODEV on port look-up failure 1241 */ 1242 int notrace __cci_control_port_by_device(struct device_node *dn, bool enable) 1243 { 1244 int port; 1245 1246 if (!dn) 1247 return -ENODEV; 1248 1249 port = __cci_ace_get_port(dn, ACE_LITE_PORT); 1250 if (WARN_ONCE(port < 0, "node %s ACE lite port look-up failure\n", 1251 dn->full_name)) 1252 return -ENODEV; 1253 cci_port_control(port, enable); 1254 return 0; 1255 } 1256 EXPORT_SYMBOL_GPL(__cci_control_port_by_device); 1257 1258 /** 1259 * __cci_control_port_by_index() - function to control a CCI port by port index 1260 * 1261 * @port: port index previously retrieved with cci_ace_get_port() 1262 * @enable: if true enables the port, if false disables it 1263 * 1264 * Return: 1265 * 0 on success 1266 * -ENODEV on port index out of range 1267 * -EPERM if operation carried out on an ACE PORT 1268 */ 1269 int notrace __cci_control_port_by_index(u32 port, bool enable) 1270 { 1271 if (port >= 
nb_cci_ports || ports[port].type == ACE_INVALID_PORT) 1272 return -ENODEV; 1273 /* 1274 * CCI control for ports connected to CPUS is extremely fragile 1275 * and must be made to go through a specific and controlled 1276 * interface (ie cci_disable_port_by_cpu(); control by general purpose 1277 * indexing is therefore disabled for ACE ports. 1278 */ 1279 if (ports[port].type == ACE_PORT) 1280 return -EPERM; 1281 1282 cci_port_control(port, enable); 1283 return 0; 1284 } 1285 EXPORT_SYMBOL_GPL(__cci_control_port_by_index); 1286 1287 static const struct cci_nb_ports cci400_ports = { 1288 .nb_ace = 2, 1289 .nb_ace_lite = 3 1290 }; 1291 1292 static const struct of_device_id arm_cci_matches[] = { 1293 {.compatible = "arm,cci-400", .data = &cci400_ports }, 1294 {}, 1295 }; 1296 1297 static const struct of_device_id arm_cci_ctrl_if_matches[] = { 1298 {.compatible = "arm,cci-400-ctrl-if", }, 1299 {}, 1300 }; 1301 1302 static int cci_probe(void) 1303 { 1304 struct cci_nb_ports const *cci_config; 1305 int ret, i, nb_ace = 0, nb_ace_lite = 0; 1306 struct device_node *np, *cp; 1307 struct resource res; 1308 const char *match_str; 1309 bool is_ace; 1310 1311 np = of_find_matching_node(NULL, arm_cci_matches); 1312 if (!np) 1313 return -ENODEV; 1314 1315 if (!of_device_is_available(np)) 1316 return -ENODEV; 1317 1318 cci_config = of_match_node(arm_cci_matches, np)->data; 1319 if (!cci_config) 1320 return -ENODEV; 1321 1322 nb_cci_ports = cci_config->nb_ace + cci_config->nb_ace_lite; 1323 1324 ports = kcalloc(nb_cci_ports, sizeof(*ports), GFP_KERNEL); 1325 if (!ports) 1326 return -ENOMEM; 1327 1328 ret = of_address_to_resource(np, 0, &res); 1329 if (!ret) { 1330 cci_ctrl_base = ioremap(res.start, resource_size(&res)); 1331 cci_ctrl_phys = res.start; 1332 } 1333 if (ret || !cci_ctrl_base) { 1334 WARN(1, "unable to ioremap CCI ctrl\n"); 1335 ret = -ENXIO; 1336 goto memalloc_err; 1337 } 1338 1339 for_each_child_of_node(np, cp) { 1340 if (!of_match_node(arm_cci_ctrl_if_matches, cp)) 
1341 continue; 1342 1343 i = nb_ace + nb_ace_lite; 1344 1345 if (i >= nb_cci_ports) 1346 break; 1347 1348 if (of_property_read_string(cp, "interface-type", 1349 &match_str)) { 1350 WARN(1, "node %s missing interface-type property\n", 1351 cp->full_name); 1352 continue; 1353 } 1354 is_ace = strcmp(match_str, "ace") == 0; 1355 if (!is_ace && strcmp(match_str, "ace-lite")) { 1356 WARN(1, "node %s containing invalid interface-type property, skipping it\n", 1357 cp->full_name); 1358 continue; 1359 } 1360 1361 ret = of_address_to_resource(cp, 0, &res); 1362 if (!ret) { 1363 ports[i].base = ioremap(res.start, resource_size(&res)); 1364 ports[i].phys = res.start; 1365 } 1366 if (ret || !ports[i].base) { 1367 WARN(1, "unable to ioremap CCI port %d\n", i); 1368 continue; 1369 } 1370 1371 if (is_ace) { 1372 if (WARN_ON(nb_ace >= cci_config->nb_ace)) 1373 continue; 1374 ports[i].type = ACE_PORT; 1375 ++nb_ace; 1376 } else { 1377 if (WARN_ON(nb_ace_lite >= cci_config->nb_ace_lite)) 1378 continue; 1379 ports[i].type = ACE_LITE_PORT; 1380 ++nb_ace_lite; 1381 } 1382 ports[i].dn = cp; 1383 } 1384 1385 /* initialize a stashed array of ACE ports to speed-up look-up */ 1386 cci_ace_init_ports(); 1387 1388 /* 1389 * Multi-cluster systems may need this data when non-coherent, during 1390 * cluster power-up/power-down. Make sure it reaches main memory. 
1391 */ 1392 sync_cache_w(&cci_ctrl_base); 1393 sync_cache_w(&cci_ctrl_phys); 1394 sync_cache_w(&ports); 1395 sync_cache_w(&cpu_port); 1396 __sync_cache_range_w(ports, sizeof(*ports) * nb_cci_ports); 1397 pr_info("ARM CCI driver probed\n"); 1398 return 0; 1399 1400 memalloc_err: 1401 1402 kfree(ports); 1403 return ret; 1404 } 1405 1406 static int cci_init_status = -EAGAIN; 1407 static DEFINE_MUTEX(cci_probing); 1408 1409 static int cci_init(void) 1410 { 1411 if (cci_init_status != -EAGAIN) 1412 return cci_init_status; 1413 1414 mutex_lock(&cci_probing); 1415 if (cci_init_status == -EAGAIN) 1416 cci_init_status = cci_probe(); 1417 mutex_unlock(&cci_probing); 1418 return cci_init_status; 1419 } 1420 1421 #ifdef CONFIG_HW_PERF_EVENTS 1422 static struct platform_driver cci_pmu_driver = { 1423 .driver = { 1424 .name = DRIVER_NAME_PMU, 1425 .of_match_table = arm_cci_pmu_matches, 1426 }, 1427 .probe = cci_pmu_probe, 1428 }; 1429 1430 static struct platform_driver cci_platform_driver = { 1431 .driver = { 1432 .name = DRIVER_NAME, 1433 .of_match_table = arm_cci_matches, 1434 }, 1435 .probe = cci_platform_probe, 1436 }; 1437 1438 static int __init cci_platform_init(void) 1439 { 1440 int ret; 1441 1442 ret = platform_driver_register(&cci_pmu_driver); 1443 if (ret) 1444 return ret; 1445 1446 return platform_driver_register(&cci_platform_driver); 1447 } 1448 1449 #else 1450 1451 static int __init cci_platform_init(void) 1452 { 1453 return 0; 1454 } 1455 1456 #endif 1457 /* 1458 * To sort out early init calls ordering a helper function is provided to 1459 * check if the CCI driver has beed initialized. Function check if the driver 1460 * has been initialized, if not it calls the init function that probes 1461 * the driver and updates the return value. 
1462 */ 1463 bool cci_probed(void) 1464 { 1465 return cci_init() == 0; 1466 } 1467 EXPORT_SYMBOL_GPL(cci_probed); 1468 1469 early_initcall(cci_init); 1470 core_initcall(cci_platform_init); 1471 MODULE_LICENSE("GPL"); 1472 MODULE_DESCRIPTION("ARM CCI support"); 1473