1 /* 2 * CCI cache coherent interconnect driver 3 * 4 * Copyright (C) 2013 ARM Ltd. 5 * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License version 2 as 9 * published by the Free Software Foundation. 10 * 11 * This program is distributed "as is" WITHOUT ANY WARRANTY of any 12 * kind, whether express or implied; without even the implied warranty 13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 */ 16 17 #include <linux/arm-cci.h> 18 #include <linux/io.h> 19 #include <linux/module.h> 20 #include <linux/of_address.h> 21 #include <linux/of_irq.h> 22 #include <linux/of_platform.h> 23 #include <linux/platform_device.h> 24 #include <linux/slab.h> 25 #include <linux/spinlock.h> 26 27 #include <asm/cacheflush.h> 28 #include <asm/irq_regs.h> 29 #include <asm/pmu.h> 30 #include <asm/smp_plat.h> 31 32 #define DRIVER_NAME "CCI-400" 33 #define DRIVER_NAME_PMU DRIVER_NAME " PMU" 34 35 #define CCI_PORT_CTRL 0x0 36 #define CCI_CTRL_STATUS 0xc 37 38 #define CCI_ENABLE_SNOOP_REQ 0x1 39 #define CCI_ENABLE_DVM_REQ 0x2 40 #define CCI_ENABLE_REQ (CCI_ENABLE_SNOOP_REQ | CCI_ENABLE_DVM_REQ) 41 42 struct cci_nb_ports { 43 unsigned int nb_ace; 44 unsigned int nb_ace_lite; 45 }; 46 47 enum cci_ace_port_type { 48 ACE_INVALID_PORT = 0x0, 49 ACE_PORT, 50 ACE_LITE_PORT, 51 }; 52 53 struct cci_ace_port { 54 void __iomem *base; 55 unsigned long phys; 56 enum cci_ace_port_type type; 57 struct device_node *dn; 58 }; 59 60 static struct cci_ace_port *ports; 61 static unsigned int nb_cci_ports; 62 63 static void __iomem *cci_ctrl_base; 64 static unsigned long cci_ctrl_phys; 65 66 #ifdef CONFIG_HW_PERF_EVENTS 67 68 #define CCI_PMCR 0x0100 69 #define CCI_PID2 0x0fe8 70 71 #define CCI_PMCR_CEN 0x00000001 72 #define CCI_PMCR_NCNT_MASK 0x0000f800 73 #define CCI_PMCR_NCNT_SHIFT 11 74 75 #define CCI_PID2_REV_MASK 0xf0 76 #define CCI_PID2_REV_SHIFT 4 77 78 /* Port ids */ 79 #define CCI_PORT_S0 0 80 #define CCI_PORT_S1 1 81 #define CCI_PORT_S2 2 82 #define CCI_PORT_S3 3 83 #define CCI_PORT_S4 4 84 #define CCI_PORT_M0 5 85 #define CCI_PORT_M1 6 86 #define CCI_PORT_M2 7 87 88 #define CCI_REV_R0 0 89 #define CCI_REV_R1 1 90 #define CCI_REV_R1_PX 5 91 92 #define CCI_PMU_EVT_SEL 0x000 93 #define CCI_PMU_CNTR 0x004 94 #define CCI_PMU_CNTR_CTRL 0x008 95 #define CCI_PMU_OVRFLW 0x00c 96 97 #define CCI_PMU_OVRFLW_FLAG 1 98 99 #define CCI_PMU_CNTR_BASE(idx) ((idx) * SZ_4K) 100 101 /* 102 * Instead of an event id to monitor CCI cycles, a dedicated counter is 103 * provided. Use 0xff to represent CCI cycles and hope that no future revisions 104 * make use of this event in hardware. 105 */ 106 enum cci400_perf_events { 107 CCI_PMU_CYCLES = 0xff 108 }; 109 110 #define CCI_PMU_EVENT_MASK 0xff 111 #define CCI_PMU_EVENT_SOURCE(event) ((event >> 5) & 0x7) 112 #define CCI_PMU_EVENT_CODE(event) (event & 0x1f) 113 114 #define CCI_PMU_MAX_HW_EVENTS 5 /* CCI PMU has 4 counters + 1 cycle counter */ 115 116 #define CCI_PMU_CYCLE_CNTR_IDX 0 117 #define CCI_PMU_CNTR0_IDX 1 118 #define CCI_PMU_CNTR_LAST(cci_pmu) (CCI_PMU_CYCLE_CNTR_IDX + cci_pmu->num_events - 1) 119 120 /* 121 * CCI PMU event id is an 8-bit value made of two parts - bits 7:5 for one of 8 122 * ports and bits 4:0 are event codes. There are different event codes 123 * associated with each port type. 124 * 125 * Additionally, the range of events associated with the port types changed 126 * between Rev0 and Rev1. 127 * 128 * The constants below define the range of valid codes for each port type for 129 * the different revisions and are used to validate the event to be monitored. 130 */ 131 132 #define CCI_REV_R0_SLAVE_PORT_MIN_EV 0x00 133 #define CCI_REV_R0_SLAVE_PORT_MAX_EV 0x13 134 #define CCI_REV_R0_MASTER_PORT_MIN_EV 0x14 135 #define CCI_REV_R0_MASTER_PORT_MAX_EV 0x1a 136 137 #define CCI_REV_R1_SLAVE_PORT_MIN_EV 0x00 138 #define CCI_REV_R1_SLAVE_PORT_MAX_EV 0x14 139 #define CCI_REV_R1_MASTER_PORT_MIN_EV 0x00 140 #define CCI_REV_R1_MASTER_PORT_MAX_EV 0x11 141 142 struct pmu_port_event_ranges { 143 u8 slave_min; 144 u8 slave_max; 145 u8 master_min; 146 u8 master_max; 147 }; 148 149 static struct pmu_port_event_ranges port_event_range[] = { 150 [CCI_REV_R0] = { 151 .slave_min = CCI_REV_R0_SLAVE_PORT_MIN_EV, 152 .slave_max = CCI_REV_R0_SLAVE_PORT_MAX_EV, 153 .master_min = CCI_REV_R0_MASTER_PORT_MIN_EV, 154 .master_max = CCI_REV_R0_MASTER_PORT_MAX_EV, 155 }, 156 [CCI_REV_R1] = { 157 .slave_min = CCI_REV_R1_SLAVE_PORT_MIN_EV, 158 .slave_max = CCI_REV_R1_SLAVE_PORT_MAX_EV, 159 .master_min = CCI_REV_R1_MASTER_PORT_MIN_EV, 160 .master_max = CCI_REV_R1_MASTER_PORT_MAX_EV, 161 }, 162 }; 163 164 /* 165 * Export different PMU names for the different revisions so userspace knows 166 * because the event ids are different 167 */ 168 static char *const pmu_names[] = { 169 [CCI_REV_R0] = "CCI_400", 170 [CCI_REV_R1] = "CCI_400_r1", 171 }; 172 173 struct cci_pmu_drv_data { 174 void __iomem *base; 175 struct arm_pmu *cci_pmu; 176 int nr_irqs; 177 int irqs[CCI_PMU_MAX_HW_EVENTS]; 178 unsigned long active_irqs; 179 struct perf_event *events[CCI_PMU_MAX_HW_EVENTS]; 180 unsigned long used_mask[BITS_TO_LONGS(CCI_PMU_MAX_HW_EVENTS)]; 181 struct pmu_port_event_ranges *port_ranges; 182 struct pmu_hw_events hw_events; 183 }; 184 static struct cci_pmu_drv_data *pmu; 185 186 static bool is_duplicate_irq(int irq, int *irqs, int nr_irqs) 187 { 188 int i; 189 190 for (i = 0; i < nr_irqs; i++) 191 if (irq == irqs[i]) 192 return true; 193 194 return false; 195 } 196 197 static int probe_cci_revision(void) 198 { 199 int rev; 200 rev = readl_relaxed(cci_ctrl_base + CCI_PID2) & CCI_PID2_REV_MASK; 201 rev >>= CCI_PID2_REV_SHIFT; 202 203 if (rev < CCI_REV_R1_PX) 204 return CCI_REV_R0; 205 else 206 return CCI_REV_R1; 207 } 208 209 static struct pmu_port_event_ranges *port_range_by_rev(void) 210 { 211 int rev = probe_cci_revision(); 212 213 return &port_event_range[rev]; 214 } 215 216 static int pmu_is_valid_slave_event(u8 ev_code) 217 { 218 return pmu->port_ranges->slave_min <= ev_code && 219 ev_code <= pmu->port_ranges->slave_max; 220 } 221 222 static int pmu_is_valid_master_event(u8 ev_code) 223 { 224 return pmu->port_ranges->master_min <= ev_code && 225 ev_code <= pmu->port_ranges->master_max; 226 } 227 228 static int pmu_validate_hw_event(u8 hw_event) 229 { 230 u8 ev_source = CCI_PMU_EVENT_SOURCE(hw_event); 231 u8 ev_code = CCI_PMU_EVENT_CODE(hw_event); 232 233 switch (ev_source) { 234 case CCI_PORT_S0: 235 case CCI_PORT_S1: 236 case CCI_PORT_S2: 237 case CCI_PORT_S3: 238 case CCI_PORT_S4: 239 /* Slave Interface */ 240 if (pmu_is_valid_slave_event(ev_code)) 241 return hw_event; 242 break; 243 case CCI_PORT_M0: 244 case CCI_PORT_M1: 245 case CCI_PORT_M2: 246 /* Master Interface */ 247 if (pmu_is_valid_master_event(ev_code)) 248 return hw_event; 249 break; 250 } 251 252 return -ENOENT; 253 } 254 255 static int pmu_is_valid_counter(struct arm_pmu *cci_pmu, int idx) 256 { 257 return CCI_PMU_CYCLE_CNTR_IDX <= idx && 258 idx <= CCI_PMU_CNTR_LAST(cci_pmu); 259 } 260 261 static u32 pmu_read_register(int idx, unsigned int offset) 262 { 263 return readl_relaxed(pmu->base + CCI_PMU_CNTR_BASE(idx) + offset); 264 } 265 266 static void pmu_write_register(u32 value, int idx, unsigned int offset) 267 { 268 return writel_relaxed(value, pmu->base + CCI_PMU_CNTR_BASE(idx) + offset); 269 } 270 271 static void pmu_disable_counter(int idx) 272 { 273 pmu_write_register(0, idx, CCI_PMU_CNTR_CTRL); 274 } 275 276 static void pmu_enable_counter(int idx) 277 { 278 pmu_write_register(1, idx, CCI_PMU_CNTR_CTRL); 279 } 280 281 static void pmu_set_event(int idx, unsigned long event) 282 { 283 event &= CCI_PMU_EVENT_MASK; 284 pmu_write_register(event, idx, CCI_PMU_EVT_SEL); 285 } 286 287 static u32 pmu_get_max_counters(void) 288 { 289 u32 n_cnts = (readl_relaxed(cci_ctrl_base + CCI_PMCR) & 290 CCI_PMCR_NCNT_MASK) >> CCI_PMCR_NCNT_SHIFT; 291 292 /* add 1 for cycle counter */ 293 return n_cnts + 1; 294 } 295 296 static struct pmu_hw_events *pmu_get_hw_events(void) 297 { 298 return &pmu->hw_events; 299 } 300 301 static int pmu_get_event_idx(struct pmu_hw_events *hw, struct perf_event *event) 302 { 303 struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); 304 struct hw_perf_event *hw_event = &event->hw; 305 unsigned long cci_event = hw_event->config_base & CCI_PMU_EVENT_MASK; 306 int idx; 307 308 if (cci_event == CCI_PMU_CYCLES) { 309 if (test_and_set_bit(CCI_PMU_CYCLE_CNTR_IDX, hw->used_mask)) 310 return -EAGAIN; 311 312 return CCI_PMU_CYCLE_CNTR_IDX; 313 } 314 315 for (idx = CCI_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx) 316 if (!test_and_set_bit(idx, hw->used_mask)) 317 return idx; 318 319 /* No counters available */ 320 return -EAGAIN; 321 } 322 323 static int pmu_map_event(struct perf_event *event) 324 { 325 int mapping; 326 u8 config = event->attr.config & CCI_PMU_EVENT_MASK; 327 328 if (event->attr.type < PERF_TYPE_MAX) 329 return -ENOENT; 330 331 if (config == CCI_PMU_CYCLES) 332 mapping = config; 333 else 334 mapping = pmu_validate_hw_event(config); 335 336 return mapping; 337 } 338 339 static int pmu_request_irq(struct arm_pmu *cci_pmu, irq_handler_t handler) 340 { 341 int i; 342 struct platform_device *pmu_device = cci_pmu->plat_device; 343 344 if (unlikely(!pmu_device)) 345 return -ENODEV; 346 347 if (pmu->nr_irqs < 1) { 348 dev_err(&pmu_device->dev, "no irqs for CCI PMUs defined\n"); 349 return -ENODEV; 350 } 351 352 /* 353 * Register all available CCI PMU interrupts. In the interrupt handler 354 * we iterate over the counters checking for interrupt source (the 355 * overflowing counter) and clear it. 356 * 357 * This should allow handling of non-unique interrupt for the counters. 358 */ 359 for (i = 0; i < pmu->nr_irqs; i++) { 360 int err = request_irq(pmu->irqs[i], handler, IRQF_SHARED, 361 "arm-cci-pmu", cci_pmu); 362 if (err) { 363 dev_err(&pmu_device->dev, "unable to request IRQ%d for ARM CCI PMU counters\n", 364 pmu->irqs[i]); 365 return err; 366 } 367 368 set_bit(i, &pmu->active_irqs); 369 } 370 371 return 0; 372 } 373 374 static irqreturn_t pmu_handle_irq(int irq_num, void *dev) 375 { 376 unsigned long flags; 377 struct arm_pmu *cci_pmu = (struct arm_pmu *)dev; 378 struct pmu_hw_events *events = cci_pmu->get_hw_events(); 379 struct perf_sample_data data; 380 struct pt_regs *regs; 381 int idx, handled = IRQ_NONE; 382 383 raw_spin_lock_irqsave(&events->pmu_lock, flags); 384 regs = get_irq_regs(); 385 /* 386 * Iterate over counters and update the corresponding perf events. 387 * This should work regardless of whether we have per-counter overflow 388 * interrupt or a combined overflow interrupt. 389 */ 390 for (idx = CCI_PMU_CYCLE_CNTR_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++) { 391 struct perf_event *event = events->events[idx]; 392 struct hw_perf_event *hw_counter; 393 394 if (!event) 395 continue; 396 397 hw_counter = &event->hw; 398 399 /* Did this counter overflow? */ 400 if (!(pmu_read_register(idx, CCI_PMU_OVRFLW) & 401 CCI_PMU_OVRFLW_FLAG)) 402 continue; 403 404 pmu_write_register(CCI_PMU_OVRFLW_FLAG, idx, CCI_PMU_OVRFLW); 405 406 handled = IRQ_HANDLED; 407 408 armpmu_event_update(event); 409 perf_sample_data_init(&data, 0, hw_counter->last_period); 410 if (!armpmu_event_set_period(event)) 411 continue; 412 413 if (perf_event_overflow(event, &data, regs)) 414 cci_pmu->disable(event); 415 } 416 raw_spin_unlock_irqrestore(&events->pmu_lock, flags); 417 418 return IRQ_RETVAL(handled); 419 } 420 421 static void pmu_free_irq(struct arm_pmu *cci_pmu) 422 { 423 int i; 424 425 for (i = 0; i < pmu->nr_irqs; i++) { 426 if (!test_and_clear_bit(i, &pmu->active_irqs)) 427 continue; 428 429 free_irq(pmu->irqs[i], cci_pmu); 430 } 431 } 432 433 static void pmu_enable_event(struct perf_event *event) 434 { 435 unsigned long flags; 436 struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); 437 struct pmu_hw_events *events = cci_pmu->get_hw_events(); 438 struct hw_perf_event *hw_counter = &event->hw; 439 int idx = hw_counter->idx; 440 441 if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) { 442 dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); 443 return; 444 } 445 446 raw_spin_lock_irqsave(&events->pmu_lock, flags); 447 448 /* Configure the event to count, unless you are counting cycles */ 449 if (idx != CCI_PMU_CYCLE_CNTR_IDX) 450 pmu_set_event(idx, hw_counter->config_base); 451 452 pmu_enable_counter(idx); 453 454 raw_spin_unlock_irqrestore(&events->pmu_lock, flags); 455 } 456 457 static void pmu_disable_event(struct perf_event *event) 458 { 459 struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); 460 struct hw_perf_event *hw_counter = &event->hw; 461 int idx = hw_counter->idx; 462 463 if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) { 464 dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); 465 return; 466 } 467 468 pmu_disable_counter(idx); 469 } 470 471 static void pmu_start(struct arm_pmu *cci_pmu) 472 { 473 u32 val; 474 unsigned long flags; 475 struct pmu_hw_events *events = cci_pmu->get_hw_events(); 476 477 raw_spin_lock_irqsave(&events->pmu_lock, flags); 478 479 /* Enable all the PMU counters. */ 480 val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN; 481 writel(val, cci_ctrl_base + CCI_PMCR); 482 483 raw_spin_unlock_irqrestore(&events->pmu_lock, flags); 484 } 485 486 static void pmu_stop(struct arm_pmu *cci_pmu) 487 { 488 u32 val; 489 unsigned long flags; 490 struct pmu_hw_events *events = cci_pmu->get_hw_events(); 491 492 raw_spin_lock_irqsave(&events->pmu_lock, flags); 493 494 /* Disable all the PMU counters. */ 495 val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN; 496 writel(val, cci_ctrl_base + CCI_PMCR); 497 498 raw_spin_unlock_irqrestore(&events->pmu_lock, flags); 499 } 500 501 static u32 pmu_read_counter(struct perf_event *event) 502 { 503 struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); 504 struct hw_perf_event *hw_counter = &event->hw; 505 int idx = hw_counter->idx; 506 u32 value; 507 508 if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) { 509 dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); 510 return 0; 511 } 512 value = pmu_read_register(idx, CCI_PMU_CNTR); 513 514 return value; 515 } 516 517 static void pmu_write_counter(struct perf_event *event, u32 value) 518 { 519 struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); 520 struct hw_perf_event *hw_counter = &event->hw; 521 int idx = hw_counter->idx; 522 523 if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) 524 dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); 525 else 526 pmu_write_register(value, idx, CCI_PMU_CNTR); 527 } 528 529 static int cci_pmu_init(struct arm_pmu *cci_pmu, struct platform_device *pdev) 530 { 531 *cci_pmu = (struct arm_pmu){ 532 .name = pmu_names[probe_cci_revision()], 533 .max_period = (1LLU << 32) - 1, 534 .get_hw_events = pmu_get_hw_events, 535 .get_event_idx = pmu_get_event_idx, 536 .map_event = pmu_map_event, 537 .request_irq = pmu_request_irq, 538 .handle_irq = pmu_handle_irq, 539 .free_irq = pmu_free_irq, 540 .enable = pmu_enable_event, 541 .disable = pmu_disable_event, 542 .start = pmu_start, 543 .stop = pmu_stop, 544 .read_counter = pmu_read_counter, 545 .write_counter = pmu_write_counter, 546 }; 547 548 cci_pmu->plat_device = pdev; 549 cci_pmu->num_events = pmu_get_max_counters(); 550 551 return armpmu_register(cci_pmu, -1); 552 } 553 554 static const struct of_device_id arm_cci_pmu_matches[] = { 555 { 556 .compatible = "arm,cci-400-pmu", 557 }, 558 {}, 559 }; 560 561 static int cci_pmu_probe(struct platform_device *pdev) 562 { 563 struct resource *res; 564 int i, ret, irq; 565 566 pmu = devm_kzalloc(&pdev->dev, sizeof(*pmu), GFP_KERNEL); 567 if (!pmu) 568 return -ENOMEM; 569 570 res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 571 pmu->base = devm_ioremap_resource(&pdev->dev, res); 572 if (IS_ERR(pmu->base)) 573 return -ENOMEM; 574 575 /* 576 * CCI PMU has 5 overflow signals - one per counter; but some may be tied 577 * together to a common interrupt. 578 */ 579 pmu->nr_irqs = 0; 580 for (i = 0; i < CCI_PMU_MAX_HW_EVENTS; i++) { 581 irq = platform_get_irq(pdev, i); 582 if (irq < 0) 583 break; 584 585 if (is_duplicate_irq(irq, pmu->irqs, pmu->nr_irqs)) 586 continue; 587 588 pmu->irqs[pmu->nr_irqs++] = irq; 589 } 590 591 /* 592 * Ensure that the device tree has as many interrupts as the number 593 * of counters. 594 */ 595 if (i < CCI_PMU_MAX_HW_EVENTS) { 596 dev_warn(&pdev->dev, "In-correct number of interrupts: %d, should be %d\n", 597 i, CCI_PMU_MAX_HW_EVENTS); 598 return -EINVAL; 599 } 600 601 pmu->port_ranges = port_range_by_rev(); 602 if (!pmu->port_ranges) { 603 dev_warn(&pdev->dev, "CCI PMU version not supported\n"); 604 return -EINVAL; 605 } 606 607 pmu->cci_pmu = devm_kzalloc(&pdev->dev, sizeof(*(pmu->cci_pmu)), GFP_KERNEL); 608 if (!pmu->cci_pmu) 609 return -ENOMEM; 610 611 pmu->hw_events.events = pmu->events; 612 pmu->hw_events.used_mask = pmu->used_mask; 613 raw_spin_lock_init(&pmu->hw_events.pmu_lock); 614 615 ret = cci_pmu_init(pmu->cci_pmu, pdev); 616 if (ret) 617 return ret; 618 619 return 0; 620 } 621 622 static int cci_platform_probe(struct platform_device *pdev) 623 { 624 if (!cci_probed()) 625 return -ENODEV; 626 627 return of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev); 628 } 629 630 #endif /* CONFIG_HW_PERF_EVENTS */ 631 632 struct cpu_port { 633 u64 mpidr; 634 u32 port; 635 }; 636 637 /* 638 * Use the port MSB as valid flag, shift can be made dynamic 639 * by computing number of bits required for port indexes. 640 * Code disabling CCI cpu ports runs with D-cache invalidated 641 * and SCTLR bit clear so data accesses must be kept to a minimum 642 * to improve performance; for now shift is left static to 643 * avoid one more data access while disabling the CCI port. 644 */ 645 #define PORT_VALID_SHIFT 31 646 #define PORT_VALID (0x1 << PORT_VALID_SHIFT) 647 648 static inline void init_cpu_port(struct cpu_port *port, u32 index, u64 mpidr) 649 { 650 port->port = PORT_VALID | index; 651 port->mpidr = mpidr; 652 } 653 654 static inline bool cpu_port_is_valid(struct cpu_port *port) 655 { 656 return !!(port->port & PORT_VALID); 657 } 658 659 static inline bool cpu_port_match(struct cpu_port *port, u64 mpidr) 660 { 661 return port->mpidr == (mpidr & MPIDR_HWID_BITMASK); 662 } 663 664 static struct cpu_port cpu_port[NR_CPUS]; 665 666 /** 667 * __cci_ace_get_port - Function to retrieve the port index connected to 668 * a cpu or device. 669 * 670 * @dn: device node of the device to look-up 671 * @type: port type 672 * 673 * Return value: 674 * - CCI port index if success 675 * - -ENODEV if failure 676 */ 677 static int __cci_ace_get_port(struct device_node *dn, int type) 678 { 679 int i; 680 bool ace_match; 681 struct device_node *cci_portn; 682 683 cci_portn = of_parse_phandle(dn, "cci-control-port", 0); 684 for (i = 0; i < nb_cci_ports; i++) { 685 ace_match = ports[i].type == type; 686 if (ace_match && cci_portn == ports[i].dn) 687 return i; 688 } 689 return -ENODEV; 690 } 691 692 int cci_ace_get_port(struct device_node *dn) 693 { 694 return __cci_ace_get_port(dn, ACE_LITE_PORT); 695 } 696 EXPORT_SYMBOL_GPL(cci_ace_get_port); 697 698 static void cci_ace_init_ports(void) 699 { 700 int port, cpu; 701 struct device_node *cpun; 702 703 /* 704 * Port index look-up speeds up the function disabling ports by CPU, 705 * since the logical to port index mapping is done once and does 706 * not change after system boot. 707 * The stashed index array is initialized for all possible CPUs 708 * at probe time. 709 */ 710 for_each_possible_cpu(cpu) { 711 /* too early to use cpu->of_node */ 712 cpun = of_get_cpu_node(cpu, NULL); 713 714 if (WARN(!cpun, "Missing cpu device node\n")) 715 continue; 716 717 port = __cci_ace_get_port(cpun, ACE_PORT); 718 if (port < 0) 719 continue; 720 721 init_cpu_port(&cpu_port[cpu], port, cpu_logical_map(cpu)); 722 } 723 724 for_each_possible_cpu(cpu) { 725 WARN(!cpu_port_is_valid(&cpu_port[cpu]), 726 "CPU %u does not have an associated CCI port\n", 727 cpu); 728 } 729 } 730 /* 731 * Functions to enable/disable a CCI interconnect slave port 732 * 733 * They are called by low-level power management code to disable slave 734 * interfaces snoops and DVM broadcast. 735 * Since they may execute with cache data allocation disabled and 736 * after the caches have been cleaned and invalidated the functions provide 737 * no explicit locking since they may run with D-cache disabled, so normal 738 * cacheable kernel locks based on ldrex/strex may not work. 739 * Locking has to be provided by BSP implementations to ensure proper 740 * operations. 741 */ 742 743 /** 744 * cci_port_control() - function to control a CCI port 745 * 746 * @port: index of the port to setup 747 * @enable: if true enables the port, if false disables it 748 */ 749 static void notrace cci_port_control(unsigned int port, bool enable) 750 { 751 void __iomem *base = ports[port].base; 752 753 writel_relaxed(enable ? CCI_ENABLE_REQ : 0, base + CCI_PORT_CTRL); 754 /* 755 * This function is called from power down procedures 756 * and must not execute any instruction that might 757 * cause the processor to be put in a quiescent state 758 * (eg wfi). Hence, cpu_relax() can not be added to this 759 * read loop to optimize power, since it might hide possibly 760 * disruptive operations. 761 */ 762 while (readl_relaxed(cci_ctrl_base + CCI_CTRL_STATUS) & 0x1) 763 ; 764 } 765 766 /** 767 * cci_disable_port_by_cpu() - function to disable a CCI port by CPU 768 * reference 769 * 770 * @mpidr: mpidr of the CPU whose CCI port should be disabled 771 * 772 * Disabling a CCI port for a CPU implies disabling the CCI port 773 * controlling that CPU cluster. Code disabling CPU CCI ports 774 * must make sure that the CPU running the code is the last active CPU 775 * in the cluster ie all other CPUs are quiescent in a low power state. 776 * 777 * Return: 778 * 0 on success 779 * -ENODEV on port look-up failure 780 */ 781 int notrace cci_disable_port_by_cpu(u64 mpidr) 782 { 783 int cpu; 784 bool is_valid; 785 for (cpu = 0; cpu < nr_cpu_ids; cpu++) { 786 is_valid = cpu_port_is_valid(&cpu_port[cpu]); 787 if (is_valid && cpu_port_match(&cpu_port[cpu], mpidr)) { 788 cci_port_control(cpu_port[cpu].port, false); 789 return 0; 790 } 791 } 792 return -ENODEV; 793 } 794 EXPORT_SYMBOL_GPL(cci_disable_port_by_cpu); 795 796 /** 797 * cci_enable_port_for_self() - enable a CCI port for calling CPU 798 * 799 * Enabling a CCI port for the calling CPU implies enabling the CCI 800 * port controlling that CPU's cluster. Caller must make sure that the 801 * CPU running the code is the first active CPU in the cluster and all 802 * other CPUs are quiescent in a low power state or waiting for this CPU 803 * to complete the CCI initialization. 804 * 805 * Because this is called when the MMU is still off and with no stack, 806 * the code must be position independent and ideally rely on callee 807 * clobbered registers only. To achieve this we must code this function 808 * entirely in assembler. 809 * 810 * On success this returns with the proper CCI port enabled. In case of 811 * any failure this never returns as the inability to enable the CCI is 812 * fatal and there is no possible recovery at this stage. 813 */ 814 asmlinkage void __naked cci_enable_port_for_self(void) 815 { 816 asm volatile ("\n" 817 " .arch armv7-a\n" 818 " mrc p15, 0, r0, c0, c0, 5 @ get MPIDR value \n" 819 " and r0, r0, #"__stringify(MPIDR_HWID_BITMASK)" \n" 820 " adr r1, 5f \n" 821 " ldr r2, [r1] \n" 822 " add r1, r1, r2 @ &cpu_port \n" 823 " add ip, r1, %[sizeof_cpu_port] \n" 824 825 /* Loop over the cpu_port array looking for a matching MPIDR */ 826 "1: ldr r2, [r1, %[offsetof_cpu_port_mpidr_lsb]] \n" 827 " cmp r2, r0 @ compare MPIDR \n" 828 " bne 2f \n" 829 830 /* Found a match, now test port validity */ 831 " ldr r3, [r1, %[offsetof_cpu_port_port]] \n" 832 " tst r3, #"__stringify(PORT_VALID)" \n" 833 " bne 3f \n" 834 835 /* no match, loop with the next cpu_port entry */ 836 "2: add r1, r1, %[sizeof_struct_cpu_port] \n" 837 " cmp r1, ip @ done? \n" 838 " blo 1b \n" 839 840 /* CCI port not found -- cheaply try to stall this CPU */ 841 "cci_port_not_found: \n" 842 " wfi \n" 843 " wfe \n" 844 " b cci_port_not_found \n" 845 846 /* Use matched port index to look up the corresponding ports entry */ 847 "3: bic r3, r3, #"__stringify(PORT_VALID)" \n" 848 " adr r0, 6f \n" 849 " ldmia r0, {r1, r2} \n" 850 " sub r1, r1, r0 @ virt - phys \n" 851 " ldr r0, [r0, r2] @ *(&ports) \n" 852 " mov r2, %[sizeof_struct_ace_port] \n" 853 " mla r0, r2, r3, r0 @ &ports[index] \n" 854 " sub r0, r0, r1 @ virt_to_phys() \n" 855 856 /* Enable the CCI port */ 857 " ldr r0, [r0, %[offsetof_port_phys]] \n" 858 " mov r3, %[cci_enable_req]\n" 859 " str r3, [r0, #"__stringify(CCI_PORT_CTRL)"] \n" 860 861 /* poll the status reg for completion */ 862 " adr r1, 7f \n" 863 " ldr r0, [r1] \n" 864 " ldr r0, [r0, r1] @ cci_ctrl_base \n" 865 "4: ldr r1, [r0, #"__stringify(CCI_CTRL_STATUS)"] \n" 866 " tst r1, %[cci_control_status_bits] \n" 867 " bne 4b \n" 868 869 " mov r0, #0 \n" 870 " bx lr \n" 871 872 " .align 2 \n" 873 "5: .word cpu_port - . \n" 874 "6: .word . \n" 875 " .word ports - 6b \n" 876 "7: .word cci_ctrl_phys - . \n" 877 : : 878 [sizeof_cpu_port] "i" (sizeof(cpu_port)), 879 [cci_enable_req] "i" cpu_to_le32(CCI_ENABLE_REQ), 880 [cci_control_status_bits] "i" cpu_to_le32(1), 881 #ifndef __ARMEB__ 882 [offsetof_cpu_port_mpidr_lsb] "i" (offsetof(struct cpu_port, mpidr)), 883 #else 884 [offsetof_cpu_port_mpidr_lsb] "i" (offsetof(struct cpu_port, mpidr)+4), 885 #endif 886 [offsetof_cpu_port_port] "i" (offsetof(struct cpu_port, port)), 887 [sizeof_struct_cpu_port] "i" (sizeof(struct cpu_port)), 888 [sizeof_struct_ace_port] "i" (sizeof(struct cci_ace_port)), 889 [offsetof_port_phys] "i" (offsetof(struct cci_ace_port, phys)) ); 890 891 unreachable(); 892 } 893 894 /** 895 * __cci_control_port_by_device() - function to control a CCI port by device 896 * reference 897 * 898 * @dn: device node pointer of the device whose CCI port should be 899 * controlled 900 * @enable: if true enables the port, if false disables it 901 * 902 * Return: 903 * 0 on success 904 * -ENODEV on port look-up failure 905 */ 906 int notrace __cci_control_port_by_device(struct device_node *dn, bool enable) 907 { 908 int port; 909 910 if (!dn) 911 return -ENODEV; 912 913 port = __cci_ace_get_port(dn, ACE_LITE_PORT); 914 if (WARN_ONCE(port < 0, "node %s ACE lite port look-up failure\n", 915 dn->full_name)) 916 return -ENODEV; 917 cci_port_control(port, enable); 918 return 0; 919 } 920 EXPORT_SYMBOL_GPL(__cci_control_port_by_device); 921 922 /** 923 * __cci_control_port_by_index() - function to control a CCI port by port index 924 * 925 * @port: port index previously retrieved with cci_ace_get_port() 926 * @enable: if true enables the port, if false disables it 927 * 928 * Return: 929 * 0 on success 930 * -ENODEV on port index out of range 931 * -EPERM if operation carried out on an ACE PORT 932 */ 933 int notrace __cci_control_port_by_index(u32 port, bool enable) 934 { 935 if (port >= nb_cci_ports || ports[port].type == ACE_INVALID_PORT) 936 return -ENODEV; 937 /* 938 * CCI control for ports connected to CPUS is extremely fragile 939 * and must be made to go through a specific and controlled 940 * interface (ie cci_disable_port_by_cpu(); control by general purpose 941 * indexing is therefore disabled for ACE ports. 942 */ 943 if (ports[port].type == ACE_PORT) 944 return -EPERM; 945 946 cci_port_control(port, enable); 947 return 0; 948 } 949 EXPORT_SYMBOL_GPL(__cci_control_port_by_index); 950 951 static const struct cci_nb_ports cci400_ports = { 952 .nb_ace = 2, 953 .nb_ace_lite = 3 954 }; 955 956 static const struct of_device_id arm_cci_matches[] = { 957 {.compatible = "arm,cci-400", .data = &cci400_ports }, 958 {}, 959 }; 960 961 static const struct of_device_id arm_cci_ctrl_if_matches[] = { 962 {.compatible = "arm,cci-400-ctrl-if", }, 963 {}, 964 }; 965 966 static int cci_probe(void) 967 { 968 struct cci_nb_ports const *cci_config; 969 int ret, i, nb_ace = 0, nb_ace_lite = 0; 970 struct device_node *np, *cp; 971 struct resource res; 972 const char *match_str; 973 bool is_ace; 974 975 np = of_find_matching_node(NULL, arm_cci_matches); 976 if (!np) 977 return -ENODEV; 978 979 cci_config = of_match_node(arm_cci_matches, np)->data; 980 if (!cci_config) 981 return -ENODEV; 982 983 nb_cci_ports = cci_config->nb_ace + cci_config->nb_ace_lite; 984 985 ports = kcalloc(nb_cci_ports, sizeof(*ports), GFP_KERNEL); 986 if (!ports) 987 return -ENOMEM; 988 989 ret = of_address_to_resource(np, 0, &res); 990 if (!ret) { 991 cci_ctrl_base = ioremap(res.start, resource_size(&res)); 992 cci_ctrl_phys = res.start; 993 } 994 if (ret || !cci_ctrl_base) { 995 WARN(1, "unable to ioremap CCI ctrl\n"); 996 ret = -ENXIO; 997 goto memalloc_err; 998 } 999 1000 for_each_child_of_node(np, cp) { 1001 if (!of_match_node(arm_cci_ctrl_if_matches, cp)) 1002 continue; 1003 1004 i = nb_ace + nb_ace_lite; 1005 1006 if (i >= nb_cci_ports) 1007 break; 1008 1009 if (of_property_read_string(cp, "interface-type", 1010 &match_str)) { 1011 WARN(1, "node %s missing interface-type property\n", 1012 cp->full_name); 1013 continue; 1014 } 1015 is_ace = strcmp(match_str, "ace") == 0; 1016 if (!is_ace && strcmp(match_str, "ace-lite")) { 1017 WARN(1, "node %s containing invalid interface-type property, skipping it\n", 1018 cp->full_name); 1019 continue; 1020 } 1021 1022 ret = of_address_to_resource(cp, 0, &res); 1023 if (!ret) { 1024 ports[i].base = ioremap(res.start, resource_size(&res)); 1025 ports[i].phys = res.start; 1026 } 1027 if (ret || !ports[i].base) { 1028 WARN(1, "unable to ioremap CCI port %d\n", i); 1029 continue; 1030 } 1031 1032 if (is_ace) { 1033 if (WARN_ON(nb_ace >= cci_config->nb_ace)) 1034 continue; 1035 ports[i].type = ACE_PORT; 1036 ++nb_ace; 1037 } else { 1038 if (WARN_ON(nb_ace_lite >= cci_config->nb_ace_lite)) 1039 continue; 1040 ports[i].type = ACE_LITE_PORT; 1041 ++nb_ace_lite; 1042 } 1043 ports[i].dn = cp; 1044 } 1045 1046 /* initialize a stashed array of ACE ports to speed-up look-up */ 1047 cci_ace_init_ports(); 1048 1049 /* 1050 * Multi-cluster systems may need this data when non-coherent, during 1051 * cluster power-up/power-down. Make sure it reaches main memory. 1052 */ 1053 sync_cache_w(&cci_ctrl_base); 1054 sync_cache_w(&cci_ctrl_phys); 1055 sync_cache_w(&ports); 1056 sync_cache_w(&cpu_port); 1057 __sync_cache_range_w(ports, sizeof(*ports) * nb_cci_ports); 1058 pr_info("ARM CCI driver probed\n"); 1059 return 0; 1060 1061 memalloc_err: 1062 1063 kfree(ports); 1064 return ret; 1065 } 1066 1067 static int cci_init_status = -EAGAIN; 1068 static DEFINE_MUTEX(cci_probing); 1069 1070 static int cci_init(void) 1071 { 1072 if (cci_init_status != -EAGAIN) 1073 return cci_init_status; 1074 1075 mutex_lock(&cci_probing); 1076 if (cci_init_status == -EAGAIN) 1077 cci_init_status = cci_probe(); 1078 mutex_unlock(&cci_probing); 1079 return cci_init_status; 1080 } 1081 1082 #ifdef CONFIG_HW_PERF_EVENTS 1083 static struct platform_driver cci_pmu_driver = { 1084 .driver = { 1085 .name = DRIVER_NAME_PMU, 1086 .of_match_table = arm_cci_pmu_matches, 1087 }, 1088 .probe = cci_pmu_probe, 1089 }; 1090 1091 static struct platform_driver cci_platform_driver = { 1092 .driver = { 1093 .name = DRIVER_NAME, 1094 .of_match_table = arm_cci_matches, 1095 }, 1096 .probe = cci_platform_probe, 1097 }; 1098 1099 static int __init cci_platform_init(void) 1100 { 1101 int ret; 1102 1103 ret = platform_driver_register(&cci_pmu_driver); 1104 if (ret) 1105 return ret; 1106 1107 return platform_driver_register(&cci_platform_driver); 1108 } 1109 1110 #else 1111 1112 static int __init cci_platform_init(void) 1113 { 1114 return 0; 1115 } 1116 1117 #endif 1118 /* 1119 * To sort out early init calls ordering a helper function is provided to 1120 * check if the CCI driver has beed initialized. Function check if the driver 1121 * has been initialized, if not it calls the init function that probes 1122 * the driver and updates the return value. 1123 */ 1124 bool cci_probed(void) 1125 { 1126 return cci_init() == 0; 1127 } 1128 EXPORT_SYMBOL_GPL(cci_probed); 1129 1130 early_initcall(cci_init); 1131 core_initcall(cci_platform_init); 1132 MODULE_LICENSE("GPL"); 1133 MODULE_DESCRIPTION("ARM CCI support"); 1134