1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Low-Level PCI Express Support for the SH7786 4 * 5 * Copyright (C) 2009 - 2011 Paul Mundt 6 */ 7 #define pr_fmt(fmt) "PCI: " fmt 8 9 #include <linux/pci.h> 10 #include <linux/init.h> 11 #include <linux/kernel.h> 12 #include <linux/io.h> 13 #include <linux/async.h> 14 #include <linux/delay.h> 15 #include <linux/dma-map-ops.h> 16 #include <linux/slab.h> 17 #include <linux/clk.h> 18 #include <linux/sh_clk.h> 19 #include <linux/sh_intc.h> 20 #include <cpu/sh7786.h> 21 #include "pcie-sh7786.h" 22 #include <linux/sizes.h> 23 24 struct sh7786_pcie_port { 25 struct pci_channel *hose; 26 struct clk *fclk, phy_clk; 27 unsigned int index; 28 int endpoint; 29 int link; 30 }; 31 32 static struct sh7786_pcie_port *sh7786_pcie_ports; 33 static unsigned int nr_ports; 34 static unsigned long dma_pfn_offset; 35 size_t memsize; 36 u64 memstart; 37 38 static struct sh7786_pcie_hwops { 39 int (*core_init)(void); 40 async_func_t port_init_hw; 41 } *sh7786_pcie_hwops; 42 43 static struct resource sh7786_pci0_resources[] = { 44 { 45 .name = "PCIe0 MEM 0", 46 .start = 0xfd000000, 47 .end = 0xfd000000 + SZ_8M - 1, 48 .flags = IORESOURCE_MEM, 49 }, { 50 .name = "PCIe0 MEM 1", 51 .start = 0xc0000000, 52 .end = 0xc0000000 + SZ_512M - 1, 53 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 54 }, { 55 .name = "PCIe0 MEM 2", 56 .start = 0x10000000, 57 .end = 0x10000000 + SZ_64M - 1, 58 .flags = IORESOURCE_MEM, 59 }, { 60 .name = "PCIe0 IO", 61 .start = 0xfe100000, 62 .end = 0xfe100000 + SZ_1M - 1, 63 .flags = IORESOURCE_IO, 64 }, 65 }; 66 67 static struct resource sh7786_pci1_resources[] = { 68 { 69 .name = "PCIe1 MEM 0", 70 .start = 0xfd800000, 71 .end = 0xfd800000 + SZ_8M - 1, 72 .flags = IORESOURCE_MEM, 73 }, { 74 .name = "PCIe1 MEM 1", 75 .start = 0xa0000000, 76 .end = 0xa0000000 + SZ_512M - 1, 77 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 78 }, { 79 .name = "PCIe1 MEM 2", 80 .start = 0x30000000, 81 .end = 0x30000000 + SZ_256M - 1, 82 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 83 }, { 84 .name = "PCIe1 IO", 85 .start = 0xfe300000, 86 .end = 0xfe300000 + SZ_1M - 1, 87 .flags = IORESOURCE_IO, 88 }, 89 }; 90 91 static struct resource sh7786_pci2_resources[] = { 92 { 93 .name = "PCIe2 MEM 0", 94 .start = 0xfc800000, 95 .end = 0xfc800000 + SZ_4M - 1, 96 .flags = IORESOURCE_MEM, 97 }, { 98 .name = "PCIe2 MEM 1", 99 .start = 0x80000000, 100 .end = 0x80000000 + SZ_512M - 1, 101 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 102 }, { 103 .name = "PCIe2 MEM 2", 104 .start = 0x20000000, 105 .end = 0x20000000 + SZ_256M - 1, 106 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 107 }, { 108 .name = "PCIe2 IO", 109 .start = 0xfcd00000, 110 .end = 0xfcd00000 + SZ_1M - 1, 111 .flags = IORESOURCE_IO, 112 }, 113 }; 114 115 extern struct pci_ops sh7786_pci_ops; 116 117 #define DEFINE_CONTROLLER(start, idx) \ 118 { \ 119 .pci_ops = &sh7786_pci_ops, \ 120 .resources = sh7786_pci##idx##_resources, \ 121 .nr_resources = ARRAY_SIZE(sh7786_pci##idx##_resources), \ 122 .reg_base = start, \ 123 .mem_offset = 0, \ 124 .io_offset = 0, \ 125 } 126 127 static struct pci_channel sh7786_pci_channels[] = { 128 DEFINE_CONTROLLER(0xfe000000, 0), 129 DEFINE_CONTROLLER(0xfe200000, 1), 130 DEFINE_CONTROLLER(0xfcc00000, 2), 131 }; 132 133 static struct clk fixed_pciexclkp = { 134 .rate = 100000000, /* 100 MHz reference clock */ 135 }; 136 137 static void sh7786_pci_fixup(struct pci_dev *dev) 138 { 139 /* 140 * Prevent enumeration of root complex resources. 141 */ 142 if (pci_is_root_bus(dev->bus) && dev->devfn == 0) { 143 int i; 144 145 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { 146 dev->resource[i].start = 0; 147 dev->resource[i].end = 0; 148 dev->resource[i].flags = 0; 149 } 150 } 151 } 152 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_RENESAS, PCI_DEVICE_ID_RENESAS_SH7786, 153 sh7786_pci_fixup); 154 155 static int __init phy_wait_for_ack(struct pci_channel *chan) 156 { 157 unsigned int timeout = 100; 158 159 while (timeout--) { 160 if (pci_read_reg(chan, SH4A_PCIEPHYADRR) & (1 << BITS_ACK)) 161 return 0; 162 163 udelay(100); 164 } 165 166 return -ETIMEDOUT; 167 } 168 169 static int __init pci_wait_for_irq(struct pci_channel *chan, unsigned int mask) 170 { 171 unsigned int timeout = 100; 172 173 while (timeout--) { 174 if ((pci_read_reg(chan, SH4A_PCIEINTR) & mask) == mask) 175 return 0; 176 177 udelay(100); 178 } 179 180 return -ETIMEDOUT; 181 } 182 183 static void __init phy_write_reg(struct pci_channel *chan, unsigned int addr, 184 unsigned int lane, unsigned int data) 185 { 186 unsigned long phyaddr; 187 188 phyaddr = (1 << BITS_CMD) + ((lane & 0xf) << BITS_LANE) + 189 ((addr & 0xff) << BITS_ADR); 190 191 /* Set write data */ 192 pci_write_reg(chan, data, SH4A_PCIEPHYDOUTR); 193 pci_write_reg(chan, phyaddr, SH4A_PCIEPHYADRR); 194 195 phy_wait_for_ack(chan); 196 197 /* Clear command */ 198 pci_write_reg(chan, 0, SH4A_PCIEPHYDOUTR); 199 pci_write_reg(chan, 0, SH4A_PCIEPHYADRR); 200 201 phy_wait_for_ack(chan); 202 } 203 204 static int __init pcie_clk_init(struct sh7786_pcie_port *port) 205 { 206 struct pci_channel *chan = port->hose; 207 struct clk *clk; 208 char fclk_name[16]; 209 int ret; 210 211 /* 212 * First register the fixed clock 213 */ 214 ret = clk_register(&fixed_pciexclkp); 215 if (unlikely(ret != 0)) 216 return ret; 217 218 /* 219 * Grab the port's function clock, which the PHY clock depends 220 * on. clock lookups don't help us much at this point, since no 221 * dev_id is available this early. Lame. 222 */ 223 snprintf(fclk_name, sizeof(fclk_name), "pcie%d_fck", port->index); 224 225 port->fclk = clk_get(NULL, fclk_name); 226 if (IS_ERR(port->fclk)) { 227 ret = PTR_ERR(port->fclk); 228 goto err_fclk; 229 } 230 231 clk_enable(port->fclk); 232 233 /* 234 * And now, set up the PHY clock 235 */ 236 clk = &port->phy_clk; 237 238 memset(clk, 0, sizeof(struct clk)); 239 240 clk->parent = &fixed_pciexclkp; 241 clk->enable_reg = (void __iomem *)(chan->reg_base + SH4A_PCIEPHYCTLR); 242 clk->enable_bit = BITS_CKE; 243 244 ret = sh_clk_mstp_register(clk, 1); 245 if (unlikely(ret < 0)) 246 goto err_phy; 247 248 return 0; 249 250 err_phy: 251 clk_disable(port->fclk); 252 clk_put(port->fclk); 253 err_fclk: 254 clk_unregister(&fixed_pciexclkp); 255 256 return ret; 257 } 258 259 static int __init phy_init(struct sh7786_pcie_port *port) 260 { 261 struct pci_channel *chan = port->hose; 262 unsigned int timeout = 100; 263 264 clk_enable(&port->phy_clk); 265 266 /* Initialize the phy */ 267 phy_write_reg(chan, 0x60, 0xf, 0x004b008b); 268 phy_write_reg(chan, 0x61, 0xf, 0x00007b41); 269 phy_write_reg(chan, 0x64, 0xf, 0x00ff4f00); 270 phy_write_reg(chan, 0x65, 0xf, 0x09070907); 271 phy_write_reg(chan, 0x66, 0xf, 0x00000010); 272 phy_write_reg(chan, 0x74, 0xf, 0x0007001c); 273 phy_write_reg(chan, 0x79, 0xf, 0x01fc000d); 274 phy_write_reg(chan, 0xb0, 0xf, 0x00000610); 275 276 /* Deassert Standby */ 277 phy_write_reg(chan, 0x67, 0x1, 0x00000400); 278 279 /* Disable clock */ 280 clk_disable(&port->phy_clk); 281 282 while (timeout--) { 283 if (pci_read_reg(chan, SH4A_PCIEPHYSR)) 284 return 0; 285 286 udelay(100); 287 } 288 289 return -ETIMEDOUT; 290 } 291 292 static void __init pcie_reset(struct sh7786_pcie_port *port) 293 { 294 struct pci_channel *chan = port->hose; 295 296 pci_write_reg(chan, 1, SH4A_PCIESRSTR); 297 pci_write_reg(chan, 0, SH4A_PCIETCTLR); 298 pci_write_reg(chan, 0, SH4A_PCIESRSTR); 299 pci_write_reg(chan, 0, SH4A_PCIETXVC0SR); 300 } 301 302 static int __init pcie_init(struct sh7786_pcie_port *port) 303 { 304 struct pci_channel *chan = port->hose; 305 unsigned int data; 306 phys_addr_t memstart, memend; 307 int ret, i, win; 308 309 /* Begin initialization */ 310 pcie_reset(port); 311 312 /* 313 * Initial header for port config space is type 1, set the device 314 * class to match. Hardware takes care of propagating the IDSETR 315 * settings, so there is no need to bother with a quirk. 316 */ 317 pci_write_reg(chan, PCI_CLASS_BRIDGE_PCI_NORMAL << 8, SH4A_PCIEIDSETR1); 318 319 /* Initialize default capabilities. */ 320 data = pci_read_reg(chan, SH4A_PCIEEXPCAP0); 321 data &= ~(PCI_EXP_FLAGS_TYPE << 16); 322 323 if (port->endpoint) 324 data |= PCI_EXP_TYPE_ENDPOINT << 20; 325 else 326 data |= PCI_EXP_TYPE_ROOT_PORT << 20; 327 328 data |= PCI_CAP_ID_EXP; 329 pci_write_reg(chan, data, SH4A_PCIEEXPCAP0); 330 331 /* Enable data link layer active state reporting */ 332 pci_write_reg(chan, PCI_EXP_LNKCAP_DLLLARC, SH4A_PCIEEXPCAP3); 333 334 /* Enable extended sync and ASPM L0s support */ 335 data = pci_read_reg(chan, SH4A_PCIEEXPCAP4); 336 data &= ~PCI_EXP_LNKCTL_ASPMC; 337 data |= PCI_EXP_LNKCTL_ES | 1; 338 pci_write_reg(chan, data, SH4A_PCIEEXPCAP4); 339 340 /* Write out the physical slot number */ 341 data = pci_read_reg(chan, SH4A_PCIEEXPCAP5); 342 data &= ~PCI_EXP_SLTCAP_PSN; 343 data |= (port->index + 1) << 19; 344 pci_write_reg(chan, data, SH4A_PCIEEXPCAP5); 345 346 /* Set the completion timer timeout to the maximum 32ms. */ 347 data = pci_read_reg(chan, SH4A_PCIETLCTLR); 348 data &= ~0x3f00; 349 data |= 0x32 << 8; 350 pci_write_reg(chan, data, SH4A_PCIETLCTLR); 351 352 /* 353 * Set fast training sequences to the maximum 255, 354 * and enable MAC data scrambling. 355 */ 356 data = pci_read_reg(chan, SH4A_PCIEMACCTLR); 357 data &= ~PCIEMACCTLR_SCR_DIS; 358 data |= (0xff << 16); 359 pci_write_reg(chan, data, SH4A_PCIEMACCTLR); 360 361 memstart = __pa(memory_start); 362 memend = __pa(memory_end); 363 memsize = roundup_pow_of_two(memend - memstart); 364 365 /* 366 * The start address must be aligned on its size. So we round 367 * it down, and then recalculate the size so that it covers 368 * the entire memory. 369 */ 370 memstart = ALIGN_DOWN(memstart, memsize); 371 memsize = roundup_pow_of_two(memend - memstart); 372 373 /* 374 * If there's more than 512MB of memory, we need to roll over to 375 * LAR1/LAMR1. 376 */ 377 if (memsize > SZ_512M) { 378 pci_write_reg(chan, memstart + SZ_512M, SH4A_PCIELAR1); 379 pci_write_reg(chan, ((memsize - SZ_512M) - SZ_256) | 1, 380 SH4A_PCIELAMR1); 381 memsize = SZ_512M; 382 } else { 383 /* 384 * Otherwise just zero it out and disable it. 385 */ 386 pci_write_reg(chan, 0, SH4A_PCIELAR1); 387 pci_write_reg(chan, 0, SH4A_PCIELAMR1); 388 } 389 390 /* 391 * LAR0/LAMR0 covers up to the first 512MB, which is enough to 392 * cover all of lowmem on most platforms. 393 */ 394 pci_write_reg(chan, memstart, SH4A_PCIELAR0); 395 pci_write_reg(chan, (memsize - SZ_256) | 1, SH4A_PCIELAMR0); 396 397 /* Finish initialization */ 398 data = pci_read_reg(chan, SH4A_PCIETCTLR); 399 data |= 0x1; 400 pci_write_reg(chan, data, SH4A_PCIETCTLR); 401 402 /* Let things settle down a bit.. */ 403 mdelay(100); 404 405 /* Enable DL_Active Interrupt generation */ 406 data = pci_read_reg(chan, SH4A_PCIEDLINTENR); 407 data |= PCIEDLINTENR_DLL_ACT_ENABLE; 408 pci_write_reg(chan, data, SH4A_PCIEDLINTENR); 409 410 /* Disable MAC data scrambling. */ 411 data = pci_read_reg(chan, SH4A_PCIEMACCTLR); 412 data |= PCIEMACCTLR_SCR_DIS | (0xff << 16); 413 pci_write_reg(chan, data, SH4A_PCIEMACCTLR); 414 415 /* 416 * This will timeout if we don't have a link, but we permit the 417 * port to register anyways in order to support hotplug on future 418 * hardware. 419 */ 420 ret = pci_wait_for_irq(chan, MASK_INT_TX_CTRL); 421 422 data = pci_read_reg(chan, SH4A_PCIEPCICONF1); 423 data &= ~(PCI_STATUS_DEVSEL_MASK << 16); 424 data |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | 425 (PCI_STATUS_CAP_LIST | PCI_STATUS_DEVSEL_FAST) << 16; 426 pci_write_reg(chan, data, SH4A_PCIEPCICONF1); 427 428 pci_write_reg(chan, 0x80888000, SH4A_PCIETXVC0DCTLR); 429 pci_write_reg(chan, 0x00222000, SH4A_PCIERXVC0DCTLR); 430 431 wmb(); 432 433 if (ret == 0) { 434 data = pci_read_reg(chan, SH4A_PCIEMACSR); 435 printk(KERN_NOTICE "PCI: PCIe#%d x%d link detected\n", 436 port->index, (data >> 20) & 0x3f); 437 } else 438 printk(KERN_NOTICE "PCI: PCIe#%d link down\n", 439 port->index); 440 441 for (i = win = 0; i < chan->nr_resources; i++) { 442 struct resource *res = chan->resources + i; 443 resource_size_t size; 444 u32 mask; 445 446 /* 447 * We can't use the 32-bit mode windows in legacy 29-bit 448 * mode, so just skip them entirely. 449 */ 450 if ((res->flags & IORESOURCE_MEM_32BIT) && __in_29bit_mode()) 451 res->flags |= IORESOURCE_DISABLED; 452 453 if (res->flags & IORESOURCE_DISABLED) 454 continue; 455 456 pci_write_reg(chan, 0x00000000, SH4A_PCIEPTCTLR(win)); 457 458 /* 459 * The PAMR mask is calculated in units of 256kB, which 460 * keeps things pretty simple. 461 */ 462 size = resource_size(res); 463 mask = (roundup_pow_of_two(size) / SZ_256K) - 1; 464 pci_write_reg(chan, mask << 18, SH4A_PCIEPAMR(win)); 465 466 pci_write_reg(chan, upper_32_bits(res->start), 467 SH4A_PCIEPARH(win)); 468 pci_write_reg(chan, lower_32_bits(res->start), 469 SH4A_PCIEPARL(win)); 470 471 mask = MASK_PARE; 472 if (res->flags & IORESOURCE_IO) 473 mask |= MASK_SPC; 474 475 pci_write_reg(chan, mask, SH4A_PCIEPTCTLR(win)); 476 477 win++; 478 } 479 480 return 0; 481 } 482 483 int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin) 484 { 485 return evt2irq(0xae0); 486 } 487 488 void pcibios_bus_add_device(struct pci_dev *pdev) 489 { 490 dma_direct_set_offset(&pdev->dev, __pa(memory_start), 491 __pa(memory_start) - memstart, memsize); 492 } 493 494 static int __init sh7786_pcie_core_init(void) 495 { 496 /* Return the number of ports */ 497 return test_mode_pin(MODE_PIN12) ? 3 : 2; 498 } 499 500 static void __init sh7786_pcie_init_hw(void *data, async_cookie_t cookie) 501 { 502 struct sh7786_pcie_port *port = data; 503 int ret; 504 505 /* 506 * Check if we are configured in endpoint or root complex mode, 507 * this is a fixed pin setting that applies to all PCIe ports. 508 */ 509 port->endpoint = test_mode_pin(MODE_PIN11); 510 511 /* 512 * Setup clocks, needed both for PHY and PCIe registers. 513 */ 514 ret = pcie_clk_init(port); 515 if (unlikely(ret < 0)) { 516 pr_err("clock initialization failed for port#%d\n", 517 port->index); 518 return; 519 } 520 521 ret = phy_init(port); 522 if (unlikely(ret < 0)) { 523 pr_err("phy initialization failed for port#%d\n", 524 port->index); 525 return; 526 } 527 528 ret = pcie_init(port); 529 if (unlikely(ret < 0)) { 530 pr_err("core initialization failed for port#%d\n", 531 port->index); 532 return; 533 } 534 535 /* In the interest of preserving device ordering, synchronize */ 536 async_synchronize_cookie(cookie); 537 538 register_pci_controller(port->hose); 539 } 540 541 static struct sh7786_pcie_hwops sh7786_65nm_pcie_hwops __initdata = { 542 .core_init = sh7786_pcie_core_init, 543 .port_init_hw = sh7786_pcie_init_hw, 544 }; 545 546 static int __init sh7786_pcie_init(void) 547 { 548 struct clk *platclk; 549 u32 mm_sel; 550 int i; 551 552 printk(KERN_NOTICE "PCI: Starting initialization.\n"); 553 554 sh7786_pcie_hwops = &sh7786_65nm_pcie_hwops; 555 556 nr_ports = sh7786_pcie_hwops->core_init(); 557 BUG_ON(nr_ports > ARRAY_SIZE(sh7786_pci_channels)); 558 559 if (unlikely(nr_ports == 0)) 560 return -ENODEV; 561 562 sh7786_pcie_ports = kcalloc(nr_ports, sizeof(struct sh7786_pcie_port), 563 GFP_KERNEL); 564 if (unlikely(!sh7786_pcie_ports)) 565 return -ENOMEM; 566 567 /* 568 * Fetch any optional platform clock associated with this block. 569 * 570 * This is a rather nasty hack for boards with spec-mocking FPGAs 571 * that have a secondary set of clocks outside of the on-chip 572 * ones that need to be accounted for before there is any chance 573 * of touching the existing MSTP bits or CPG clocks. 574 */ 575 platclk = clk_get(NULL, "pcie_plat_clk"); 576 if (IS_ERR(platclk)) { 577 /* Sane hardware should probably get a WARN_ON.. */ 578 platclk = NULL; 579 } 580 581 clk_enable(platclk); 582 583 mm_sel = sh7786_mm_sel(); 584 585 /* 586 * Depending on the MMSELR register value, the PCIe0 MEM 1 587 * area may not be available. See Table 13.11 of the SH7786 588 * datasheet. 589 */ 590 if (mm_sel != 1 && mm_sel != 2 && mm_sel != 5 && mm_sel != 6) 591 sh7786_pci0_resources[2].flags |= IORESOURCE_DISABLED; 592 593 printk(KERN_NOTICE "PCI: probing %d ports.\n", nr_ports); 594 595 for (i = 0; i < nr_ports; i++) { 596 struct sh7786_pcie_port *port = sh7786_pcie_ports + i; 597 598 port->index = i; 599 port->hose = sh7786_pci_channels + i; 600 port->hose->io_map_base = port->hose->resources[0].start; 601 602 async_schedule(sh7786_pcie_hwops->port_init_hw, port); 603 } 604 605 async_synchronize_full(); 606 607 return 0; 608 } 609 arch_initcall(sh7786_pcie_init); 610