1 /* 2 * Low-Level PCI Express Support for the SH7786 3 * 4 * Copyright (C) 2009 - 2011 Paul Mundt 5 * 6 * This file is subject to the terms and conditions of the GNU General Public 7 * License. See the file "COPYING" in the main directory of this archive 8 * for more details. 9 */ 10 #define pr_fmt(fmt) "PCI: " fmt 11 12 #include <linux/pci.h> 13 #include <linux/init.h> 14 #include <linux/kernel.h> 15 #include <linux/io.h> 16 #include <linux/async.h> 17 #include <linux/delay.h> 18 #include <linux/slab.h> 19 #include <linux/clk.h> 20 #include <linux/sh_clk.h> 21 #include <linux/sh_intc.h> 22 #include <cpu/sh7786.h> 23 #include "pcie-sh7786.h" 24 #include <asm/sizes.h> 25 26 struct sh7786_pcie_port { 27 struct pci_channel *hose; 28 struct clk *fclk, phy_clk; 29 unsigned int index; 30 int endpoint; 31 int link; 32 }; 33 34 static struct sh7786_pcie_port *sh7786_pcie_ports; 35 static unsigned int nr_ports; 36 static unsigned long dma_pfn_offset; 37 38 static struct sh7786_pcie_hwops { 39 int (*core_init)(void); 40 async_func_t port_init_hw; 41 } *sh7786_pcie_hwops; 42 43 static struct resource sh7786_pci0_resources[] = { 44 { 45 .name = "PCIe0 MEM 0", 46 .start = 0xfd000000, 47 .end = 0xfd000000 + SZ_8M - 1, 48 .flags = IORESOURCE_MEM, 49 }, { 50 .name = "PCIe0 MEM 1", 51 .start = 0xc0000000, 52 .end = 0xc0000000 + SZ_512M - 1, 53 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 54 }, { 55 .name = "PCIe0 MEM 2", 56 .start = 0x10000000, 57 .end = 0x10000000 + SZ_64M - 1, 58 .flags = IORESOURCE_MEM, 59 }, { 60 .name = "PCIe0 IO", 61 .start = 0xfe100000, 62 .end = 0xfe100000 + SZ_1M - 1, 63 .flags = IORESOURCE_IO, 64 }, 65 }; 66 67 static struct resource sh7786_pci1_resources[] = { 68 { 69 .name = "PCIe1 MEM 0", 70 .start = 0xfd800000, 71 .end = 0xfd800000 + SZ_8M - 1, 72 .flags = IORESOURCE_MEM, 73 }, { 74 .name = "PCIe1 MEM 1", 75 .start = 0xa0000000, 76 .end = 0xa0000000 + SZ_512M - 1, 77 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 78 }, { 79 .name = "PCIe1 MEM 2", 80 .start = 0x30000000, 81 .end = 0x30000000 + SZ_256M - 1, 82 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 83 }, { 84 .name = "PCIe1 IO", 85 .start = 0xfe300000, 86 .end = 0xfe300000 + SZ_1M - 1, 87 .flags = IORESOURCE_IO, 88 }, 89 }; 90 91 static struct resource sh7786_pci2_resources[] = { 92 { 93 .name = "PCIe2 MEM 0", 94 .start = 0xfc800000, 95 .end = 0xfc800000 + SZ_4M - 1, 96 .flags = IORESOURCE_MEM, 97 }, { 98 .name = "PCIe2 MEM 1", 99 .start = 0x80000000, 100 .end = 0x80000000 + SZ_512M - 1, 101 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 102 }, { 103 .name = "PCIe2 MEM 2", 104 .start = 0x20000000, 105 .end = 0x20000000 + SZ_256M - 1, 106 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 107 }, { 108 .name = "PCIe2 IO", 109 .start = 0xfcd00000, 110 .end = 0xfcd00000 + SZ_1M - 1, 111 .flags = IORESOURCE_IO, 112 }, 113 }; 114 115 extern struct pci_ops sh7786_pci_ops; 116 117 #define DEFINE_CONTROLLER(start, idx) \ 118 { \ 119 .pci_ops = &sh7786_pci_ops, \ 120 .resources = sh7786_pci##idx##_resources, \ 121 .nr_resources = ARRAY_SIZE(sh7786_pci##idx##_resources), \ 122 .reg_base = start, \ 123 .mem_offset = 0, \ 124 .io_offset = 0, \ 125 } 126 127 static struct pci_channel sh7786_pci_channels[] = { 128 DEFINE_CONTROLLER(0xfe000000, 0), 129 DEFINE_CONTROLLER(0xfe200000, 1), 130 DEFINE_CONTROLLER(0xfcc00000, 2), 131 }; 132 133 static struct clk fixed_pciexclkp = { 134 .rate = 100000000, /* 100 MHz reference clock */ 135 }; 136 137 static void sh7786_pci_fixup(struct pci_dev *dev) 138 { 139 /* 140 * Prevent enumeration of root complex resources. 141 */ 142 if (pci_is_root_bus(dev->bus) && dev->devfn == 0) { 143 int i; 144 145 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { 146 dev->resource[i].start = 0; 147 dev->resource[i].end = 0; 148 dev->resource[i].flags = 0; 149 } 150 } 151 } 152 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_RENESAS, PCI_DEVICE_ID_RENESAS_SH7786, 153 sh7786_pci_fixup); 154 155 static int __init phy_wait_for_ack(struct pci_channel *chan) 156 { 157 unsigned int timeout = 100; 158 159 while (timeout--) { 160 if (pci_read_reg(chan, SH4A_PCIEPHYADRR) & (1 << BITS_ACK)) 161 return 0; 162 163 udelay(100); 164 } 165 166 return -ETIMEDOUT; 167 } 168 169 static int __init pci_wait_for_irq(struct pci_channel *chan, unsigned int mask) 170 { 171 unsigned int timeout = 100; 172 173 while (timeout--) { 174 if ((pci_read_reg(chan, SH4A_PCIEINTR) & mask) == mask) 175 return 0; 176 177 udelay(100); 178 } 179 180 return -ETIMEDOUT; 181 } 182 183 static void __init phy_write_reg(struct pci_channel *chan, unsigned int addr, 184 unsigned int lane, unsigned int data) 185 { 186 unsigned long phyaddr; 187 188 phyaddr = (1 << BITS_CMD) + ((lane & 0xf) << BITS_LANE) + 189 ((addr & 0xff) << BITS_ADR); 190 191 /* Set write data */ 192 pci_write_reg(chan, data, SH4A_PCIEPHYDOUTR); 193 pci_write_reg(chan, phyaddr, SH4A_PCIEPHYADRR); 194 195 phy_wait_for_ack(chan); 196 197 /* Clear command */ 198 pci_write_reg(chan, 0, SH4A_PCIEPHYDOUTR); 199 pci_write_reg(chan, 0, SH4A_PCIEPHYADRR); 200 201 phy_wait_for_ack(chan); 202 } 203 204 static int __init pcie_clk_init(struct sh7786_pcie_port *port) 205 { 206 struct pci_channel *chan = port->hose; 207 struct clk *clk; 208 char fclk_name[16]; 209 int ret; 210 211 /* 212 * First register the fixed clock 213 */ 214 ret = clk_register(&fixed_pciexclkp); 215 if (unlikely(ret != 0)) 216 return ret; 217 218 /* 219 * Grab the port's function clock, which the PHY clock depends 220 * on. clock lookups don't help us much at this point, since no 221 * dev_id is available this early. Lame. 222 */ 223 snprintf(fclk_name, sizeof(fclk_name), "pcie%d_fck", port->index); 224 225 port->fclk = clk_get(NULL, fclk_name); 226 if (IS_ERR(port->fclk)) { 227 ret = PTR_ERR(port->fclk); 228 goto err_fclk; 229 } 230 231 clk_enable(port->fclk); 232 233 /* 234 * And now, set up the PHY clock 235 */ 236 clk = &port->phy_clk; 237 238 memset(clk, 0, sizeof(struct clk)); 239 240 clk->parent = &fixed_pciexclkp; 241 clk->enable_reg = (void __iomem *)(chan->reg_base + SH4A_PCIEPHYCTLR); 242 clk->enable_bit = BITS_CKE; 243 244 ret = sh_clk_mstp_register(clk, 1); 245 if (unlikely(ret < 0)) 246 goto err_phy; 247 248 return 0; 249 250 err_phy: 251 clk_disable(port->fclk); 252 clk_put(port->fclk); 253 err_fclk: 254 clk_unregister(&fixed_pciexclkp); 255 256 return ret; 257 } 258 259 static int __init phy_init(struct sh7786_pcie_port *port) 260 { 261 struct pci_channel *chan = port->hose; 262 unsigned int timeout = 100; 263 264 clk_enable(&port->phy_clk); 265 266 /* Initialize the phy */ 267 phy_write_reg(chan, 0x60, 0xf, 0x004b008b); 268 phy_write_reg(chan, 0x61, 0xf, 0x00007b41); 269 phy_write_reg(chan, 0x64, 0xf, 0x00ff4f00); 270 phy_write_reg(chan, 0x65, 0xf, 0x09070907); 271 phy_write_reg(chan, 0x66, 0xf, 0x00000010); 272 phy_write_reg(chan, 0x74, 0xf, 0x0007001c); 273 phy_write_reg(chan, 0x79, 0xf, 0x01fc000d); 274 phy_write_reg(chan, 0xb0, 0xf, 0x00000610); 275 276 /* Deassert Standby */ 277 phy_write_reg(chan, 0x67, 0x1, 0x00000400); 278 279 /* Disable clock */ 280 clk_disable(&port->phy_clk); 281 282 while (timeout--) { 283 if (pci_read_reg(chan, SH4A_PCIEPHYSR)) 284 return 0; 285 286 udelay(100); 287 } 288 289 return -ETIMEDOUT; 290 } 291 292 static void __init pcie_reset(struct sh7786_pcie_port *port) 293 { 294 struct pci_channel *chan = port->hose; 295 296 pci_write_reg(chan, 1, SH4A_PCIESRSTR); 297 pci_write_reg(chan, 0, SH4A_PCIETCTLR); 298 pci_write_reg(chan, 0, SH4A_PCIESRSTR); 299 pci_write_reg(chan, 0, SH4A_PCIETXVC0SR); 300 } 301 302 static int __init pcie_init(struct sh7786_pcie_port *port) 303 { 304 struct pci_channel *chan = port->hose; 305 unsigned int data; 306 phys_addr_t memstart, memend; 307 size_t memsize; 308 int ret, i, win; 309 310 /* Begin initialization */ 311 pcie_reset(port); 312 313 /* 314 * Initial header for port config space is type 1, set the device 315 * class to match. Hardware takes care of propagating the IDSETR 316 * settings, so there is no need to bother with a quirk. 317 */ 318 pci_write_reg(chan, PCI_CLASS_BRIDGE_PCI << 16, SH4A_PCIEIDSETR1); 319 320 /* Initialize default capabilities. */ 321 data = pci_read_reg(chan, SH4A_PCIEEXPCAP0); 322 data &= ~(PCI_EXP_FLAGS_TYPE << 16); 323 324 if (port->endpoint) 325 data |= PCI_EXP_TYPE_ENDPOINT << 20; 326 else 327 data |= PCI_EXP_TYPE_ROOT_PORT << 20; 328 329 data |= PCI_CAP_ID_EXP; 330 pci_write_reg(chan, data, SH4A_PCIEEXPCAP0); 331 332 /* Enable data link layer active state reporting */ 333 pci_write_reg(chan, PCI_EXP_LNKCAP_DLLLARC, SH4A_PCIEEXPCAP3); 334 335 /* Enable extended sync and ASPM L0s support */ 336 data = pci_read_reg(chan, SH4A_PCIEEXPCAP4); 337 data &= ~PCI_EXP_LNKCTL_ASPMC; 338 data |= PCI_EXP_LNKCTL_ES | 1; 339 pci_write_reg(chan, data, SH4A_PCIEEXPCAP4); 340 341 /* Write out the physical slot number */ 342 data = pci_read_reg(chan, SH4A_PCIEEXPCAP5); 343 data &= ~PCI_EXP_SLTCAP_PSN; 344 data |= (port->index + 1) << 19; 345 pci_write_reg(chan, data, SH4A_PCIEEXPCAP5); 346 347 /* Set the completion timer timeout to the maximum 32ms. */ 348 data = pci_read_reg(chan, SH4A_PCIETLCTLR); 349 data &= ~0x3f00; 350 data |= 0x32 << 8; 351 pci_write_reg(chan, data, SH4A_PCIETLCTLR); 352 353 /* 354 * Set fast training sequences to the maximum 255, 355 * and enable MAC data scrambling. 356 */ 357 data = pci_read_reg(chan, SH4A_PCIEMACCTLR); 358 data &= ~PCIEMACCTLR_SCR_DIS; 359 data |= (0xff << 16); 360 pci_write_reg(chan, data, SH4A_PCIEMACCTLR); 361 362 memstart = __pa(memory_start); 363 memend = __pa(memory_end); 364 memsize = roundup_pow_of_two(memend - memstart); 365 366 /* 367 * The start address must be aligned on its size. So we round 368 * it down, and then recalculate the size so that it covers 369 * the entire memory. 370 */ 371 memstart = ALIGN_DOWN(memstart, memsize); 372 memsize = roundup_pow_of_two(memend - memstart); 373 374 dma_pfn_offset = memstart >> PAGE_SHIFT; 375 376 /* 377 * If there's more than 512MB of memory, we need to roll over to 378 * LAR1/LAMR1. 379 */ 380 if (memsize > SZ_512M) { 381 pci_write_reg(chan, memstart + SZ_512M, SH4A_PCIELAR1); 382 pci_write_reg(chan, ((memsize - SZ_512M) - SZ_256) | 1, 383 SH4A_PCIELAMR1); 384 memsize = SZ_512M; 385 } else { 386 /* 387 * Otherwise just zero it out and disable it. 388 */ 389 pci_write_reg(chan, 0, SH4A_PCIELAR1); 390 pci_write_reg(chan, 0, SH4A_PCIELAMR1); 391 } 392 393 /* 394 * LAR0/LAMR0 covers up to the first 512MB, which is enough to 395 * cover all of lowmem on most platforms. 396 */ 397 pci_write_reg(chan, memstart, SH4A_PCIELAR0); 398 pci_write_reg(chan, (memsize - SZ_256) | 1, SH4A_PCIELAMR0); 399 400 /* Finish initialization */ 401 data = pci_read_reg(chan, SH4A_PCIETCTLR); 402 data |= 0x1; 403 pci_write_reg(chan, data, SH4A_PCIETCTLR); 404 405 /* Let things settle down a bit.. */ 406 mdelay(100); 407 408 /* Enable DL_Active Interrupt generation */ 409 data = pci_read_reg(chan, SH4A_PCIEDLINTENR); 410 data |= PCIEDLINTENR_DLL_ACT_ENABLE; 411 pci_write_reg(chan, data, SH4A_PCIEDLINTENR); 412 413 /* Disable MAC data scrambling. */ 414 data = pci_read_reg(chan, SH4A_PCIEMACCTLR); 415 data |= PCIEMACCTLR_SCR_DIS | (0xff << 16); 416 pci_write_reg(chan, data, SH4A_PCIEMACCTLR); 417 418 /* 419 * This will timeout if we don't have a link, but we permit the 420 * port to register anyways in order to support hotplug on future 421 * hardware. 422 */ 423 ret = pci_wait_for_irq(chan, MASK_INT_TX_CTRL); 424 425 data = pci_read_reg(chan, SH4A_PCIEPCICONF1); 426 data &= ~(PCI_STATUS_DEVSEL_MASK << 16); 427 data |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | 428 (PCI_STATUS_CAP_LIST | PCI_STATUS_DEVSEL_FAST) << 16; 429 pci_write_reg(chan, data, SH4A_PCIEPCICONF1); 430 431 pci_write_reg(chan, 0x80888000, SH4A_PCIETXVC0DCTLR); 432 pci_write_reg(chan, 0x00222000, SH4A_PCIERXVC0DCTLR); 433 434 wmb(); 435 436 if (ret == 0) { 437 data = pci_read_reg(chan, SH4A_PCIEMACSR); 438 printk(KERN_NOTICE "PCI: PCIe#%d x%d link detected\n", 439 port->index, (data >> 20) & 0x3f); 440 } else 441 printk(KERN_NOTICE "PCI: PCIe#%d link down\n", 442 port->index); 443 444 for (i = win = 0; i < chan->nr_resources; i++) { 445 struct resource *res = chan->resources + i; 446 resource_size_t size; 447 u32 mask; 448 449 /* 450 * We can't use the 32-bit mode windows in legacy 29-bit 451 * mode, so just skip them entirely. 452 */ 453 if ((res->flags & IORESOURCE_MEM_32BIT) && __in_29bit_mode()) 454 res->flags |= IORESOURCE_DISABLED; 455 456 if (res->flags & IORESOURCE_DISABLED) 457 continue; 458 459 pci_write_reg(chan, 0x00000000, SH4A_PCIEPTCTLR(win)); 460 461 /* 462 * The PAMR mask is calculated in units of 256kB, which 463 * keeps things pretty simple. 464 */ 465 size = resource_size(res); 466 mask = (roundup_pow_of_two(size) / SZ_256K) - 1; 467 pci_write_reg(chan, mask << 18, SH4A_PCIEPAMR(win)); 468 469 pci_write_reg(chan, upper_32_bits(res->start), 470 SH4A_PCIEPARH(win)); 471 pci_write_reg(chan, lower_32_bits(res->start), 472 SH4A_PCIEPARL(win)); 473 474 mask = MASK_PARE; 475 if (res->flags & IORESOURCE_IO) 476 mask |= MASK_SPC; 477 478 pci_write_reg(chan, mask, SH4A_PCIEPTCTLR(win)); 479 480 win++; 481 } 482 483 return 0; 484 } 485 486 int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin) 487 { 488 return evt2irq(0xae0); 489 } 490 491 void pcibios_bus_add_device(struct pci_dev *pdev) 492 { 493 pdev->dev.dma_pfn_offset = dma_pfn_offset; 494 } 495 496 static int __init sh7786_pcie_core_init(void) 497 { 498 /* Return the number of ports */ 499 return test_mode_pin(MODE_PIN12) ? 3 : 2; 500 } 501 502 static void __init sh7786_pcie_init_hw(void *data, async_cookie_t cookie) 503 { 504 struct sh7786_pcie_port *port = data; 505 int ret; 506 507 /* 508 * Check if we are configured in endpoint or root complex mode, 509 * this is a fixed pin setting that applies to all PCIe ports. 510 */ 511 port->endpoint = test_mode_pin(MODE_PIN11); 512 513 /* 514 * Setup clocks, needed both for PHY and PCIe registers. 515 */ 516 ret = pcie_clk_init(port); 517 if (unlikely(ret < 0)) { 518 pr_err("clock initialization failed for port#%d\n", 519 port->index); 520 return; 521 } 522 523 ret = phy_init(port); 524 if (unlikely(ret < 0)) { 525 pr_err("phy initialization failed for port#%d\n", 526 port->index); 527 return; 528 } 529 530 ret = pcie_init(port); 531 if (unlikely(ret < 0)) { 532 pr_err("core initialization failed for port#%d\n", 533 port->index); 534 return; 535 } 536 537 /* In the interest of preserving device ordering, synchronize */ 538 async_synchronize_cookie(cookie); 539 540 register_pci_controller(port->hose); 541 } 542 543 static struct sh7786_pcie_hwops sh7786_65nm_pcie_hwops __initdata = { 544 .core_init = sh7786_pcie_core_init, 545 .port_init_hw = sh7786_pcie_init_hw, 546 }; 547 548 static int __init sh7786_pcie_init(void) 549 { 550 struct clk *platclk; 551 u32 mm_sel; 552 int i; 553 554 printk(KERN_NOTICE "PCI: Starting initialization.\n"); 555 556 sh7786_pcie_hwops = &sh7786_65nm_pcie_hwops; 557 558 nr_ports = sh7786_pcie_hwops->core_init(); 559 BUG_ON(nr_ports > ARRAY_SIZE(sh7786_pci_channels)); 560 561 if (unlikely(nr_ports == 0)) 562 return -ENODEV; 563 564 sh7786_pcie_ports = kcalloc(nr_ports, sizeof(struct sh7786_pcie_port), 565 GFP_KERNEL); 566 if (unlikely(!sh7786_pcie_ports)) 567 return -ENOMEM; 568 569 /* 570 * Fetch any optional platform clock associated with this block. 571 * 572 * This is a rather nasty hack for boards with spec-mocking FPGAs 573 * that have a secondary set of clocks outside of the on-chip 574 * ones that need to be accounted for before there is any chance 575 * of touching the existing MSTP bits or CPG clocks. 576 */ 577 platclk = clk_get(NULL, "pcie_plat_clk"); 578 if (IS_ERR(platclk)) { 579 /* Sane hardware should probably get a WARN_ON.. */ 580 platclk = NULL; 581 } 582 583 clk_enable(platclk); 584 585 mm_sel = sh7786_mm_sel(); 586 587 /* 588 * Depending on the MMSELR register value, the PCIe0 MEM 1 589 * area may not be available. See Table 13.11 of the SH7786 590 * datasheet. 591 */ 592 if (mm_sel != 1 && mm_sel != 2 && mm_sel != 5 && mm_sel != 6) 593 sh7786_pci0_resources[2].flags |= IORESOURCE_DISABLED; 594 595 printk(KERN_NOTICE "PCI: probing %d ports.\n", nr_ports); 596 597 for (i = 0; i < nr_ports; i++) { 598 struct sh7786_pcie_port *port = sh7786_pcie_ports + i; 599 600 port->index = i; 601 port->hose = sh7786_pci_channels + i; 602 port->hose->io_map_base = port->hose->resources[0].start; 603 604 async_schedule(sh7786_pcie_hwops->port_init_hw, port); 605 } 606 607 async_synchronize_full(); 608 609 return 0; 610 } 611 arch_initcall(sh7786_pcie_init); 612