1 /* 2 * Low-Level PCI Express Support for the SH7786 3 * 4 * Copyright (C) 2009 - 2011 Paul Mundt 5 * 6 * This file is subject to the terms and conditions of the GNU General Public 7 * License. See the file "COPYING" in the main directory of this archive 8 * for more details. 9 */ 10 #define pr_fmt(fmt) "PCI: " fmt 11 12 #include <linux/pci.h> 13 #include <linux/init.h> 14 #include <linux/kernel.h> 15 #include <linux/io.h> 16 #include <linux/async.h> 17 #include <linux/delay.h> 18 #include <linux/slab.h> 19 #include <linux/clk.h> 20 #include <linux/sh_clk.h> 21 #include "pcie-sh7786.h" 22 #include <asm/sizes.h> 23 24 struct sh7786_pcie_port { 25 struct pci_channel *hose; 26 struct clk *fclk, phy_clk; 27 unsigned int index; 28 int endpoint; 29 int link; 30 }; 31 32 static struct sh7786_pcie_port *sh7786_pcie_ports; 33 static unsigned int nr_ports; 34 35 static struct sh7786_pcie_hwops { 36 int (*core_init)(void); 37 async_func_ptr *port_init_hw; 38 } *sh7786_pcie_hwops; 39 40 static struct resource sh7786_pci0_resources[] = { 41 { 42 .name = "PCIe0 IO", 43 .start = 0xfd000000, 44 .end = 0xfd000000 + SZ_8M - 1, 45 .flags = IORESOURCE_IO, 46 }, { 47 .name = "PCIe0 MEM 0", 48 .start = 0xc0000000, 49 .end = 0xc0000000 + SZ_512M - 1, 50 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 51 }, { 52 .name = "PCIe0 MEM 1", 53 .start = 0x10000000, 54 .end = 0x10000000 + SZ_64M - 1, 55 .flags = IORESOURCE_MEM, 56 }, { 57 .name = "PCIe0 MEM 2", 58 .start = 0xfe100000, 59 .end = 0xfe100000 + SZ_1M - 1, 60 .flags = IORESOURCE_MEM, 61 }, 62 }; 63 64 static struct resource sh7786_pci1_resources[] = { 65 { 66 .name = "PCIe1 IO", 67 .start = 0xfd800000, 68 .end = 0xfd800000 + SZ_8M - 1, 69 .flags = IORESOURCE_IO, 70 }, { 71 .name = "PCIe1 MEM 0", 72 .start = 0xa0000000, 73 .end = 0xa0000000 + SZ_512M - 1, 74 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 75 }, { 76 .name = "PCIe1 MEM 1", 77 .start = 0x30000000, 78 .end = 0x30000000 + SZ_256M - 1, 79 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 80 }, { 81 .name = "PCIe1 MEM 2", 82 .start = 0xfe300000, 83 .end = 0xfe300000 + SZ_1M - 1, 84 .flags = IORESOURCE_MEM, 85 }, 86 }; 87 88 static struct resource sh7786_pci2_resources[] = { 89 { 90 .name = "PCIe2 IO", 91 .start = 0xfc800000, 92 .end = 0xfc800000 + SZ_4M - 1, 93 .flags = IORESOURCE_IO, 94 }, { 95 .name = "PCIe2 MEM 0", 96 .start = 0x80000000, 97 .end = 0x80000000 + SZ_512M - 1, 98 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 99 }, { 100 .name = "PCIe2 MEM 1", 101 .start = 0x20000000, 102 .end = 0x20000000 + SZ_256M - 1, 103 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 104 }, { 105 .name = "PCIe2 MEM 2", 106 .start = 0xfcd00000, 107 .end = 0xfcd00000 + SZ_1M - 1, 108 .flags = IORESOURCE_MEM, 109 }, 110 }; 111 112 extern struct pci_ops sh7786_pci_ops; 113 114 #define DEFINE_CONTROLLER(start, idx) \ 115 { \ 116 .pci_ops = &sh7786_pci_ops, \ 117 .resources = sh7786_pci##idx##_resources, \ 118 .nr_resources = ARRAY_SIZE(sh7786_pci##idx##_resources), \ 119 .reg_base = start, \ 120 .mem_offset = 0, \ 121 .io_offset = 0, \ 122 } 123 124 static struct pci_channel sh7786_pci_channels[] = { 125 DEFINE_CONTROLLER(0xfe000000, 0), 126 DEFINE_CONTROLLER(0xfe200000, 1), 127 DEFINE_CONTROLLER(0xfcc00000, 2), 128 }; 129 130 static struct clk fixed_pciexclkp = { 131 .rate = 100000000, /* 100 MHz reference clock */ 132 }; 133 134 static void __devinit sh7786_pci_fixup(struct pci_dev *dev) 135 { 136 /* 137 * Prevent enumeration of root complex resources. 138 */ 139 if (pci_is_root_bus(dev->bus) && dev->devfn == 0) { 140 int i; 141 142 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { 143 dev->resource[i].start = 0; 144 dev->resource[i].end = 0; 145 dev->resource[i].flags = 0; 146 } 147 } 148 } 149 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_RENESAS, PCI_DEVICE_ID_RENESAS_SH7786, 150 sh7786_pci_fixup); 151 152 static int __init phy_wait_for_ack(struct pci_channel *chan) 153 { 154 unsigned int timeout = 100; 155 156 while (timeout--) { 157 if (pci_read_reg(chan, SH4A_PCIEPHYADRR) & (1 << BITS_ACK)) 158 return 0; 159 160 udelay(100); 161 } 162 163 return -ETIMEDOUT; 164 } 165 166 static int __init pci_wait_for_irq(struct pci_channel *chan, unsigned int mask) 167 { 168 unsigned int timeout = 100; 169 170 while (timeout--) { 171 if ((pci_read_reg(chan, SH4A_PCIEINTR) & mask) == mask) 172 return 0; 173 174 udelay(100); 175 } 176 177 return -ETIMEDOUT; 178 } 179 180 static void __init phy_write_reg(struct pci_channel *chan, unsigned int addr, 181 unsigned int lane, unsigned int data) 182 { 183 unsigned long phyaddr; 184 185 phyaddr = (1 << BITS_CMD) + ((lane & 0xf) << BITS_LANE) + 186 ((addr & 0xff) << BITS_ADR); 187 188 /* Set write data */ 189 pci_write_reg(chan, data, SH4A_PCIEPHYDOUTR); 190 pci_write_reg(chan, phyaddr, SH4A_PCIEPHYADRR); 191 192 phy_wait_for_ack(chan); 193 194 /* Clear command */ 195 pci_write_reg(chan, 0, SH4A_PCIEPHYDOUTR); 196 pci_write_reg(chan, 0, SH4A_PCIEPHYADRR); 197 198 phy_wait_for_ack(chan); 199 } 200 201 static int __init pcie_clk_init(struct sh7786_pcie_port *port) 202 { 203 struct pci_channel *chan = port->hose; 204 struct clk *clk; 205 char fclk_name[16]; 206 int ret; 207 208 /* 209 * First register the fixed clock 210 */ 211 ret = clk_register(&fixed_pciexclkp); 212 if (unlikely(ret != 0)) 213 return ret; 214 215 /* 216 * Grab the port's function clock, which the PHY clock depends 217 * on. clock lookups don't help us much at this point, since no 218 * dev_id is available this early. Lame. 219 */ 220 snprintf(fclk_name, sizeof(fclk_name), "pcie%d_fck", port->index); 221 222 port->fclk = clk_get(NULL, fclk_name); 223 if (IS_ERR(port->fclk)) { 224 ret = PTR_ERR(port->fclk); 225 goto err_fclk; 226 } 227 228 clk_enable(port->fclk); 229 230 /* 231 * And now, set up the PHY clock 232 */ 233 clk = &port->phy_clk; 234 235 memset(clk, 0, sizeof(struct clk)); 236 237 clk->parent = &fixed_pciexclkp; 238 clk->enable_reg = (void __iomem *)(chan->reg_base + SH4A_PCIEPHYCTLR); 239 clk->enable_bit = BITS_CKE; 240 241 ret = sh_clk_mstp32_register(clk, 1); 242 if (unlikely(ret < 0)) 243 goto err_phy; 244 245 return 0; 246 247 err_phy: 248 clk_disable(port->fclk); 249 clk_put(port->fclk); 250 err_fclk: 251 clk_unregister(&fixed_pciexclkp); 252 253 return ret; 254 } 255 256 static int __init phy_init(struct sh7786_pcie_port *port) 257 { 258 struct pci_channel *chan = port->hose; 259 unsigned int timeout = 100; 260 261 clk_enable(&port->phy_clk); 262 263 /* Initialize the phy */ 264 phy_write_reg(chan, 0x60, 0xf, 0x004b008b); 265 phy_write_reg(chan, 0x61, 0xf, 0x00007b41); 266 phy_write_reg(chan, 0x64, 0xf, 0x00ff4f00); 267 phy_write_reg(chan, 0x65, 0xf, 0x09070907); 268 phy_write_reg(chan, 0x66, 0xf, 0x00000010); 269 phy_write_reg(chan, 0x74, 0xf, 0x0007001c); 270 phy_write_reg(chan, 0x79, 0xf, 0x01fc000d); 271 phy_write_reg(chan, 0xb0, 0xf, 0x00000610); 272 273 /* Deassert Standby */ 274 phy_write_reg(chan, 0x67, 0x1, 0x00000400); 275 276 /* Disable clock */ 277 clk_disable(&port->phy_clk); 278 279 while (timeout--) { 280 if (pci_read_reg(chan, SH4A_PCIEPHYSR)) 281 return 0; 282 283 udelay(100); 284 } 285 286 return -ETIMEDOUT; 287 } 288 289 static void __init pcie_reset(struct sh7786_pcie_port *port) 290 { 291 struct pci_channel *chan = port->hose; 292 293 pci_write_reg(chan, 1, SH4A_PCIESRSTR); 294 pci_write_reg(chan, 0, SH4A_PCIETCTLR); 295 pci_write_reg(chan, 0, SH4A_PCIESRSTR); 296 pci_write_reg(chan, 0, SH4A_PCIETXVC0SR); 297 } 298 299 static int __init pcie_init(struct sh7786_pcie_port *port) 300 { 301 struct pci_channel *chan = port->hose; 302 unsigned int data; 303 phys_addr_t memphys; 304 size_t memsize; 305 int ret, i, win; 306 307 /* Begin initialization */ 308 pcie_reset(port); 309 310 /* 311 * Initial header for port config space is type 1, set the device 312 * class to match. Hardware takes care of propagating the IDSETR 313 * settings, so there is no need to bother with a quirk. 314 */ 315 pci_write_reg(chan, PCI_CLASS_BRIDGE_PCI << 16, SH4A_PCIEIDSETR1); 316 317 /* Initialize default capabilities. */ 318 data = pci_read_reg(chan, SH4A_PCIEEXPCAP0); 319 data &= ~(PCI_EXP_FLAGS_TYPE << 16); 320 321 if (port->endpoint) 322 data |= PCI_EXP_TYPE_ENDPOINT << 20; 323 else 324 data |= PCI_EXP_TYPE_ROOT_PORT << 20; 325 326 data |= PCI_CAP_ID_EXP; 327 pci_write_reg(chan, data, SH4A_PCIEEXPCAP0); 328 329 /* Enable data link layer active state reporting */ 330 pci_write_reg(chan, PCI_EXP_LNKCAP_DLLLARC, SH4A_PCIEEXPCAP3); 331 332 /* Enable extended sync and ASPM L0s support */ 333 data = pci_read_reg(chan, SH4A_PCIEEXPCAP4); 334 data &= ~PCI_EXP_LNKCTL_ASPMC; 335 data |= PCI_EXP_LNKCTL_ES | 1; 336 pci_write_reg(chan, data, SH4A_PCIEEXPCAP4); 337 338 /* Write out the physical slot number */ 339 data = pci_read_reg(chan, SH4A_PCIEEXPCAP5); 340 data &= ~PCI_EXP_SLTCAP_PSN; 341 data |= (port->index + 1) << 19; 342 pci_write_reg(chan, data, SH4A_PCIEEXPCAP5); 343 344 /* Set the completion timer timeout to the maximum 32ms. */ 345 data = pci_read_reg(chan, SH4A_PCIETLCTLR); 346 data &= ~0x3f00; 347 data |= 0x32 << 8; 348 pci_write_reg(chan, data, SH4A_PCIETLCTLR); 349 350 /* 351 * Set fast training sequences to the maximum 255, 352 * and enable MAC data scrambling. 353 */ 354 data = pci_read_reg(chan, SH4A_PCIEMACCTLR); 355 data &= ~PCIEMACCTLR_SCR_DIS; 356 data |= (0xff << 16); 357 pci_write_reg(chan, data, SH4A_PCIEMACCTLR); 358 359 memphys = __pa(memory_start); 360 memsize = roundup_pow_of_two(memory_end - memory_start); 361 362 /* 363 * If there's more than 512MB of memory, we need to roll over to 364 * LAR1/LAMR1. 365 */ 366 if (memsize > SZ_512M) { 367 pci_write_reg(chan, memphys + SZ_512M, SH4A_PCIELAR1); 368 pci_write_reg(chan, ((memsize - SZ_512M) - SZ_256) | 1, 369 SH4A_PCIELAMR1); 370 memsize = SZ_512M; 371 } else { 372 /* 373 * Otherwise just zero it out and disable it. 374 */ 375 pci_write_reg(chan, 0, SH4A_PCIELAR1); 376 pci_write_reg(chan, 0, SH4A_PCIELAMR1); 377 } 378 379 /* 380 * LAR0/LAMR0 covers up to the first 512MB, which is enough to 381 * cover all of lowmem on most platforms. 382 */ 383 pci_write_reg(chan, memphys, SH4A_PCIELAR0); 384 pci_write_reg(chan, (memsize - SZ_256) | 1, SH4A_PCIELAMR0); 385 386 /* Finish initialization */ 387 data = pci_read_reg(chan, SH4A_PCIETCTLR); 388 data |= 0x1; 389 pci_write_reg(chan, data, SH4A_PCIETCTLR); 390 391 /* Let things settle down a bit.. */ 392 mdelay(100); 393 394 /* Enable DL_Active Interrupt generation */ 395 data = pci_read_reg(chan, SH4A_PCIEDLINTENR); 396 data |= PCIEDLINTENR_DLL_ACT_ENABLE; 397 pci_write_reg(chan, data, SH4A_PCIEDLINTENR); 398 399 /* Disable MAC data scrambling. */ 400 data = pci_read_reg(chan, SH4A_PCIEMACCTLR); 401 data |= PCIEMACCTLR_SCR_DIS | (0xff << 16); 402 pci_write_reg(chan, data, SH4A_PCIEMACCTLR); 403 404 /* 405 * This will timeout if we don't have a link, but we permit the 406 * port to register anyways in order to support hotplug on future 407 * hardware. 408 */ 409 ret = pci_wait_for_irq(chan, MASK_INT_TX_CTRL); 410 411 data = pci_read_reg(chan, SH4A_PCIEPCICONF1); 412 data &= ~(PCI_STATUS_DEVSEL_MASK << 16); 413 data |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | 414 (PCI_STATUS_CAP_LIST | PCI_STATUS_DEVSEL_FAST) << 16; 415 pci_write_reg(chan, data, SH4A_PCIEPCICONF1); 416 417 pci_write_reg(chan, 0x80888000, SH4A_PCIETXVC0DCTLR); 418 pci_write_reg(chan, 0x00222000, SH4A_PCIERXVC0DCTLR); 419 420 wmb(); 421 422 if (ret == 0) { 423 data = pci_read_reg(chan, SH4A_PCIEMACSR); 424 printk(KERN_NOTICE "PCI: PCIe#%d x%d link detected\n", 425 port->index, (data >> 20) & 0x3f); 426 } else 427 printk(KERN_NOTICE "PCI: PCIe#%d link down\n", 428 port->index); 429 430 for (i = win = 0; i < chan->nr_resources; i++) { 431 struct resource *res = chan->resources + i; 432 resource_size_t size; 433 u32 mask; 434 435 /* 436 * We can't use the 32-bit mode windows in legacy 29-bit 437 * mode, so just skip them entirely. 438 */ 439 if ((res->flags & IORESOURCE_MEM_32BIT) && __in_29bit_mode()) 440 continue; 441 442 pci_write_reg(chan, 0x00000000, SH4A_PCIEPTCTLR(win)); 443 444 /* 445 * The PAMR mask is calculated in units of 256kB, which 446 * keeps things pretty simple. 447 */ 448 size = resource_size(res); 449 mask = (roundup_pow_of_two(size) / SZ_256K) - 1; 450 pci_write_reg(chan, mask << 18, SH4A_PCIEPAMR(win)); 451 452 pci_write_reg(chan, upper_32_bits(res->start), 453 SH4A_PCIEPARH(win)); 454 pci_write_reg(chan, lower_32_bits(res->start), 455 SH4A_PCIEPARL(win)); 456 457 mask = MASK_PARE; 458 if (res->flags & IORESOURCE_IO) 459 mask |= MASK_SPC; 460 461 pci_write_reg(chan, mask, SH4A_PCIEPTCTLR(win)); 462 463 win++; 464 } 465 466 return 0; 467 } 468 469 int __init pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin) 470 { 471 return 71; 472 } 473 474 static int __init sh7786_pcie_core_init(void) 475 { 476 /* Return the number of ports */ 477 return test_mode_pin(MODE_PIN12) ? 3 : 2; 478 } 479 480 static void __init sh7786_pcie_init_hw(void *data, async_cookie_t cookie) 481 { 482 struct sh7786_pcie_port *port = data; 483 int ret; 484 485 /* 486 * Check if we are configured in endpoint or root complex mode, 487 * this is a fixed pin setting that applies to all PCIe ports. 488 */ 489 port->endpoint = test_mode_pin(MODE_PIN11); 490 491 /* 492 * Setup clocks, needed both for PHY and PCIe registers. 493 */ 494 ret = pcie_clk_init(port); 495 if (unlikely(ret < 0)) { 496 pr_err("clock initialization failed for port#%d\n", 497 port->index); 498 return; 499 } 500 501 ret = phy_init(port); 502 if (unlikely(ret < 0)) { 503 pr_err("phy initialization failed for port#%d\n", 504 port->index); 505 return; 506 } 507 508 ret = pcie_init(port); 509 if (unlikely(ret < 0)) { 510 pr_err("core initialization failed for port#%d\n", 511 port->index); 512 return; 513 } 514 515 /* In the interest of preserving device ordering, synchronize */ 516 async_synchronize_cookie(cookie); 517 518 register_pci_controller(port->hose); 519 } 520 521 static struct sh7786_pcie_hwops sh7786_65nm_pcie_hwops __initdata = { 522 .core_init = sh7786_pcie_core_init, 523 .port_init_hw = sh7786_pcie_init_hw, 524 }; 525 526 static int __init sh7786_pcie_init(void) 527 { 528 struct clk *platclk; 529 int i; 530 531 printk(KERN_NOTICE "PCI: Starting initialization.\n"); 532 533 sh7786_pcie_hwops = &sh7786_65nm_pcie_hwops; 534 535 nr_ports = sh7786_pcie_hwops->core_init(); 536 BUG_ON(nr_ports > ARRAY_SIZE(sh7786_pci_channels)); 537 538 if (unlikely(nr_ports == 0)) 539 return -ENODEV; 540 541 sh7786_pcie_ports = kzalloc(nr_ports * sizeof(struct sh7786_pcie_port), 542 GFP_KERNEL); 543 if (unlikely(!sh7786_pcie_ports)) 544 return -ENOMEM; 545 546 /* 547 * Fetch any optional platform clock associated with this block. 548 * 549 * This is a rather nasty hack for boards with spec-mocking FPGAs 550 * that have a secondary set of clocks outside of the on-chip 551 * ones that need to be accounted for before there is any chance 552 * of touching the existing MSTP bits or CPG clocks. 553 */ 554 platclk = clk_get(NULL, "pcie_plat_clk"); 555 if (IS_ERR(platclk)) { 556 /* Sane hardware should probably get a WARN_ON.. */ 557 platclk = NULL; 558 } 559 560 clk_enable(platclk); 561 562 printk(KERN_NOTICE "PCI: probing %d ports.\n", nr_ports); 563 564 for (i = 0; i < nr_ports; i++) { 565 struct sh7786_pcie_port *port = sh7786_pcie_ports + i; 566 567 port->index = i; 568 port->hose = sh7786_pci_channels + i; 569 port->hose->io_map_base = port->hose->resources[0].start; 570 571 async_schedule(sh7786_pcie_hwops->port_init_hw, port); 572 } 573 574 async_synchronize_full(); 575 576 return 0; 577 } 578 arch_initcall(sh7786_pcie_init); 579