/*
 * Low-Level PCI Express Support for the SH7786
 *
 * Copyright (C) 2009 - 2010 Paul Mundt
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 */
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/clk.h>
#include <linux/sh_clk.h>
#include "pcie-sh7786.h"
#include <asm/sizes.h>

struct sh7786_pcie_port {
        struct pci_channel *hose;
        struct clk *fclk, phy_clk;
        unsigned int index;
        int endpoint;
        int link;
};

static struct sh7786_pcie_port *sh7786_pcie_ports;
static unsigned int nr_ports;

static struct sh7786_pcie_hwops {
        int (*core_init)(void);
        int (*port_init_hw)(struct sh7786_pcie_port *port);
} *sh7786_pcie_hwops;

static struct resource sh7786_pci0_resources[] = {
        {
                .name = "PCIe0 IO",
                .start = 0xfd000000,
                .end = 0xfd000000 + SZ_8M - 1,
                .flags = IORESOURCE_IO,
        }, {
                .name = "PCIe0 MEM 0",
                .start = 0xc0000000,
                .end = 0xc0000000 + SZ_512M - 1,
                .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
        }, {
                .name = "PCIe0 MEM 1",
                .start = 0x10000000,
                .end = 0x10000000 + SZ_64M - 1,
                .flags = IORESOURCE_MEM,
        }, {
                .name = "PCIe0 MEM 2",
                .start = 0xfe100000,
                .end = 0xfe100000 + SZ_1M - 1,
                .flags = IORESOURCE_MEM,
        },
};

static struct resource sh7786_pci1_resources[] = {
        {
                .name = "PCIe1 IO",
                .start = 0xfd800000,
                .end = 0xfd800000 + SZ_8M - 1,
                .flags = IORESOURCE_IO,
        }, {
                .name = "PCIe1 MEM 0",
                .start = 0xa0000000,
                .end = 0xa0000000 + SZ_512M - 1,
                .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
        }, {
                .name = "PCIe1 MEM 1",
                .start = 0x30000000,
                .end = 0x30000000 + SZ_256M - 1,
                .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
        }, {
                .name = "PCIe1 MEM 2",
                .start = 0xfe300000,
                .end = 0xfe300000 + SZ_1M - 1,
                .flags = IORESOURCE_MEM,
        },
};

static struct resource sh7786_pci2_resources[] = {
        {
                .name = "PCIe2 IO",
                .start = 0xfc800000,
                .end = 0xfc800000 + SZ_4M - 1,
                .flags = IORESOURCE_IO,
        }, {
                .name = "PCIe2 MEM 0",
                .start = 0x80000000,
                .end = 0x80000000 + SZ_512M - 1,
                .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
        }, {
                .name = "PCIe2 MEM 1",
                .start = 0x20000000,
                .end = 0x20000000 + SZ_256M - 1,
                .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
        }, {
                .name = "PCIe2 MEM 2",
                .start = 0xfcd00000,
                .end = 0xfcd00000 + SZ_1M - 1,
                .flags = IORESOURCE_MEM,
        },
};

extern struct pci_ops sh7786_pci_ops;

#define DEFINE_CONTROLLER(start, idx) \
{ \
        .pci_ops = &sh7786_pci_ops, \
        .resources = sh7786_pci##idx##_resources, \
        .nr_resources = ARRAY_SIZE(sh7786_pci##idx##_resources), \
        .reg_base = start, \
        .mem_offset = 0, \
        .io_offset = 0, \
}

static struct pci_channel sh7786_pci_channels[] = {
        DEFINE_CONTROLLER(0xfe000000, 0),
        DEFINE_CONTROLLER(0xfe200000, 1),
        DEFINE_CONTROLLER(0xfcc00000, 2),
};

static struct clk fixed_pciexclkp = {
        .rate = 100000000,      /* 100 MHz reference clock */
};

static void __devinit sh7786_pci_fixup(struct pci_dev *dev)
{
        /*
         * Prevent enumeration of root complex resources.
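         *
         * The root complex appears to the core PCI code as an ordinary
         * device at devfn 0 on the root bus, so zero out all of its
         * resources to keep them from being claimed or reassigned.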
         */
        if (pci_is_root_bus(dev->bus) && dev->devfn == 0) {
                int i;

                for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
                        dev->resource[i].start = 0;
                        dev->resource[i].end = 0;
                        dev->resource[i].flags = 0;
                }
        }
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_RENESAS, PCI_DEVICE_ID_RENESAS_SH7786,
                         sh7786_pci_fixup);

static int __init phy_wait_for_ack(struct pci_channel *chan)
{
        unsigned int timeout = 100;

        while (timeout--) {
                if (pci_read_reg(chan, SH4A_PCIEPHYADRR) & (1 << BITS_ACK))
                        return 0;

                udelay(100);
        }

        return -ETIMEDOUT;
}

static int __init pci_wait_for_irq(struct pci_channel *chan, unsigned int mask)
{
        unsigned int timeout = 100;

        while (timeout--) {
                if ((pci_read_reg(chan, SH4A_PCIEINTR) & mask) == mask)
                        return 0;

                udelay(100);
        }

        return -ETIMEDOUT;
}

static void __init phy_write_reg(struct pci_channel *chan, unsigned int addr,
                                 unsigned int lane, unsigned int data)
{
        unsigned long phyaddr;

        phyaddr = (1 << BITS_CMD) + ((lane & 0xf) << BITS_LANE) +
                  ((addr & 0xff) << BITS_ADR);

        /* Set write data */
        pci_write_reg(chan, data, SH4A_PCIEPHYDOUTR);
        pci_write_reg(chan, phyaddr, SH4A_PCIEPHYADRR);

        phy_wait_for_ack(chan);

        /* Clear command */
        pci_write_reg(chan, 0, SH4A_PCIEPHYDOUTR);
        pci_write_reg(chan, 0, SH4A_PCIEPHYADRR);

        phy_wait_for_ack(chan);
}

static int __init pcie_clk_init(struct sh7786_pcie_port *port)
{
        struct pci_channel *chan = port->hose;
        struct clk *clk;
        char fclk_name[16];
        int ret;

        /*
         * First register the fixed clock
         */
        ret = clk_register(&fixed_pciexclkp);
        if (unlikely(ret != 0))
                return ret;

        /*
         * Grab the port's function clock, which the PHY clock depends
         * on. clock lookups don't help us much at this point, since no
         * dev_id is available this early. Lame.
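         *
         * Fall back on looking the clock up by its "pcie%d_fck" name
         * with a NULL device instead.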
         */
        snprintf(fclk_name, sizeof(fclk_name), "pcie%d_fck", port->index);

        port->fclk = clk_get(NULL, fclk_name);
        if (IS_ERR(port->fclk)) {
                ret = PTR_ERR(port->fclk);
                goto err_fclk;
        }

        clk_enable(port->fclk);

        /*
         * And now, set up the PHY clock
         */
        clk = &port->phy_clk;

        memset(clk, 0, sizeof(struct clk));

        clk->parent = &fixed_pciexclkp;
        clk->enable_reg = (void __iomem *)(chan->reg_base + SH4A_PCIEPHYCTLR);
        clk->enable_bit = BITS_CKE;

        ret = sh_clk_mstp32_register(clk, 1);
        if (unlikely(ret < 0))
                goto err_phy;

        return 0;

err_phy:
        clk_disable(port->fclk);
        clk_put(port->fclk);
err_fclk:
        clk_unregister(&fixed_pciexclkp);

        return ret;
}

static int __init phy_init(struct sh7786_pcie_port *port)
{
        struct pci_channel *chan = port->hose;
        unsigned int timeout = 100;

        clk_enable(&port->phy_clk);

        /* Initialize the phy */
        phy_write_reg(chan, 0x60, 0xf, 0x004b008b);
        phy_write_reg(chan, 0x61, 0xf, 0x00007b41);
        phy_write_reg(chan, 0x64, 0xf, 0x00ff4f00);
        phy_write_reg(chan, 0x65, 0xf, 0x09070907);
        phy_write_reg(chan, 0x66, 0xf, 0x00000010);
        phy_write_reg(chan, 0x74, 0xf, 0x0007001c);
        phy_write_reg(chan, 0x79, 0xf, 0x01fc000d);
        phy_write_reg(chan, 0xb0, 0xf, 0x00000610);

        /* Deassert Standby */
        phy_write_reg(chan, 0x67, 0x1, 0x00000400);

        /* Disable clock */
        clk_disable(&port->phy_clk);

        while (timeout--) {
                if (pci_read_reg(chan, SH4A_PCIEPHYSR))
                        return 0;

                udelay(100);
        }

        return -ETIMEDOUT;
}

static void __init pcie_reset(struct sh7786_pcie_port *port)
{
        struct pci_channel *chan = port->hose;

        pci_write_reg(chan, 1, SH4A_PCIESRSTR);
        pci_write_reg(chan, 0, SH4A_PCIETCTLR);
        pci_write_reg(chan, 0, SH4A_PCIESRSTR);
        pci_write_reg(chan, 0, SH4A_PCIETXVC0SR);
}

static int __init pcie_init(struct sh7786_pcie_port *port)
{
        struct pci_channel *chan = port->hose;
        unsigned int data;
        phys_addr_t memphys;
        size_t memsize;
        int ret, i, win;

        /* Begin initialization */
        pcie_reset(port);

        /*
         * Initial header for port config space is type 1, set the device
         * class to match. Hardware takes care of propagating the IDSETR
         * settings, so there is no need to bother with a quirk.
         */
        pci_write_reg(chan, PCI_CLASS_BRIDGE_PCI << 16, SH4A_PCIEIDSETR1);

        /* Initialize default capabilities. */
        data = pci_read_reg(chan, SH4A_PCIEEXPCAP0);
        data &= ~(PCI_EXP_FLAGS_TYPE << 16);

        if (port->endpoint)
                data |= PCI_EXP_TYPE_ENDPOINT << 20;
        else
                data |= PCI_EXP_TYPE_ROOT_PORT << 20;

        data |= PCI_CAP_ID_EXP;
        pci_write_reg(chan, data, SH4A_PCIEEXPCAP0);

        /* Enable data link layer active state reporting */
        pci_write_reg(chan, PCI_EXP_LNKCAP_DLLLARC, SH4A_PCIEEXPCAP3);

        /* Enable extended sync and ASPM L0s support */
        data = pci_read_reg(chan, SH4A_PCIEEXPCAP4);
        data &= ~PCI_EXP_LNKCTL_ASPMC;
        data |= PCI_EXP_LNKCTL_ES | 1;
        pci_write_reg(chan, data, SH4A_PCIEEXPCAP4);

        /* Write out the physical slot number */
        data = pci_read_reg(chan, SH4A_PCIEEXPCAP5);
        data &= ~PCI_EXP_SLTCAP_PSN;
        data |= (port->index + 1) << 19;
        pci_write_reg(chan, data, SH4A_PCIEEXPCAP5);

        /*
         * Set the completion timer timeout to the maximum 32ms.
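         *
         * The timeout value lives in the 6-bit field at bits [13:8] of
         * PCIETLCTLR; the read-modify-write below leaves the rest of
         * the register untouched.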
         */
        data = pci_read_reg(chan, SH4A_PCIETLCTLR);
        data &= ~0x3f00;
        data |= 0x32 << 8;
        pci_write_reg(chan, data, SH4A_PCIETLCTLR);

        /*
         * Set fast training sequences to the maximum 255,
         * and enable MAC data scrambling.
         */
        data = pci_read_reg(chan, SH4A_PCIEMACCTLR);
        data &= ~PCIEMACCTLR_SCR_DIS;
        data |= (0xff << 16);
        pci_write_reg(chan, data, SH4A_PCIEMACCTLR);

        memphys = __pa(memory_start);
        memsize = roundup_pow_of_two(memory_end - memory_start);

        /*
         * If there's more than 512MB of memory, we need to roll over to
         * LAR1/LAMR1.
         */
        if (memsize > SZ_512M) {
                pci_write_reg(chan, memphys + SZ_512M, SH4A_PCIELAR1);
                pci_write_reg(chan, ((memsize - SZ_512M) - SZ_256) | 1,
                              SH4A_PCIELAMR1);
                memsize = SZ_512M;
        } else {
                /*
                 * Otherwise just zero it out and disable it.
                 */
                pci_write_reg(chan, 0, SH4A_PCIELAR1);
                pci_write_reg(chan, 0, SH4A_PCIELAMR1);
        }

        /*
         * LAR0/LAMR0 covers up to the first 512MB, which is enough to
         * cover all of lowmem on most platforms.
         */
        pci_write_reg(chan, memphys, SH4A_PCIELAR0);
        pci_write_reg(chan, (memsize - SZ_256) | 1, SH4A_PCIELAMR0);

        /* Finish initialization */
        data = pci_read_reg(chan, SH4A_PCIETCTLR);
        data |= 0x1;
        pci_write_reg(chan, data, SH4A_PCIETCTLR);

        /* Let things settle down a bit.. */
        mdelay(100);

        /* Enable DL_Active Interrupt generation */
        data = pci_read_reg(chan, SH4A_PCIEDLINTENR);
        data |= PCIEDLINTENR_DLL_ACT_ENABLE;
        pci_write_reg(chan, data, SH4A_PCIEDLINTENR);

        /* Disable MAC data scrambling. */
        data = pci_read_reg(chan, SH4A_PCIEMACCTLR);
        data |= PCIEMACCTLR_SCR_DIS | (0xff << 16);
        pci_write_reg(chan, data, SH4A_PCIEMACCTLR);

        /*
         * This will timeout if we don't have a link, but we permit the
         * port to register anyways in order to support hotplug on future
         * hardware.
         */
        ret = pci_wait_for_irq(chan, MASK_INT_TX_CTRL);

        data = pci_read_reg(chan, SH4A_PCIEPCICONF1);
        data &= ~(PCI_STATUS_DEVSEL_MASK << 16);
        data |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
                (PCI_STATUS_CAP_LIST | PCI_STATUS_DEVSEL_FAST) << 16;
        pci_write_reg(chan, data, SH4A_PCIEPCICONF1);

        pci_write_reg(chan, 0x80888000, SH4A_PCIETXVC0DCTLR);
        pci_write_reg(chan, 0x00222000, SH4A_PCIERXVC0DCTLR);

        wmb();

        if (ret == 0) {
                data = pci_read_reg(chan, SH4A_PCIEMACSR);
                printk(KERN_NOTICE "PCI: PCIe#%d x%d link detected\n",
                       port->index, (data >> 20) & 0x3f);
        } else
                printk(KERN_NOTICE "PCI: PCIe#%d link down\n",
                       port->index);

        for (i = win = 0; i < chan->nr_resources; i++) {
                struct resource *res = chan->resources + i;
                resource_size_t size;
                u32 mask;

                /*
                 * We can't use the 32-bit mode windows in legacy 29-bit
                 * mode, so just skip them entirely.
                 */
                if ((res->flags & IORESOURCE_MEM_32BIT) && __in_29bit_mode())
                        continue;

                pci_write_reg(chan, 0x00000000, SH4A_PCIEPTCTLR(win));

                /*
                 * The PAMR mask is calculated in units of 256kB, which
                 * keeps things pretty simple.
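                 *
                 * The window size is rounded up to a power of two and
                 * expressed as a count of 256kB blocks minus one, which
                 * then gets shifted into the upper bits of PCIEPAMR.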
                 */
                size = resource_size(res);
                mask = (roundup_pow_of_two(size) / SZ_256K) - 1;
                pci_write_reg(chan, mask << 18, SH4A_PCIEPAMR(win));

                pci_write_reg(chan, upper_32_bits(res->start),
                              SH4A_PCIEPARH(win));
                pci_write_reg(chan, lower_32_bits(res->start),
                              SH4A_PCIEPARL(win));

                mask = MASK_PARE;
                if (res->flags & IORESOURCE_IO)
                        mask |= MASK_SPC;

                pci_write_reg(chan, mask, SH4A_PCIEPTCTLR(win));

                win++;
        }

        return 0;
}

int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
{
        return 71;
}

static int __init sh7786_pcie_core_init(void)
{
        /* Return the number of ports */
        return test_mode_pin(MODE_PIN12) ? 3 : 2;
}

static int __init sh7786_pcie_init_hw(struct sh7786_pcie_port *port)
{
        int ret;

        /*
         * Check if we are configured in endpoint or root complex mode,
         * this is a fixed pin setting that applies to all PCIe ports.
         */
        port->endpoint = test_mode_pin(MODE_PIN11);

        /*
         * Setup clocks, needed both for PHY and PCIe registers.
         */
        ret = pcie_clk_init(port);
        if (unlikely(ret < 0))
                return ret;

        ret = phy_init(port);
        if (unlikely(ret < 0))
                return ret;

        ret = pcie_init(port);
        if (unlikely(ret < 0))
                return ret;

        return register_pci_controller(port->hose);
}

static struct sh7786_pcie_hwops sh7786_65nm_pcie_hwops __initdata = {
        .core_init = sh7786_pcie_core_init,
        .port_init_hw = sh7786_pcie_init_hw,
};

static int __init sh7786_pcie_init(void)
{
        struct clk *platclk;
        int ret = 0, i;

        printk(KERN_NOTICE "PCI: Starting initialization.\n");

        sh7786_pcie_hwops = &sh7786_65nm_pcie_hwops;

        nr_ports = sh7786_pcie_hwops->core_init();
        BUG_ON(nr_ports > ARRAY_SIZE(sh7786_pci_channels));

        if (unlikely(nr_ports == 0))
                return -ENODEV;

        sh7786_pcie_ports = kzalloc(nr_ports * sizeof(struct sh7786_pcie_port),
                                    GFP_KERNEL);
        if (unlikely(!sh7786_pcie_ports))
                return -ENOMEM;

        /*
         * Fetch any optional platform clock associated with this block.
         *
         * This is a rather nasty hack for boards with spec-mocking FPGAs
         * that have a secondary set of clocks outside of the on-chip
         * ones that need to be accounted for before there is any chance
         * of touching the existing MSTP bits or CPG clocks.
         */
        platclk = clk_get(NULL, "pcie_plat_clk");
        if (IS_ERR(platclk)) {
                /* Sane hardware should probably get a WARN_ON.. */
                platclk = NULL;
        }

        clk_enable(platclk);

        printk(KERN_NOTICE "PCI: probing %d ports.\n", nr_ports);

        for (i = 0; i < nr_ports; i++) {
                struct sh7786_pcie_port *port = sh7786_pcie_ports + i;

                port->index = i;
                port->hose = sh7786_pci_channels + i;
                port->hose->io_map_base = port->hose->resources[0].start;

                ret |= sh7786_pcie_hwops->port_init_hw(port);
        }

        if (unlikely(ret)) {
                clk_disable(platclk);
                clk_put(platclk);
                return ret;
        }

        return 0;
}
arch_initcall(sh7786_pcie_init);