1 /* 2 * Support PCI/PCIe on PowerNV platforms 3 * 4 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp. 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12 #undef DEBUG 13 14 #include <linux/kernel.h> 15 #include <linux/pci.h> 16 #include <linux/delay.h> 17 #include <linux/string.h> 18 #include <linux/init.h> 19 #include <linux/bootmem.h> 20 #include <linux/irq.h> 21 #include <linux/io.h> 22 #include <linux/msi.h> 23 24 #include <asm/sections.h> 25 #include <asm/io.h> 26 #include <asm/prom.h> 27 #include <asm/pci-bridge.h> 28 #include <asm/machdep.h> 29 #include <asm/ppc-pci.h> 30 #include <asm/opal.h> 31 #include <asm/iommu.h> 32 #include <asm/tce.h> 33 34 #include "powernv.h" 35 #include "pci.h" 36 37 static int __pe_printk(const char *level, const struct pnv_ioda_pe *pe, 38 struct va_format *vaf) 39 { 40 char pfix[32]; 41 42 if (pe->pdev) 43 strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix)); 44 else 45 sprintf(pfix, "%04x:%02x ", 46 pci_domain_nr(pe->pbus), pe->pbus->number); 47 return printk("pci %s%s: [PE# %.3d] %pV", level, pfix, pe->pe_number, vaf); 48 } 49 50 #define define_pe_printk_level(func, kern_level) \ 51 static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...) \ 52 { \ 53 struct va_format vaf; \ 54 va_list args; \ 55 int r; \ 56 \ 57 va_start(args, fmt); \ 58 \ 59 vaf.fmt = fmt; \ 60 vaf.va = &args; \ 61 \ 62 r = __pe_printk(kern_level, pe, &vaf); \ 63 va_end(args); \ 64 \ 65 return r; \ 66 } \ 67 68 define_pe_printk_level(pe_err, KERN_ERR); 69 define_pe_printk_level(pe_warn, KERN_WARNING); 70 define_pe_printk_level(pe_info, KERN_INFO); 71 72 static struct pci_dn *pnv_ioda_get_pdn(struct pci_dev *dev) 73 { 74 struct device_node *np; 75 76 np = pci_device_to_OF_node(dev); 77 if (!np) 78 return NULL; 79 return PCI_DN(np); 80 } 81 82 static int __devinit pnv_ioda_alloc_pe(struct pnv_phb *phb) 83 { 84 unsigned long pe; 85 86 do { 87 pe = find_next_zero_bit(phb->ioda.pe_alloc, 88 phb->ioda.total_pe, 0); 89 if (pe >= phb->ioda.total_pe) 90 return IODA_INVALID_PE; 91 } while(test_and_set_bit(pe, phb->ioda.pe_alloc)); 92 93 phb->ioda.pe_array[pe].pe_number = pe; 94 return pe; 95 } 96 97 static void __devinit pnv_ioda_free_pe(struct pnv_phb *phb, int pe) 98 { 99 WARN_ON(phb->ioda.pe_array[pe].pdev); 100 101 memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe)); 102 clear_bit(pe, phb->ioda.pe_alloc); 103 } 104 105 /* Currently those 2 are only used when MSIs are enabled, this will change 106 * but in the meantime, we need to protect them to avoid warnings 107 */ 108 #ifdef CONFIG_PCI_MSI 109 static struct pnv_ioda_pe * __devinit pnv_ioda_get_pe(struct pci_dev *dev) 110 { 111 struct pci_controller *hose = pci_bus_to_host(dev->bus); 112 struct pnv_phb *phb = hose->private_data; 113 struct pci_dn *pdn = pnv_ioda_get_pdn(dev); 114 115 if (!pdn) 116 return NULL; 117 if (pdn->pe_number == IODA_INVALID_PE) 118 return NULL; 119 return &phb->ioda.pe_array[pdn->pe_number]; 120 } 121 #endif /* CONFIG_PCI_MSI */ 122 123 static int __devinit pnv_ioda_configure_pe(struct pnv_phb *phb, 124 struct pnv_ioda_pe *pe) 125 { 126 struct pci_dev *parent; 127 uint8_t bcomp, dcomp, fcomp; 128 long rc, rid_end, rid; 129 130 /* Bus validation ? */ 131 if (pe->pbus) { 132 int count; 133 134 dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER; 135 fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER; 136 parent = pe->pbus->self; 137 if (pe->flags & PNV_IODA_PE_BUS_ALL) 138 count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1; 139 else 140 count = 1; 141 142 switch(count) { 143 case 1: bcomp = OpalPciBusAll; break; 144 case 2: bcomp = OpalPciBus7Bits; break; 145 case 4: bcomp = OpalPciBus6Bits; break; 146 case 8: bcomp = OpalPciBus5Bits; break; 147 case 16: bcomp = OpalPciBus4Bits; break; 148 case 32: bcomp = OpalPciBus3Bits; break; 149 default: 150 pr_err("%s: Number of subordinate busses %d" 151 " unsupported\n", 152 pci_name(pe->pbus->self), count); 153 /* Do an exact match only */ 154 bcomp = OpalPciBusAll; 155 } 156 rid_end = pe->rid + (count << 8); 157 } else { 158 parent = pe->pdev->bus->self; 159 bcomp = OpalPciBusAll; 160 dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER; 161 fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER; 162 rid_end = pe->rid + 1; 163 } 164 165 /* Associate PE in PELT */ 166 rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid, 167 bcomp, dcomp, fcomp, OPAL_MAP_PE); 168 if (rc) { 169 pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc); 170 return -ENXIO; 171 } 172 opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, 173 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); 174 175 /* Add to all parents PELT-V */ 176 while (parent) { 177 struct pci_dn *pdn = pnv_ioda_get_pdn(parent); 178 if (pdn && pdn->pe_number != IODA_INVALID_PE) { 179 rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, 180 pe->pe_number, OPAL_ADD_PE_TO_DOMAIN); 181 /* XXX What to do in case of error ? */ 182 } 183 parent = parent->bus->self; 184 } 185 /* Setup reverse map */ 186 for (rid = pe->rid; rid < rid_end; rid++) 187 phb->ioda.pe_rmap[rid] = pe->pe_number; 188 189 /* Setup one MVTs on IODA1 */ 190 if (phb->type == PNV_PHB_IODA1) { 191 pe->mve_number = pe->pe_number; 192 rc = opal_pci_set_mve(phb->opal_id, pe->mve_number, 193 pe->pe_number); 194 if (rc) { 195 pe_err(pe, "OPAL error %ld setting up MVE %d\n", 196 rc, pe->mve_number); 197 pe->mve_number = -1; 198 } else { 199 rc = opal_pci_set_mve_enable(phb->opal_id, 200 pe->mve_number, OPAL_ENABLE_MVE); 201 if (rc) { 202 pe_err(pe, "OPAL error %ld enabling MVE %d\n", 203 rc, pe->mve_number); 204 pe->mve_number = -1; 205 } 206 } 207 } else if (phb->type == PNV_PHB_IODA2) 208 pe->mve_number = 0; 209 210 return 0; 211 } 212 213 static void __devinit pnv_ioda_link_pe_by_weight(struct pnv_phb *phb, 214 struct pnv_ioda_pe *pe) 215 { 216 struct pnv_ioda_pe *lpe; 217 218 list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) { 219 if (lpe->dma_weight < pe->dma_weight) { 220 list_add_tail(&pe->dma_link, &lpe->dma_link); 221 return; 222 } 223 } 224 list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list); 225 } 226 227 static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev) 228 { 229 /* This is quite simplistic. The "base" weight of a device 230 * is 10. 0 means no DMA is to be accounted for it. 231 */ 232 233 /* If it's a bridge, no DMA */ 234 if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) 235 return 0; 236 237 /* Reduce the weight of slow USB controllers */ 238 if (dev->class == PCI_CLASS_SERIAL_USB_UHCI || 239 dev->class == PCI_CLASS_SERIAL_USB_OHCI || 240 dev->class == PCI_CLASS_SERIAL_USB_EHCI) 241 return 3; 242 243 /* Increase the weight of RAID (includes Obsidian) */ 244 if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID) 245 return 15; 246 247 /* Default */ 248 return 10; 249 } 250 251 #if 0 252 static struct pnv_ioda_pe * __devinit pnv_ioda_setup_dev_PE(struct pci_dev *dev) 253 { 254 struct pci_controller *hose = pci_bus_to_host(dev->bus); 255 struct pnv_phb *phb = hose->private_data; 256 struct pci_dn *pdn = pnv_ioda_get_pdn(dev); 257 struct pnv_ioda_pe *pe; 258 int pe_num; 259 260 if (!pdn) { 261 pr_err("%s: Device tree node not associated properly\n", 262 pci_name(dev)); 263 return NULL; 264 } 265 if (pdn->pe_number != IODA_INVALID_PE) 266 return NULL; 267 268 /* PE#0 has been pre-set */ 269 if (dev->bus->number == 0) 270 pe_num = 0; 271 else 272 pe_num = pnv_ioda_alloc_pe(phb); 273 if (pe_num == IODA_INVALID_PE) { 274 pr_warning("%s: Not enough PE# available, disabling device\n", 275 pci_name(dev)); 276 return NULL; 277 } 278 279 /* NOTE: We get only one ref to the pci_dev for the pdn, not for the 280 * pointer in the PE data structure, both should be destroyed at the 281 * same time. However, this needs to be looked at more closely again 282 * once we actually start removing things (Hotplug, SR-IOV, ...) 283 * 284 * At some point we want to remove the PDN completely anyways 285 */ 286 pe = &phb->ioda.pe_array[pe_num]; 287 pci_dev_get(dev); 288 pdn->pcidev = dev; 289 pdn->pe_number = pe_num; 290 pe->pdev = dev; 291 pe->pbus = NULL; 292 pe->tce32_seg = -1; 293 pe->mve_number = -1; 294 pe->rid = dev->bus->number << 8 | pdn->devfn; 295 296 pe_info(pe, "Associated device to PE\n"); 297 298 if (pnv_ioda_configure_pe(phb, pe)) { 299 /* XXX What do we do here ? */ 300 if (pe_num) 301 pnv_ioda_free_pe(phb, pe_num); 302 pdn->pe_number = IODA_INVALID_PE; 303 pe->pdev = NULL; 304 pci_dev_put(dev); 305 return NULL; 306 } 307 308 /* Assign a DMA weight to the device */ 309 pe->dma_weight = pnv_ioda_dma_weight(dev); 310 if (pe->dma_weight != 0) { 311 phb->ioda.dma_weight += pe->dma_weight; 312 phb->ioda.dma_pe_count++; 313 } 314 315 /* Link the PE */ 316 pnv_ioda_link_pe_by_weight(phb, pe); 317 318 return pe; 319 } 320 #endif /* Useful for SRIOV case */ 321 322 static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) 323 { 324 struct pci_dev *dev; 325 326 list_for_each_entry(dev, &bus->devices, bus_list) { 327 struct pci_dn *pdn = pnv_ioda_get_pdn(dev); 328 329 if (pdn == NULL) { 330 pr_warn("%s: No device node associated with device !\n", 331 pci_name(dev)); 332 continue; 333 } 334 pci_dev_get(dev); 335 pdn->pcidev = dev; 336 pdn->pe_number = pe->pe_number; 337 pe->dma_weight += pnv_ioda_dma_weight(dev); 338 if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) 339 pnv_ioda_setup_same_PE(dev->subordinate, pe); 340 } 341 } 342 343 /* 344 * There're 2 types of PCI bus sensitive PEs: One that is compromised of 345 * single PCI bus. Another one that contains the primary PCI bus and its 346 * subordinate PCI devices and buses. The second type of PE is normally 347 * orgiriated by PCIe-to-PCI bridge or PLX switch downstream ports. 348 */ 349 static void __devinit pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) 350 { 351 struct pci_controller *hose = pci_bus_to_host(bus); 352 struct pnv_phb *phb = hose->private_data; 353 struct pnv_ioda_pe *pe; 354 int pe_num; 355 356 pe_num = pnv_ioda_alloc_pe(phb); 357 if (pe_num == IODA_INVALID_PE) { 358 pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n", 359 __func__, pci_domain_nr(bus), bus->number); 360 return; 361 } 362 363 pe = &phb->ioda.pe_array[pe_num]; 364 pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS); 365 pe->pbus = bus; 366 pe->pdev = NULL; 367 pe->tce32_seg = -1; 368 pe->mve_number = -1; 369 pe->rid = bus->busn_res.start << 8; 370 pe->dma_weight = 0; 371 372 if (all) 373 pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n", 374 bus->busn_res.start, bus->busn_res.end, pe_num); 375 else 376 pe_info(pe, "Secondary bus %d associated with PE#%d\n", 377 bus->busn_res.start, pe_num); 378 379 if (pnv_ioda_configure_pe(phb, pe)) { 380 /* XXX What do we do here ? */ 381 if (pe_num) 382 pnv_ioda_free_pe(phb, pe_num); 383 pe->pbus = NULL; 384 return; 385 } 386 387 /* Associate it with all child devices */ 388 pnv_ioda_setup_same_PE(bus, pe); 389 390 /* Put PE to the list */ 391 list_add_tail(&pe->list, &phb->ioda.pe_list); 392 393 /* Account for one DMA PE if at least one DMA capable device exist 394 * below the bridge 395 */ 396 if (pe->dma_weight != 0) { 397 phb->ioda.dma_weight += pe->dma_weight; 398 phb->ioda.dma_pe_count++; 399 } 400 401 /* Link the PE */ 402 pnv_ioda_link_pe_by_weight(phb, pe); 403 } 404 405 static void __devinit pnv_ioda_setup_PEs(struct pci_bus *bus) 406 { 407 struct pci_dev *dev; 408 409 pnv_ioda_setup_bus_PE(bus, 0); 410 411 list_for_each_entry(dev, &bus->devices, bus_list) { 412 if (dev->subordinate) { 413 if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE) 414 pnv_ioda_setup_bus_PE(dev->subordinate, 1); 415 else 416 pnv_ioda_setup_PEs(dev->subordinate); 417 } 418 } 419 } 420 421 /* 422 * Configure PEs so that the downstream PCI buses and devices 423 * could have their associated PE#. Unfortunately, we didn't 424 * figure out the way to identify the PLX bridge yet. So we 425 * simply put the PCI bus and the subordinate behind the root 426 * port to PE# here. The game rule here is expected to be changed 427 * as soon as we can detected PLX bridge correctly. 428 */ 429 static void __devinit pnv_pci_ioda_setup_PEs(void) 430 { 431 struct pci_controller *hose, *tmp; 432 433 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 434 pnv_ioda_setup_PEs(hose->bus); 435 } 436 } 437 438 static void __devinit pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, 439 struct pci_dev *dev) 440 { 441 /* We delay DMA setup after we have assigned all PE# */ 442 } 443 444 static void __devinit pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, 445 struct pci_bus *bus) 446 { 447 struct pci_dev *dev; 448 449 list_for_each_entry(dev, &bus->devices, bus_list) { 450 set_iommu_table_base(&dev->dev, &pe->tce32_table); 451 if (dev->subordinate) 452 pnv_ioda_setup_bus_dma(pe, dev->subordinate); 453 } 454 } 455 456 static void __devinit pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, 457 struct pnv_ioda_pe *pe, 458 unsigned int base, 459 unsigned int segs) 460 { 461 462 struct page *tce_mem = NULL; 463 const __be64 *swinvp; 464 struct iommu_table *tbl; 465 unsigned int i; 466 int64_t rc; 467 void *addr; 468 469 /* 256M DMA window, 4K TCE pages, 8 bytes TCE */ 470 #define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8) 471 472 /* XXX FIXME: Handle 64-bit only DMA devices */ 473 /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */ 474 /* XXX FIXME: Allocate multi-level tables on PHB3 */ 475 476 /* We shouldn't already have a 32-bit DMA associated */ 477 if (WARN_ON(pe->tce32_seg >= 0)) 478 return; 479 480 /* Grab a 32-bit TCE table */ 481 pe->tce32_seg = base; 482 pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n", 483 (base << 28), ((base + segs) << 28) - 1); 484 485 /* XXX Currently, we allocate one big contiguous table for the 486 * TCEs. We only really need one chunk per 256M of TCE space 487 * (ie per segment) but that's an optimization for later, it 488 * requires some added smarts with our get/put_tce implementation 489 */ 490 tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL, 491 get_order(TCE32_TABLE_SIZE * segs)); 492 if (!tce_mem) { 493 pe_err(pe, " Failed to allocate a 32-bit TCE memory\n"); 494 goto fail; 495 } 496 addr = page_address(tce_mem); 497 memset(addr, 0, TCE32_TABLE_SIZE * segs); 498 499 /* Configure HW */ 500 for (i = 0; i < segs; i++) { 501 rc = opal_pci_map_pe_dma_window(phb->opal_id, 502 pe->pe_number, 503 base + i, 1, 504 __pa(addr) + TCE32_TABLE_SIZE * i, 505 TCE32_TABLE_SIZE, 0x1000); 506 if (rc) { 507 pe_err(pe, " Failed to configure 32-bit TCE table," 508 " err %ld\n", rc); 509 goto fail; 510 } 511 } 512 513 /* Setup linux iommu table */ 514 tbl = &pe->tce32_table; 515 pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs, 516 base << 28); 517 518 /* OPAL variant of P7IOC SW invalidated TCEs */ 519 swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); 520 if (swinvp) { 521 /* We need a couple more fields -- an address and a data 522 * to or. Since the bus is only printed out on table free 523 * errors, and on the first pass the data will be a relative 524 * bus number, print that out instead. 525 */ 526 tbl->it_busno = 0; 527 tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8); 528 tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE 529 | TCE_PCI_SWINV_PAIR; 530 } 531 iommu_init_table(tbl, phb->hose->node); 532 533 if (pe->pdev) 534 set_iommu_table_base(&pe->pdev->dev, tbl); 535 else 536 pnv_ioda_setup_bus_dma(pe, pe->pbus); 537 538 return; 539 fail: 540 /* XXX Failure: Try to fallback to 64-bit only ? */ 541 if (pe->tce32_seg >= 0) 542 pe->tce32_seg = -1; 543 if (tce_mem) 544 __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs)); 545 } 546 547 static void __devinit pnv_ioda_setup_dma(struct pnv_phb *phb) 548 { 549 struct pci_controller *hose = phb->hose; 550 unsigned int residual, remaining, segs, tw, base; 551 struct pnv_ioda_pe *pe; 552 553 /* If we have more PE# than segments available, hand out one 554 * per PE until we run out and let the rest fail. If not, 555 * then we assign at least one segment per PE, plus more based 556 * on the amount of devices under that PE 557 */ 558 if (phb->ioda.dma_pe_count > phb->ioda.tce32_count) 559 residual = 0; 560 else 561 residual = phb->ioda.tce32_count - 562 phb->ioda.dma_pe_count; 563 564 pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n", 565 hose->global_number, phb->ioda.tce32_count); 566 pr_info("PCI: %d PE# for a total weight of %d\n", 567 phb->ioda.dma_pe_count, phb->ioda.dma_weight); 568 569 /* Walk our PE list and configure their DMA segments, hand them 570 * out one base segment plus any residual segments based on 571 * weight 572 */ 573 remaining = phb->ioda.tce32_count; 574 tw = phb->ioda.dma_weight; 575 base = 0; 576 list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) { 577 if (!pe->dma_weight) 578 continue; 579 if (!remaining) { 580 pe_warn(pe, "No DMA32 resources available\n"); 581 continue; 582 } 583 segs = 1; 584 if (residual) { 585 segs += ((pe->dma_weight * residual) + (tw / 2)) / tw; 586 if (segs > remaining) 587 segs = remaining; 588 } 589 pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n", 590 pe->dma_weight, segs); 591 pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs); 592 remaining -= segs; 593 base += segs; 594 } 595 } 596 597 #ifdef CONFIG_PCI_MSI 598 static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, 599 unsigned int hwirq, unsigned int is_64, 600 struct msi_msg *msg) 601 { 602 struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev); 603 unsigned int xive_num = hwirq - phb->msi_base; 604 uint64_t addr64; 605 uint32_t addr32, data; 606 int rc; 607 608 /* No PE assigned ? bail out ... no MSI for you ! */ 609 if (pe == NULL) 610 return -ENXIO; 611 612 /* Check if we have an MVE */ 613 if (pe->mve_number < 0) 614 return -ENXIO; 615 616 /* Assign XIVE to PE */ 617 rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num); 618 if (rc) { 619 pr_warn("%s: OPAL error %d setting XIVE %d PE\n", 620 pci_name(dev), rc, xive_num); 621 return -EIO; 622 } 623 624 if (is_64) { 625 rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1, 626 &addr64, &data); 627 if (rc) { 628 pr_warn("%s: OPAL error %d getting 64-bit MSI data\n", 629 pci_name(dev), rc); 630 return -EIO; 631 } 632 msg->address_hi = addr64 >> 32; 633 msg->address_lo = addr64 & 0xfffffffful; 634 } else { 635 rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1, 636 &addr32, &data); 637 if (rc) { 638 pr_warn("%s: OPAL error %d getting 32-bit MSI data\n", 639 pci_name(dev), rc); 640 return -EIO; 641 } 642 msg->address_hi = 0; 643 msg->address_lo = addr32; 644 } 645 msg->data = data; 646 647 pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d)," 648 " address=%x_%08x data=%x PE# %d\n", 649 pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num, 650 msg->address_hi, msg->address_lo, data, pe->pe_number); 651 652 return 0; 653 } 654 655 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) 656 { 657 unsigned int bmap_size; 658 const __be32 *prop = of_get_property(phb->hose->dn, 659 "ibm,opal-msi-ranges", NULL); 660 if (!prop) { 661 /* BML Fallback */ 662 prop = of_get_property(phb->hose->dn, "msi-ranges", NULL); 663 } 664 if (!prop) 665 return; 666 667 phb->msi_base = be32_to_cpup(prop); 668 phb->msi_count = be32_to_cpup(prop + 1); 669 bmap_size = BITS_TO_LONGS(phb->msi_count) * sizeof(unsigned long); 670 phb->msi_map = zalloc_maybe_bootmem(bmap_size, GFP_KERNEL); 671 if (!phb->msi_map) { 672 pr_err("PCI %d: Failed to allocate MSI bitmap !\n", 673 phb->hose->global_number); 674 return; 675 } 676 phb->msi_setup = pnv_pci_ioda_msi_setup; 677 phb->msi32_support = 1; 678 pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n", 679 phb->msi_count, phb->msi_base); 680 } 681 #else 682 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { } 683 #endif /* CONFIG_PCI_MSI */ 684 685 /* 686 * This function is supposed to be called on basis of PE from top 687 * to bottom style. So the the I/O or MMIO segment assigned to 688 * parent PE could be overrided by its child PEs if necessary. 689 */ 690 static void __devinit pnv_ioda_setup_pe_seg(struct pci_controller *hose, 691 struct pnv_ioda_pe *pe) 692 { 693 struct pnv_phb *phb = hose->private_data; 694 struct pci_bus_region region; 695 struct resource *res; 696 int i, index; 697 int rc; 698 699 /* 700 * NOTE: We only care PCI bus based PE for now. For PCI 701 * device based PE, for example SRIOV sensitive VF should 702 * be figured out later. 703 */ 704 BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))); 705 706 pci_bus_for_each_resource(pe->pbus, res, i) { 707 if (!res || !res->flags || 708 res->start > res->end) 709 continue; 710 711 if (res->flags & IORESOURCE_IO) { 712 region.start = res->start - phb->ioda.io_pci_base; 713 region.end = res->end - phb->ioda.io_pci_base; 714 index = region.start / phb->ioda.io_segsize; 715 716 while (index < phb->ioda.total_pe && 717 region.start <= region.end) { 718 phb->ioda.io_segmap[index] = pe->pe_number; 719 rc = opal_pci_map_pe_mmio_window(phb->opal_id, 720 pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index); 721 if (rc != OPAL_SUCCESS) { 722 pr_err("%s: OPAL error %d when mapping IO " 723 "segment #%d to PE#%d\n", 724 __func__, rc, index, pe->pe_number); 725 break; 726 } 727 728 region.start += phb->ioda.io_segsize; 729 index++; 730 } 731 } else if (res->flags & IORESOURCE_MEM) { 732 region.start = res->start - 733 hose->pci_mem_offset - 734 phb->ioda.m32_pci_base; 735 region.end = res->end - 736 hose->pci_mem_offset - 737 phb->ioda.m32_pci_base; 738 index = region.start / phb->ioda.m32_segsize; 739 740 while (index < phb->ioda.total_pe && 741 region.start <= region.end) { 742 phb->ioda.m32_segmap[index] = pe->pe_number; 743 rc = opal_pci_map_pe_mmio_window(phb->opal_id, 744 pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index); 745 if (rc != OPAL_SUCCESS) { 746 pr_err("%s: OPAL error %d when mapping M32 " 747 "segment#%d to PE#%d", 748 __func__, rc, index, pe->pe_number); 749 break; 750 } 751 752 region.start += phb->ioda.m32_segsize; 753 index++; 754 } 755 } 756 } 757 } 758 759 static void __devinit pnv_pci_ioda_setup_seg(void) 760 { 761 struct pci_controller *tmp, *hose; 762 struct pnv_phb *phb; 763 struct pnv_ioda_pe *pe; 764 765 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 766 phb = hose->private_data; 767 list_for_each_entry(pe, &phb->ioda.pe_list, list) { 768 pnv_ioda_setup_pe_seg(hose, pe); 769 } 770 } 771 } 772 773 static void __devinit pnv_pci_ioda_setup_DMA(void) 774 { 775 struct pci_controller *hose, *tmp; 776 struct pnv_phb *phb; 777 778 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 779 pnv_ioda_setup_dma(hose->private_data); 780 781 /* Mark the PHB initialization done */ 782 phb = hose->private_data; 783 phb->initialized = 1; 784 } 785 } 786 787 static void __devinit pnv_pci_ioda_fixup(void) 788 { 789 pnv_pci_ioda_setup_PEs(); 790 pnv_pci_ioda_setup_seg(); 791 pnv_pci_ioda_setup_DMA(); 792 } 793 794 /* 795 * Returns the alignment for I/O or memory windows for P2P 796 * bridges. That actually depends on how PEs are segmented. 797 * For now, we return I/O or M32 segment size for PE sensitive 798 * P2P bridges. Otherwise, the default values (4KiB for I/O, 799 * 1MiB for memory) will be returned. 800 * 801 * The current PCI bus might be put into one PE, which was 802 * create against the parent PCI bridge. For that case, we 803 * needn't enlarge the alignment so that we can save some 804 * resources. 805 */ 806 static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, 807 unsigned long type) 808 { 809 struct pci_dev *bridge; 810 struct pci_controller *hose = pci_bus_to_host(bus); 811 struct pnv_phb *phb = hose->private_data; 812 int num_pci_bridges = 0; 813 814 bridge = bus->self; 815 while (bridge) { 816 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) { 817 num_pci_bridges++; 818 if (num_pci_bridges >= 2) 819 return 1; 820 } 821 822 bridge = bridge->bus->self; 823 } 824 825 /* We need support prefetchable memory window later */ 826 if (type & IORESOURCE_MEM) 827 return phb->ioda.m32_segsize; 828 829 return phb->ioda.io_segsize; 830 } 831 832 /* Prevent enabling devices for which we couldn't properly 833 * assign a PE 834 */ 835 static int __devinit pnv_pci_enable_device_hook(struct pci_dev *dev) 836 { 837 struct pci_controller *hose = pci_bus_to_host(dev->bus); 838 struct pnv_phb *phb = hose->private_data; 839 struct pci_dn *pdn; 840 841 /* The function is probably called while the PEs have 842 * not be created yet. For example, resource reassignment 843 * during PCI probe period. We just skip the check if 844 * PEs isn't ready. 845 */ 846 if (!phb->initialized) 847 return 0; 848 849 pdn = pnv_ioda_get_pdn(dev); 850 if (!pdn || pdn->pe_number == IODA_INVALID_PE) 851 return -EINVAL; 852 853 return 0; 854 } 855 856 static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus, 857 u32 devfn) 858 { 859 return phb->ioda.pe_rmap[(bus->number << 8) | devfn]; 860 } 861 862 void __init pnv_pci_init_ioda1_phb(struct device_node *np) 863 { 864 struct pci_controller *hose; 865 static int primary = 1; 866 struct pnv_phb *phb; 867 unsigned long size, m32map_off, iomap_off, pemap_off; 868 const u64 *prop64; 869 u64 phb_id; 870 void *aux; 871 long rc; 872 873 pr_info(" Initializing IODA OPAL PHB %s\n", np->full_name); 874 875 prop64 = of_get_property(np, "ibm,opal-phbid", NULL); 876 if (!prop64) { 877 pr_err(" Missing \"ibm,opal-phbid\" property !\n"); 878 return; 879 } 880 phb_id = be64_to_cpup(prop64); 881 pr_debug(" PHB-ID : 0x%016llx\n", phb_id); 882 883 phb = alloc_bootmem(sizeof(struct pnv_phb)); 884 if (phb) { 885 memset(phb, 0, sizeof(struct pnv_phb)); 886 phb->hose = hose = pcibios_alloc_controller(np); 887 } 888 if (!phb || !phb->hose) { 889 pr_err("PCI: Failed to allocate PCI controller for %s\n", 890 np->full_name); 891 return; 892 } 893 894 spin_lock_init(&phb->lock); 895 /* XXX Use device-tree */ 896 hose->first_busno = 0; 897 hose->last_busno = 0xff; 898 hose->private_data = phb; 899 phb->opal_id = phb_id; 900 phb->type = PNV_PHB_IODA1; 901 902 /* Detect specific models for error handling */ 903 if (of_device_is_compatible(np, "ibm,p7ioc-pciex")) 904 phb->model = PNV_PHB_MODEL_P7IOC; 905 else 906 phb->model = PNV_PHB_MODEL_UNKNOWN; 907 908 /* We parse "ranges" now since we need to deduce the register base 909 * from the IO base 910 */ 911 pci_process_bridge_OF_ranges(phb->hose, np, primary); 912 primary = 0; 913 914 /* Magic formula from Milton */ 915 phb->regs = of_iomap(np, 0); 916 if (phb->regs == NULL) 917 pr_err(" Failed to map registers !\n"); 918 919 920 /* XXX This is hack-a-thon. This needs to be changed so that: 921 * - we obtain stuff like PE# etc... from device-tree 922 * - we properly re-allocate M32 ourselves 923 * (the OFW one isn't very good) 924 */ 925 926 /* Initialize more IODA stuff */ 927 phb->ioda.total_pe = 128; 928 929 phb->ioda.m32_size = resource_size(&hose->mem_resources[0]); 930 /* OFW Has already off top 64k of M32 space (MSI space) */ 931 phb->ioda.m32_size += 0x10000; 932 933 phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe; 934 phb->ioda.m32_pci_base = hose->mem_resources[0].start - 935 hose->pci_mem_offset; 936 phb->ioda.io_size = hose->pci_io_size; 937 phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe; 938 phb->ioda.io_pci_base = 0; /* XXX calculate this ? */ 939 940 /* Allocate aux data & arrays */ 941 size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long)); 942 m32map_off = size; 943 size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]); 944 iomap_off = size; 945 size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]); 946 pemap_off = size; 947 size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe); 948 aux = alloc_bootmem(size); 949 memset(aux, 0, size); 950 phb->ioda.pe_alloc = aux; 951 phb->ioda.m32_segmap = aux + m32map_off; 952 phb->ioda.io_segmap = aux + iomap_off; 953 phb->ioda.pe_array = aux + pemap_off; 954 set_bit(0, phb->ioda.pe_alloc); 955 956 INIT_LIST_HEAD(&phb->ioda.pe_dma_list); 957 INIT_LIST_HEAD(&phb->ioda.pe_list); 958 959 /* Calculate how many 32-bit TCE segments we have */ 960 phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28; 961 962 /* Clear unusable m64 */ 963 hose->mem_resources[1].flags = 0; 964 hose->mem_resources[1].start = 0; 965 hose->mem_resources[1].end = 0; 966 hose->mem_resources[2].flags = 0; 967 hose->mem_resources[2].start = 0; 968 hose->mem_resources[2].end = 0; 969 970 #if 0 971 rc = opal_pci_set_phb_mem_window(opal->phb_id, 972 window_type, 973 window_num, 974 starting_real_address, 975 starting_pci_address, 976 segment_size); 977 #endif 978 979 pr_info(" %d PE's M32: 0x%x [segment=0x%x] IO: 0x%x [segment=0x%x]\n", 980 phb->ioda.total_pe, 981 phb->ioda.m32_size, phb->ioda.m32_segsize, 982 phb->ioda.io_size, phb->ioda.io_segsize); 983 984 if (phb->regs) { 985 pr_devel(" BUID = 0x%016llx\n", in_be64(phb->regs + 0x100)); 986 pr_devel(" PHB2_CR = 0x%016llx\n", in_be64(phb->regs + 0x160)); 987 pr_devel(" IO_BAR = 0x%016llx\n", in_be64(phb->regs + 0x170)); 988 pr_devel(" IO_BAMR = 0x%016llx\n", in_be64(phb->regs + 0x178)); 989 pr_devel(" IO_SAR = 0x%016llx\n", in_be64(phb->regs + 0x180)); 990 pr_devel(" M32_BAR = 0x%016llx\n", in_be64(phb->regs + 0x190)); 991 pr_devel(" M32_BAMR = 0x%016llx\n", in_be64(phb->regs + 0x198)); 992 pr_devel(" M32_SAR = 0x%016llx\n", in_be64(phb->regs + 0x1a0)); 993 } 994 phb->hose->ops = &pnv_pci_ops; 995 996 /* Setup RID -> PE mapping function */ 997 phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe; 998 999 /* Setup TCEs */ 1000 phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; 1001 1002 /* Setup MSI support */ 1003 pnv_pci_init_ioda_msis(phb); 1004 1005 /* 1006 * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here 1007 * to let the PCI core do resource assignment. It's supposed 1008 * that the PCI core will do correct I/O and MMIO alignment 1009 * for the P2P bridge bars so that each PCI bus (excluding 1010 * the child P2P bridges) can form individual PE. 1011 */ 1012 ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; 1013 ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook; 1014 ppc_md.pcibios_window_alignment = pnv_pci_window_alignment; 1015 pci_add_flags(PCI_REASSIGN_ALL_RSRC); 1016 1017 /* Reset IODA tables to a clean state */ 1018 rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET); 1019 if (rc) 1020 pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc); 1021 opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1 , OPAL_MAP_PE); 1022 } 1023 1024 void __init pnv_pci_init_ioda_hub(struct device_node *np) 1025 { 1026 struct device_node *phbn; 1027 const u64 *prop64; 1028 u64 hub_id; 1029 1030 pr_info("Probing IODA IO-Hub %s\n", np->full_name); 1031 1032 prop64 = of_get_property(np, "ibm,opal-hubid", NULL); 1033 if (!prop64) { 1034 pr_err(" Missing \"ibm,opal-hubid\" property !\n"); 1035 return; 1036 } 1037 hub_id = be64_to_cpup(prop64); 1038 pr_devel(" HUB-ID : 0x%016llx\n", hub_id); 1039 1040 /* Count child PHBs */ 1041 for_each_child_of_node(np, phbn) { 1042 /* Look for IODA1 PHBs */ 1043 if (of_device_is_compatible(phbn, "ibm,ioda-phb")) 1044 pnv_pci_init_ioda1_phb(phbn); 1045 } 1046 } 1047