/*
 * Support PCI/PCIe on PowerNV platforms
 *
 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG

#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/msi.h>
#include <linux/memblock.h>

#include <asm/sections.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/msi_bitmap.h>
#include <asm/ppc-pci.h>
#include <asm/opal.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/xics.h>
#include <asm/debug.h>

#include "powernv.h"
#include "pci.h"

#define define_pe_printk_level(func, kern_level)			\
static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...)	\
{									\
	struct va_format vaf;						\
	va_list args;							\
	char pfix[32];							\
	int r;								\
									\
	va_start(args, fmt);						\
									\
	vaf.fmt = fmt;							\
	vaf.va = &args;							\
									\
	if (pe->pdev)							\
		strlcpy(pfix, dev_name(&pe->pdev->dev),			\
			sizeof(pfix));					\
	else								\
		sprintf(pfix, "%04x:%02x ",				\
			pci_domain_nr(pe->pbus),			\
			pe->pbus->number);				\
	r = printk(kern_level "pci %s: [PE# %.3d] %pV",			\
		   pfix, pe->pe_number, &vaf);				\
									\
	va_end(args);							\
									\
	return r;							\
}									\

define_pe_printk_level(pe_err, KERN_ERR);
define_pe_printk_level(pe_warn, KERN_WARNING);
define_pe_printk_level(pe_info, KERN_INFO);

/*
 * stdcix is only supposed to be used in hypervisor real mode as per
 * the architecture spec
 */
static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr)
{
	__asm__ __volatile__("stdcix %0,0,%1"
		: : "r" (val), "r" (paddr) : "memory");
}

static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
{
	unsigned long pe;

	do {
		pe = find_next_zero_bit(phb->ioda.pe_alloc,
					phb->ioda.total_pe, 0);
		if (pe >= phb->ioda.total_pe)
			return IODA_INVALID_PE;
	} while (test_and_set_bit(pe, phb->ioda.pe_alloc));

	phb->ioda.pe_array[pe].phb = phb;
	phb->ioda.pe_array[pe].pe_number = pe;
	return pe;
}

static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
{
	WARN_ON(phb->ioda.pe_array[pe].pdev);

	memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe));
	clear_bit(pe, phb->ioda.pe_alloc);
}

/* Currently those 2 are only used when MSIs are enabled, this will change
 * but in the meantime, we need to protect them to avoid warnings
 */
#ifdef CONFIG_PCI_MSI
static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn = pci_get_pdn(dev);

	if (!pdn)
		return NULL;
	if (pdn->pe_number == IODA_INVALID_PE)
		return NULL;
	return &phb->ioda.pe_array[pdn->pe_number];
}
#endif /* CONFIG_PCI_MSI */

static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
{
	struct pci_dev *parent;
	uint8_t bcomp, dcomp, fcomp;
	long rc, rid_end, rid;

	/* Bus validation ? */
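	/*
	 * A PE covers either a PCI bus (optionally including all of its
	 * subordinate buses) or a single device. Pick the bus/device/function
	 * RID compare settings passed to OPAL accordingly.
	 */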
	if (pe->pbus) {
		int count;

		dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
		fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
		parent = pe->pbus->self;
		if (pe->flags & PNV_IODA_PE_BUS_ALL)
			count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
		else
			count = 1;

		switch (count) {
		case  1: bcomp = OpalPciBusAll;		break;
		case  2: bcomp = OpalPciBus7Bits;	break;
		case  4: bcomp = OpalPciBus6Bits;	break;
		case  8: bcomp = OpalPciBus5Bits;	break;
		case 16: bcomp = OpalPciBus4Bits;	break;
		case 32: bcomp = OpalPciBus3Bits;	break;
		default:
			pr_err("%s: Number of subordinate buses %d"
			       " unsupported\n",
			       pci_name(pe->pbus->self), count);
			/* Do an exact match only */
			bcomp = OpalPciBusAll;
		}
		rid_end = pe->rid + (count << 8);
	} else {
		parent = pe->pdev->bus->self;
		bcomp = OpalPciBusAll;
		dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
		fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
		rid_end = pe->rid + 1;
	}

	/*
	 * Associate the PE in the PELT. We also need to add the PE to the
	 * corresponding PELT-V; otherwise, errors originating from the PE
	 * might be propagated to other PEs.
	 */
	rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
			     bcomp, dcomp, fcomp, OPAL_MAP_PE);
	if (rc) {
		pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
		return -ENXIO;
	}

	rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
				pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
	if (rc)
		pe_warn(pe, "OPAL error %ld adding self to PELTV\n", rc);
	opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
				  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);

	/* Add to all parents' PELT-V */
	while (parent) {
		struct pci_dn *pdn = pci_get_pdn(parent);
		if (pdn && pdn->pe_number != IODA_INVALID_PE) {
			rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
						pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
			/* XXX What to do in case of error ? */
		}
		parent = parent->bus->self;
	}
	/* Setup reverse map */
	for (rid = pe->rid; rid < rid_end; rid++)
		phb->ioda.pe_rmap[rid] = pe->pe_number;

	/* Setup one MVE on IODA1 */
	if (phb->type == PNV_PHB_IODA1) {
		pe->mve_number = pe->pe_number;
		rc = opal_pci_set_mve(phb->opal_id, pe->mve_number,
				      pe->pe_number);
		if (rc) {
			pe_err(pe, "OPAL error %ld setting up MVE %d\n",
			       rc, pe->mve_number);
			pe->mve_number = -1;
		} else {
			rc = opal_pci_set_mve_enable(phb->opal_id,
						     pe->mve_number, OPAL_ENABLE_MVE);
			if (rc) {
				pe_err(pe, "OPAL error %ld enabling MVE %d\n",
				       rc, pe->mve_number);
				pe->mve_number = -1;
			}
		}
	} else if (phb->type == PNV_PHB_IODA2)
		pe->mve_number = 0;

	return 0;
}

static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
				       struct pnv_ioda_pe *pe)
{
	struct pnv_ioda_pe *lpe;

	list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) {
		if (lpe->dma_weight < pe->dma_weight) {
			list_add_tail(&pe->dma_link, &lpe->dma_link);
			return;
		}
	}
	list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list);
}

static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
{
	/* This is quite simplistic. The "base" weight of a device
	 * is 10. 0 means no DMA is to be accounted for it.
	 */

	/* If it's a bridge, no DMA */
	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
		return 0;

	/* Reduce the weight of slow USB controllers */
	if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
	    dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
	    dev->class == PCI_CLASS_SERIAL_USB_EHCI)
		return 3;

	/* Increase the weight of RAID (includes Obsidian) */
	if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
		return 15;

	/* Default */
	return 10;
}

#if 0
static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn = pci_get_pdn(dev);
	struct pnv_ioda_pe *pe;
	int pe_num;

	if (!pdn) {
		pr_err("%s: Device tree node not associated properly\n",
		       pci_name(dev));
		return NULL;
	}
	if (pdn->pe_number != IODA_INVALID_PE)
		return NULL;

	/* PE#0 has been pre-set */
	if (dev->bus->number == 0)
		pe_num = 0;
	else
		pe_num = pnv_ioda_alloc_pe(phb);
	if (pe_num == IODA_INVALID_PE) {
		pr_warning("%s: Not enough PE# available, disabling device\n",
			   pci_name(dev));
		return NULL;
	}

	/* NOTE: We get only one ref to the pci_dev for the pdn, not for the
	 * pointer in the PE data structure, both should be destroyed at the
	 * same time. However, this needs to be looked at more closely again
	 * once we actually start removing things (Hotplug, SR-IOV, ...)
	 *
	 * At some point we want to remove the PDN completely anyways
	 */
	pe = &phb->ioda.pe_array[pe_num];
	pci_dev_get(dev);
	pdn->pcidev = dev;
	pdn->pe_number = pe_num;
	pe->pdev = dev;
	pe->pbus = NULL;
	pe->tce32_seg = -1;
	pe->mve_number = -1;
	pe->rid = dev->bus->number << 8 | pdn->devfn;

	pe_info(pe, "Associated device to PE\n");

	if (pnv_ioda_configure_pe(phb, pe)) {
		/* XXX What do we do here ? */
		if (pe_num)
			pnv_ioda_free_pe(phb, pe_num);
		pdn->pe_number = IODA_INVALID_PE;
		pe->pdev = NULL;
		pci_dev_put(dev);
		return NULL;
	}

	/* Assign a DMA weight to the device */
	pe->dma_weight = pnv_ioda_dma_weight(dev);
	if (pe->dma_weight != 0) {
		phb->ioda.dma_weight += pe->dma_weight;
		phb->ioda.dma_pe_count++;
	}

	/* Link the PE */
	pnv_ioda_link_pe_by_weight(phb, pe);

	return pe;
}
#endif /* Useful for SRIOV case */

static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		struct pci_dn *pdn = pci_get_pdn(dev);

		if (pdn == NULL) {
			pr_warn("%s: No device node associated with device !\n",
				pci_name(dev));
			continue;
		}
		pdn->pcidev = dev;
		pdn->pe_number = pe->pe_number;
		pe->dma_weight += pnv_ioda_dma_weight(dev);
		if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
			pnv_ioda_setup_same_PE(dev->subordinate, pe);
	}
}

/*
 * There are 2 types of PCI-bus-sensitive PEs: one that comprises a single
 * PCI bus, and another that contains the primary PCI bus and its
 * subordinate PCI devices and buses. The second type of PE is normally
 * created for a PCIe-to-PCI bridge or a PLX switch downstream port.
 */
static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
{
	struct pci_controller *hose = pci_bus_to_host(bus);
	struct pnv_phb *phb = hose->private_data;
	struct pnv_ioda_pe *pe;
	int pe_num;

	pe_num = pnv_ioda_alloc_pe(phb);
	if (pe_num == IODA_INVALID_PE) {
		pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
			   __func__, pci_domain_nr(bus), bus->number);
		return;
	}

	pe = &phb->ioda.pe_array[pe_num];
	pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
	pe->pbus = bus;
	pe->pdev = NULL;
	pe->tce32_seg = -1;
	pe->mve_number = -1;
	pe->rid = bus->busn_res.start << 8;
	pe->dma_weight = 0;

	if (all)
		pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n",
			bus->busn_res.start, bus->busn_res.end, pe_num);
	else
		pe_info(pe, "Secondary bus %d associated with PE#%d\n",
			bus->busn_res.start, pe_num);

	if (pnv_ioda_configure_pe(phb, pe)) {
		/* XXX What do we do here ? */
		if (pe_num)
			pnv_ioda_free_pe(phb, pe_num);
		pe->pbus = NULL;
		return;
	}

	/* Associate it with all child devices */
	pnv_ioda_setup_same_PE(bus, pe);

	/* Put PE to the list */
	list_add_tail(&pe->list, &phb->ioda.pe_list);

	/* Account for one DMA PE if at least one DMA capable device exists
	 * below the bridge
	 */
	if (pe->dma_weight != 0) {
		phb->ioda.dma_weight += pe->dma_weight;
		phb->ioda.dma_pe_count++;
	}

	/* Link the PE */
	pnv_ioda_link_pe_by_weight(phb, pe);
}

static void pnv_ioda_setup_PEs(struct pci_bus *bus)
{
	struct pci_dev *dev;

	pnv_ioda_setup_bus_PE(bus, 0);

	list_for_each_entry(dev, &bus->devices, bus_list) {
		if (dev->subordinate) {
			if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE)
				pnv_ioda_setup_bus_PE(dev->subordinate, 1);
			else
				pnv_ioda_setup_PEs(dev->subordinate);
		}
	}
}

/*
 * Configure the PEs so that the downstream PCI buses and devices
 * get their associated PE#. Unfortunately, we haven't figured out
 * a way to identify PLX bridges yet, so we simply assign the PCI
 * bus and everything subordinate to the root port to a PE# here.
 * This is expected to change as soon as we can detect PLX bridges
 * correctly.
 */
static void pnv_pci_ioda_setup_PEs(void)
{
	struct pci_controller *hose, *tmp;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		pnv_ioda_setup_PEs(hose->bus);
	}
}

static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev)
{
	struct pci_dn *pdn = pci_get_pdn(pdev);
	struct pnv_ioda_pe *pe;

	/*
	 * The function can be called while the PE#
	 * hasn't been assigned yet. Do nothing in that
	 * case.
	 */
	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
		return;

	pe = &phb->ioda.pe_array[pdn->pe_number];
	WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
	set_iommu_table_base(&pdev->dev, &pe->tce32_table);
}

static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
				     struct pci_dev *pdev, u64 dma_mask)
{
	struct pci_dn *pdn = pci_get_pdn(pdev);
	struct pnv_ioda_pe *pe;
	uint64_t top;
	bool bypass = false;

	if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
		return -ENODEV;

	pe = &phb->ioda.pe_array[pdn->pe_number];
	if (pe->tce_bypass_enabled) {
		top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
		bypass = (dma_mask >= top);
	}

	if (bypass) {
		dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n");
		set_dma_ops(&pdev->dev, &dma_direct_ops);
		set_dma_offset(&pdev->dev, pe->tce_bypass_base);
	} else {
		dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
		set_dma_ops(&pdev->dev, &dma_iommu_ops);
		set_iommu_table_base(&pdev->dev, &pe->tce32_table);
	}
	return 0;
}

static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		set_iommu_table_base_and_group(&dev->dev, &pe->tce32_table);
		if (dev->subordinate)
			pnv_ioda_setup_bus_dma(pe, dev->subordinate);
	}
}

static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
					 struct iommu_table *tbl,
					 __be64 *startp, __be64 *endp, bool rm)
{
	__be64 __iomem *invalidate = rm ?
		(__be64 __iomem *)pe->tce_inval_reg_phys :
		(__be64 __iomem *)tbl->it_index;
	unsigned long start, end, inc;

	start = __pa(startp);
	end = __pa(endp);

	/* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */
	if (tbl->it_busno) {
		start <<= 12;
		end <<= 12;
		inc = 128 << 12;
		start |= tbl->it_busno;
		end |= tbl->it_busno;
	} else if (tbl->it_type & TCE_PCI_SWINV_PAIR) {
		/* p7ioc-style invalidation, 2 TCEs per write */
		start |= (1ull << 63);
		end |= (1ull << 63);
		inc = 16;
	} else {
		/* Default (older HW) */
		inc = 128;
	}

	end |= inc - 1;	/* round up end to be different than start */

	mb(); /* Ensure above stores are visible */
	while (start <= end) {
		if (rm)
			__raw_rm_writeq(cpu_to_be64(start), invalidate);
		else
			__raw_writeq(cpu_to_be64(start), invalidate);
		start += inc;
	}

	/*
	 * The iommu layer will do another mb() for us on build()
	 * and we don't care on free()
	 */
}

static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
					 struct iommu_table *tbl,
					 __be64 *startp, __be64 *endp, bool rm)
{
	unsigned long start, end, inc;
	__be64 __iomem *invalidate = rm ?
		(__be64 __iomem *)pe->tce_inval_reg_phys :
		(__be64 __iomem *)tbl->it_index;

	/* We'll invalidate DMA address in PE scope */
	start = 0x2ul << 60;
	start |= (pe->pe_number & 0xFF);
	end = start;

	/* Figure out the start, end and step */
	inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64));
	start |= (inc << 12);
	inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64));
	end |= (inc << 12);
	inc = (0x1ul << 12);
	mb();

	while (start <= end) {
		if (rm)
			__raw_rm_writeq(cpu_to_be64(start), invalidate);
		else
			__raw_writeq(cpu_to_be64(start), invalidate);
		start += inc;
	}
}

void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
				 __be64 *startp, __be64 *endp, bool rm)
{
	struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
					      tce32_table);
	struct pnv_phb *phb = pe->phb;

	if (phb->type == PNV_PHB_IODA1)
		pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm);
	else
		pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm);
}

static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
				      struct pnv_ioda_pe *pe, unsigned int base,
				      unsigned int segs)
{

	struct page *tce_mem = NULL;
	const __be64 *swinvp;
	struct iommu_table *tbl;
	unsigned int i;
	int64_t rc;
	void *addr;

	/* 256M DMA window, 4K TCE pages, 8 bytes TCE */
#define TCE32_TABLE_SIZE	((0x10000000 / 0x1000) * 8)

	/* XXX FIXME: Handle 64-bit only DMA devices */
	/* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
	/* XXX FIXME: Allocate multi-level tables on PHB3 */

	/* We shouldn't already have a 32-bit DMA associated */
	if (WARN_ON(pe->tce32_seg >= 0))
		return;

	/* Grab a 32-bit TCE table */
	pe->tce32_seg = base;
	pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
		(base << 28), ((base + segs) << 28) - 1);

	/* XXX Currently, we allocate one big contiguous table for the
	 * TCEs. We only really need one chunk per 256M of TCE space
	 * (ie per segment) but that's an optimization for later, it
	 * requires some added smarts with our get/put_tce implementation
	 */
	tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
				   get_order(TCE32_TABLE_SIZE * segs));
	if (!tce_mem) {
		pe_err(pe, " Failed to allocate a 32-bit TCE memory\n");
		goto fail;
	}
	addr = page_address(tce_mem);
	memset(addr, 0, TCE32_TABLE_SIZE * segs);

	/* Configure HW */
	for (i = 0; i < segs; i++) {
		rc = opal_pci_map_pe_dma_window(phb->opal_id,
						pe->pe_number,
						base + i, 1,
						__pa(addr) + TCE32_TABLE_SIZE * i,
						TCE32_TABLE_SIZE, 0x1000);
		if (rc) {
			pe_err(pe, " Failed to configure 32-bit TCE table,"
			       " err %ld\n", rc);
			goto fail;
		}
	}

	/* Setup linux iommu table */
	tbl = &pe->tce32_table;
	pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
				  base << 28);

	/* OPAL variant of P7IOC SW invalidated TCEs */
	swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
	if (swinvp) {
		/* We need a couple more fields -- an address and a data
		 * to or. Since the bus is only printed out on table free
		 * errors, and on the first pass the data will be a relative
		 * bus number, print that out instead.
		 */
		tbl->it_busno = 0;
		pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
		tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
						       8);
		tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE |
			       TCE_PCI_SWINV_PAIR;
	}
	iommu_init_table(tbl, phb->hose->node);
	iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number);

	if (pe->pdev)
		set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
	else
		pnv_ioda_setup_bus_dma(pe, pe->pbus);

	return;
fail:
	/* XXX Failure: Try to fallback to 64-bit only ? */
	if (pe->tce32_seg >= 0)
		pe->tce32_seg = -1;
	if (tce_mem)
		__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
}

static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
{
	struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
					      tce32_table);
	uint16_t window_id = (pe->pe_number << 1) + 1;
	int64_t rc;

	pe_info(pe, "%sabling 64-bit DMA bypass\n", enable ? "En" : "Dis");
	if (enable) {
		phys_addr_t top = memblock_end_of_DRAM();

		top = roundup_pow_of_two(top);
		rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
						     pe->pe_number,
						     window_id,
						     pe->tce_bypass_base,
						     top);
	} else {
		rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
						     pe->pe_number,
						     window_id,
						     pe->tce_bypass_base,
						     0);

		/*
		 * We might want to reset the DMA ops of all devices on
		 * this PE. However in theory, that shouldn't be necessary
		 * as this is used for VFIO/KVM pass-through and the device
		 * hasn't yet been returned to its kernel driver
		 */
	}
	if (rc)
		pe_err(pe, "OPAL error %lld configuring bypass window\n", rc);
	else
		pe->tce_bypass_enabled = enable;
}

static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb,
					  struct pnv_ioda_pe *pe)
{
	/* TVE #1 is selected by PCI address bit 59 */
	pe->tce_bypass_base = 1ull << 59;

	/* Install set_bypass callback for VFIO */
	pe->tce32_table.set_bypass = pnv_pci_ioda2_set_bypass;

	/* Enable bypass by default */
	pnv_pci_ioda2_set_bypass(&pe->tce32_table, true);
}

static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
				       struct pnv_ioda_pe *pe)
{
	struct page *tce_mem = NULL;
	void *addr;
	const __be64 *swinvp;
	struct iommu_table *tbl;
	unsigned int tce_table_size, end;
	int64_t rc;

	/* We shouldn't already have a 32-bit DMA associated */
	if (WARN_ON(pe->tce32_seg >= 0))
		return;

	/* The PE will reserve all possible 32-bit space */
	pe->tce32_seg = 0;
	end = (1 << ilog2(phb->ioda.m32_pci_base));
	tce_table_size = (end / 0x1000) * 8;
	pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
		end);

	/* Allocate TCE table */
	tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
				   get_order(tce_table_size));
	if (!tce_mem) {
		pe_err(pe, "Failed to allocate a 32-bit TCE memory\n");
		goto fail;
	}
	addr = page_address(tce_mem);
	memset(addr, 0, tce_table_size);

	/*
	 * Map the TCE table through the TVT. The TVE index is the PE number
	 * shifted left by 1 bit for the 32-bit DMA space.
	 */
	rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
					pe->pe_number << 1, 1, __pa(addr),
					tce_table_size, 0x1000);
	if (rc) {
		pe_err(pe, "Failed to configure 32-bit TCE table,"
		       " err %ld\n", rc);
		goto fail;
	}

	/* Setup linux iommu table */
	tbl = &pe->tce32_table;
	pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0);

	/* OPAL variant of PHB3 invalidated TCEs */
	swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
	if (swinvp) {
		/* We need a couple more fields -- an address and a data
		 * to or. Since the bus is only printed out on table free
		 * errors, and on the first pass the data will be a relative
		 * bus number, print that out instead.
		 */
		tbl->it_busno = 0;
		pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
		tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
						       8);
		tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
	}
	iommu_init_table(tbl, phb->hose->node);
	iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number);

	if (pe->pdev)
		set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
	else
		pnv_ioda_setup_bus_dma(pe, pe->pbus);

	/* Also create a bypass window */
	pnv_pci_ioda2_setup_bypass_pe(phb, pe);
	return;
fail:
	if (pe->tce32_seg >= 0)
		pe->tce32_seg = -1;
	if (tce_mem)
		__free_pages(tce_mem, get_order(tce_table_size));
}

static void pnv_ioda_setup_dma(struct pnv_phb *phb)
{
	struct pci_controller *hose = phb->hose;
	unsigned int residual, remaining, segs, tw, base;
	struct pnv_ioda_pe *pe;

	/* If we have more PE# than segments available, hand out one
	 * per PE until we run out and let the rest fail. If not,
	 * then we assign at least one segment per PE, plus more based
	 * on the number of devices under that PE
	 */
	if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
		residual = 0;
	else
		residual = phb->ioda.tce32_count -
			phb->ioda.dma_pe_count;

	pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
		hose->global_number, phb->ioda.tce32_count);
	pr_info("PCI: %d PE# for a total weight of %d\n",
		phb->ioda.dma_pe_count, phb->ioda.dma_weight);

	/* Walk our PE list and configure their DMA segments, hand them
	 * out one base segment plus any residual segments based on
	 * weight
	 */
	remaining = phb->ioda.tce32_count;
	tw = phb->ioda.dma_weight;
	base = 0;
	list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
		if (!pe->dma_weight)
			continue;
		if (!remaining) {
			pe_warn(pe, "No DMA32 resources available\n");
			continue;
		}
		segs = 1;
		if (residual) {
			/* Distribute the residual segments in proportion to
			 * this PE's share of the total DMA weight, rounding
			 * to the nearest segment
			 */
			segs += ((pe->dma_weight * residual) + (tw / 2)) / tw;
			if (segs > remaining)
				segs = remaining;
		}

		/*
		 * For an IODA2 compliant PHB3, we don't need to care about
		 * the weight. All of the available 32-bit DMA space is
		 * assigned to the PE.
		 */
		if (phb->type == PNV_PHB_IODA1) {
			pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
				pe->dma_weight, segs);
			pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
		} else {
			pe_info(pe, "Assign DMA32 space\n");
			segs = 0;
			pnv_pci_ioda2_setup_dma_pe(phb, pe);
		}

		remaining -= segs;
		base += segs;
	}
}

#ifdef CONFIG_PCI_MSI
static void pnv_ioda2_msi_eoi(struct irq_data *d)
{
	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
	struct irq_chip *chip = irq_data_get_irq_chip(d);
	struct pnv_phb *phb = container_of(chip, struct pnv_phb,
					   ioda.irq_chip);
	int64_t rc;

	rc = opal_pci_msi_eoi(phb->opal_id, hw_irq);
	WARN_ON_ONCE(rc);

	icp_native_eoi(d);
}

static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
				  unsigned int hwirq, unsigned int virq,
				  unsigned int is_64, struct msi_msg *msg)
{
	struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
	struct pci_dn *pdn = pci_get_pdn(dev);
	struct irq_data *idata;
	struct irq_chip *ichip;
	unsigned int xive_num = hwirq - phb->msi_base;
	__be32 data;
	int rc;

	/* No PE assigned ? bail out ... no MSI for you ! */
	if (pe == NULL)
		return -ENXIO;

	/* Check if we have an MVE */
	if (pe->mve_number < 0)
		return -ENXIO;

	/* Force 32-bit MSI on some broken devices */
	if (pdn && pdn->force_32bit_msi)
		is_64 = 0;

	/* Assign XIVE to PE */
	rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num);
	if (rc) {
		pr_warn("%s: OPAL error %d setting XIVE %d PE\n",
			pci_name(dev), rc, xive_num);
		return -EIO;
	}

	if (is_64) {
		__be64 addr64;

		rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1,
				     &addr64, &data);
		if (rc) {
			pr_warn("%s: OPAL error %d getting 64-bit MSI data\n",
				pci_name(dev), rc);
			return -EIO;
		}
		msg->address_hi = be64_to_cpu(addr64) >> 32;
		msg->address_lo = be64_to_cpu(addr64) & 0xfffffffful;
	} else {
		__be32 addr32;

		rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1,
				     &addr32, &data);
		if (rc) {
			pr_warn("%s: OPAL error %d getting 32-bit MSI data\n",
				pci_name(dev), rc);
			return -EIO;
		}
		msg->address_hi = 0;
		msg->address_lo = be32_to_cpu(addr32);
	}
	msg->data = be32_to_cpu(data);

	/*
	 * Change the IRQ chip for the MSI interrupts on PHB3.
	 * The corresponding IRQ chip should be populated for
	 * the first time.
	 */
	if (phb->type == PNV_PHB_IODA2) {
		if (!phb->ioda.irq_chip_init) {
			idata = irq_get_irq_data(virq);
			ichip = irq_data_get_irq_chip(idata);
			phb->ioda.irq_chip_init = 1;
			phb->ioda.irq_chip = *ichip;
			phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi;
		}

		irq_set_chip(virq, &phb->ioda.irq_chip);
	}

	pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
		 " address=%x_%08x data=%x PE# %d\n",
		 pci_name(dev), is_64 ?
"64" : "32", hwirq, xive_num, 977 msg->address_hi, msg->address_lo, data, pe->pe_number); 978 979 return 0; 980 } 981 982 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) 983 { 984 unsigned int count; 985 const __be32 *prop = of_get_property(phb->hose->dn, 986 "ibm,opal-msi-ranges", NULL); 987 if (!prop) { 988 /* BML Fallback */ 989 prop = of_get_property(phb->hose->dn, "msi-ranges", NULL); 990 } 991 if (!prop) 992 return; 993 994 phb->msi_base = be32_to_cpup(prop); 995 count = be32_to_cpup(prop + 1); 996 if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) { 997 pr_err("PCI %d: Failed to allocate MSI bitmap !\n", 998 phb->hose->global_number); 999 return; 1000 } 1001 1002 phb->msi_setup = pnv_pci_ioda_msi_setup; 1003 phb->msi32_support = 1; 1004 pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n", 1005 count, phb->msi_base); 1006 } 1007 #else 1008 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { } 1009 #endif /* CONFIG_PCI_MSI */ 1010 1011 /* 1012 * This function is supposed to be called on basis of PE from top 1013 * to bottom style. So the the I/O or MMIO segment assigned to 1014 * parent PE could be overrided by its child PEs if necessary. 1015 */ 1016 static void pnv_ioda_setup_pe_seg(struct pci_controller *hose, 1017 struct pnv_ioda_pe *pe) 1018 { 1019 struct pnv_phb *phb = hose->private_data; 1020 struct pci_bus_region region; 1021 struct resource *res; 1022 int i, index; 1023 int rc; 1024 1025 /* 1026 * NOTE: We only care PCI bus based PE for now. For PCI 1027 * device based PE, for example SRIOV sensitive VF should 1028 * be figured out later. 1029 */ 1030 BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))); 1031 1032 pci_bus_for_each_resource(pe->pbus, res, i) { 1033 if (!res || !res->flags || 1034 res->start > res->end) 1035 continue; 1036 1037 if (res->flags & IORESOURCE_IO) { 1038 region.start = res->start - phb->ioda.io_pci_base; 1039 region.end = res->end - phb->ioda.io_pci_base; 1040 index = region.start / phb->ioda.io_segsize; 1041 1042 while (index < phb->ioda.total_pe && 1043 region.start <= region.end) { 1044 phb->ioda.io_segmap[index] = pe->pe_number; 1045 rc = opal_pci_map_pe_mmio_window(phb->opal_id, 1046 pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index); 1047 if (rc != OPAL_SUCCESS) { 1048 pr_err("%s: OPAL error %d when mapping IO " 1049 "segment #%d to PE#%d\n", 1050 __func__, rc, index, pe->pe_number); 1051 break; 1052 } 1053 1054 region.start += phb->ioda.io_segsize; 1055 index++; 1056 } 1057 } else if (res->flags & IORESOURCE_MEM) { 1058 /* WARNING: Assumes M32 is mem region 0 in PHB. 
			 * We need to harden that algorithm when we start
			 * supporting M64
			 */
			region.start = res->start -
				       hose->mem_offset[0] -
				       phb->ioda.m32_pci_base;
			region.end = res->end -
				     hose->mem_offset[0] -
				     phb->ioda.m32_pci_base;
			index = region.start / phb->ioda.m32_segsize;

			while (index < phb->ioda.total_pe &&
			       region.start <= region.end) {
				phb->ioda.m32_segmap[index] = pe->pe_number;
				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
					pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
				if (rc != OPAL_SUCCESS) {
					pr_err("%s: OPAL error %d when mapping M32 "
					       "segment #%d to PE#%d\n",
					       __func__, rc, index, pe->pe_number);
					break;
				}

				region.start += phb->ioda.m32_segsize;
				index++;
			}
		}
	}
}

static void pnv_pci_ioda_setup_seg(void)
{
	struct pci_controller *tmp, *hose;
	struct pnv_phb *phb;
	struct pnv_ioda_pe *pe;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		phb = hose->private_data;
		list_for_each_entry(pe, &phb->ioda.pe_list, list) {
			pnv_ioda_setup_pe_seg(hose, pe);
		}
	}
}

static void pnv_pci_ioda_setup_DMA(void)
{
	struct pci_controller *hose, *tmp;
	struct pnv_phb *phb;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		pnv_ioda_setup_dma(hose->private_data);

		/* Mark the PHB initialization done */
		phb = hose->private_data;
		phb->initialized = 1;
	}
}

static void pnv_pci_ioda_create_dbgfs(void)
{
#ifdef CONFIG_DEBUG_FS
	struct pci_controller *hose, *tmp;
	struct pnv_phb *phb;
	char name[16];

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		phb = hose->private_data;

		sprintf(name, "PCI%04x", hose->global_number);
		phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);
		if (!phb->dbgfs)
			pr_warning("%s: Error on creating debugfs on PHB#%x\n",
				   __func__, hose->global_number);
	}
#endif /* CONFIG_DEBUG_FS */
}

static void pnv_pci_ioda_fixup(void)
{
	pnv_pci_ioda_setup_PEs();
	pnv_pci_ioda_setup_seg();
	pnv_pci_ioda_setup_DMA();

	pnv_pci_ioda_create_dbgfs();

#ifdef CONFIG_EEH
	eeh_probe_mode_set(EEH_PROBE_MODE_DEV);
	eeh_addr_cache_build();
	eeh_init();
#endif
}

/*
 * Returns the alignment for I/O or memory windows for P2P
 * bridges. That actually depends on how PEs are segmented.
 * For now, we return I/O or M32 segment size for PE sensitive
 * P2P bridges. Otherwise, the default values (4KiB for I/O,
 * 1MiB for memory) will be returned.
 *
 * The current PCI bus might be put into one PE, which was
 * created against the parent PCI bridge. In that case, we
 * don't need to enlarge the alignment, which saves some
 * resources.
 */
static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
						unsigned long type)
{
	struct pci_dev *bridge;
	struct pci_controller *hose = pci_bus_to_host(bus);
	struct pnv_phb *phb = hose->private_data;
	int num_pci_bridges = 0;

	bridge = bus->self;
	while (bridge) {
		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) {
			num_pci_bridges++;
			if (num_pci_bridges >= 2)
				return 1;
		}

		bridge = bridge->bus->self;
	}

	/* We need to support prefetchable memory windows later */
	if (type & IORESOURCE_MEM)
		return phb->ioda.m32_segsize;

	return phb->ioda.io_segsize;
}

/* Prevent enabling devices for which we couldn't properly
 * assign a PE
 */
static int pnv_pci_enable_device_hook(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn;

	/* The function is probably called while the PEs have
	 * not been created yet, for example during resource
	 * reassignment in the PCI probe period. Just skip the
	 * check if the PEs aren't ready.
	 */
	if (!phb->initialized)
		return 0;

	pdn = pci_get_pdn(dev);
	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
		return -EINVAL;

	return 0;
}

static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
			       u32 devfn)
{
	return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
}

static void pnv_pci_ioda_shutdown(struct pnv_phb *phb)
{
	opal_pci_reset(phb->opal_id, OPAL_PCI_IODA_TABLE_RESET,
		       OPAL_ASSERT_RESET);
}

void __init pnv_pci_init_ioda_phb(struct device_node *np,
				  u64 hub_id, int ioda_type)
{
	struct pci_controller *hose;
	struct pnv_phb *phb;
	unsigned long size, m32map_off, pemap_off, iomap_off = 0;
	const __be64 *prop64;
	const __be32 *prop32;
	int len;
	u64 phb_id;
	void *aux;
	long rc;

	pr_info("Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name);

	prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
	if (!prop64) {
		pr_err(" Missing \"ibm,opal-phbid\" property !\n");
		return;
	}
	phb_id = be64_to_cpup(prop64);
	pr_debug(" PHB-ID : 0x%016llx\n", phb_id);

	phb = alloc_bootmem(sizeof(struct pnv_phb));
	if (!phb) {
		pr_err(" Out of memory !\n");
		return;
	}

	/* Allocate PCI controller */
	memset(phb, 0, sizeof(struct pnv_phb));
	phb->hose = hose = pcibios_alloc_controller(np);
	if (!phb->hose) {
		pr_err(" Can't allocate PCI controller for %s\n",
		       np->full_name);
		free_bootmem((unsigned long)phb, sizeof(struct pnv_phb));
		return;
	}

	spin_lock_init(&phb->lock);
	prop32 = of_get_property(np, "bus-range", &len);
	if (prop32 && len == 8) {
		hose->first_busno = be32_to_cpu(prop32[0]);
		hose->last_busno = be32_to_cpu(prop32[1]);
	} else {
		pr_warn(" Broken <bus-range> on %s\n", np->full_name);
		hose->first_busno = 0;
		hose->last_busno = 0xff;
	}
	hose->private_data = phb;
	phb->hub_id = hub_id;
	phb->opal_id = phb_id;
	phb->type = ioda_type;

	/* Detect specific models for error handling */
	if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
		phb->model = PNV_PHB_MODEL_P7IOC;
	else if (of_device_is_compatible(np, "ibm,power8-pciex"))
		phb->model = PNV_PHB_MODEL_PHB3;
	else
		phb->model = PNV_PHB_MODEL_UNKNOWN;

	/* Parse 32-bit and IO ranges (if any) */
	pci_process_bridge_OF_ranges(hose, np, !hose->global_number);

	/* Get registers */
	phb->regs = of_iomap(np, 0);
	if (phb->regs == NULL)
		pr_err(" Failed to map registers !\n");

	/* Initialize more IODA stuff */
	phb->ioda.total_pe = 1;
	prop32 = of_get_property(np, "ibm,opal-num-pes", NULL);
	if (prop32)
		phb->ioda.total_pe = be32_to_cpup(prop32);
	prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL);
	if (prop32)
		phb->ioda.reserved_pe = be32_to_cpup(prop32);
	phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
	/* FW has already chopped the top 64K of M32 space (MSI space) off */
	phb->ioda.m32_size += 0x10000;

	phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
	phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0];
	phb->ioda.io_size = hose->pci_io_size;
	phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
	phb->ioda.io_pci_base = 0; /* XXX calculate this ? */

	/* Allocate aux data & arrays. We don't have IO ports on PHB3 */
	size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
	m32map_off = size;
	size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
	if (phb->type == PNV_PHB_IODA1) {
		iomap_off = size;
		size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
	}
	pemap_off = size;
	size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
	aux = alloc_bootmem(size);
	memset(aux, 0, size);
	phb->ioda.pe_alloc = aux;
	phb->ioda.m32_segmap = aux + m32map_off;
	if (phb->type == PNV_PHB_IODA1)
		phb->ioda.io_segmap = aux + iomap_off;
	phb->ioda.pe_array = aux + pemap_off;
	set_bit(phb->ioda.reserved_pe, phb->ioda.pe_alloc);

	INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
	INIT_LIST_HEAD(&phb->ioda.pe_list);

	/* Calculate how many 32-bit TCE segments we have */
	phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;

	/* Clear unusable m64 */
	hose->mem_resources[1].flags = 0;
	hose->mem_resources[1].start = 0;
	hose->mem_resources[1].end = 0;
	hose->mem_resources[2].flags = 0;
	hose->mem_resources[2].start = 0;
	hose->mem_resources[2].end = 0;

#if 0 /* We should really do that ... */
	rc = opal_pci_set_phb_mem_window(opal->phb_id,
					 window_type,
					 window_num,
					 starting_real_address,
					 starting_pci_address,
					 segment_size);
#endif

	pr_info(" %d (%d) PE's M32: 0x%x [segment=0x%x]"
		" IO: 0x%x [segment=0x%x]\n",
		phb->ioda.total_pe,
		phb->ioda.reserved_pe,
		phb->ioda.m32_size, phb->ioda.m32_segsize,
		phb->ioda.io_size, phb->ioda.io_segsize);

	phb->hose->ops = &pnv_pci_ops;
#ifdef CONFIG_EEH
	phb->eeh_ops = &ioda_eeh_ops;
#endif

	/* Setup RID -> PE mapping function */
	phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;

	/* Setup TCEs */
	phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
	phb->dma_set_mask = pnv_pci_ioda_dma_set_mask;

	/* Setup shutdown function for kexec */
	phb->shutdown = pnv_pci_ioda_shutdown;

	/* Setup MSI support */
	pnv_pci_init_ioda_msis(phb);

	/*
	 * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here
	 * to let the PCI core do resource assignment. It is expected
	 * that the PCI core will do correct I/O and MMIO alignment
	 * for the P2P bridge BARs so that each PCI bus (excluding
	 * the child P2P bridges) can form an individual PE.
	 */
	ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
	ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
	ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
	pci_add_flags(PCI_REASSIGN_ALL_RSRC);

	/* Reset IODA tables to a clean state */
	rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
	if (rc)
		pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc);
}

void __init pnv_pci_init_ioda2_phb(struct device_node *np)
{
	pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
}

void __init pnv_pci_init_ioda_hub(struct device_node *np)
{
	struct device_node *phbn;
	const __be64 *prop64;
	u64 hub_id;

	pr_info("Probing IODA IO-Hub %s\n", np->full_name);

	prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
	if (!prop64) {
		pr_err(" Missing \"ibm,opal-hubid\" property !\n");
		return;
	}
	hub_id = be64_to_cpup(prop64);
	pr_devel(" HUB-ID : 0x%016llx\n", hub_id);

	/* Count child PHBs */
	for_each_child_of_node(np, phbn) {
		/* Look for IODA1 PHBs */
		if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
			pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1);
	}
}