1 /* 2 * Support PCI/PCIe on PowerNV platforms 3 * 4 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp. 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12 #undef DEBUG 13 14 #include <linux/kernel.h> 15 #include <linux/pci.h> 16 #include <linux/debugfs.h> 17 #include <linux/delay.h> 18 #include <linux/string.h> 19 #include <linux/init.h> 20 #include <linux/bootmem.h> 21 #include <linux/irq.h> 22 #include <linux/io.h> 23 #include <linux/msi.h> 24 #include <linux/memblock.h> 25 26 #include <asm/sections.h> 27 #include <asm/io.h> 28 #include <asm/prom.h> 29 #include <asm/pci-bridge.h> 30 #include <asm/machdep.h> 31 #include <asm/msi_bitmap.h> 32 #include <asm/ppc-pci.h> 33 #include <asm/opal.h> 34 #include <asm/iommu.h> 35 #include <asm/tce.h> 36 #include <asm/xics.h> 37 #include <asm/debug.h> 38 39 #include "powernv.h" 40 #include "pci.h" 41 42 #define define_pe_printk_level(func, kern_level) \ 43 static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...) \ 44 { \ 45 struct va_format vaf; \ 46 va_list args; \ 47 char pfix[32]; \ 48 int r; \ 49 \ 50 va_start(args, fmt); \ 51 \ 52 vaf.fmt = fmt; \ 53 vaf.va = &args; \ 54 \ 55 if (pe->pdev) \ 56 strlcpy(pfix, dev_name(&pe->pdev->dev), \ 57 sizeof(pfix)); \ 58 else \ 59 sprintf(pfix, "%04x:%02x ", \ 60 pci_domain_nr(pe->pbus), \ 61 pe->pbus->number); \ 62 r = printk(kern_level "pci %s: [PE# %.3d] %pV", \ 63 pfix, pe->pe_number, &vaf); \ 64 \ 65 va_end(args); \ 66 \ 67 return r; \ 68 } \ 69 70 define_pe_printk_level(pe_err, KERN_ERR); 71 define_pe_printk_level(pe_warn, KERN_WARNING); 72 define_pe_printk_level(pe_info, KERN_INFO); 73 74 /* 75 * stdcix is only supposed to be used in hypervisor real mode as per 76 * the architecture spec 77 */ 78 static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr) 79 { 80 __asm__ __volatile__("stdcix %0,0,%1" 81 : : "r" (val), "r" (paddr) : "memory"); 82 } 83 84 static int pnv_ioda_alloc_pe(struct pnv_phb *phb) 85 { 86 unsigned long pe; 87 88 do { 89 pe = find_next_zero_bit(phb->ioda.pe_alloc, 90 phb->ioda.total_pe, 0); 91 if (pe >= phb->ioda.total_pe) 92 return IODA_INVALID_PE; 93 } while(test_and_set_bit(pe, phb->ioda.pe_alloc)); 94 95 phb->ioda.pe_array[pe].phb = phb; 96 phb->ioda.pe_array[pe].pe_number = pe; 97 return pe; 98 } 99 100 static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe) 101 { 102 WARN_ON(phb->ioda.pe_array[pe].pdev); 103 104 memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe)); 105 clear_bit(pe, phb->ioda.pe_alloc); 106 } 107 108 /* Currently those 2 are only used when MSIs are enabled, this will change 109 * but in the meantime, we need to protect them to avoid warnings 110 */ 111 #ifdef CONFIG_PCI_MSI 112 static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev) 113 { 114 struct pci_controller *hose = pci_bus_to_host(dev->bus); 115 struct pnv_phb *phb = hose->private_data; 116 struct pci_dn *pdn = pci_get_pdn(dev); 117 118 if (!pdn) 119 return NULL; 120 if (pdn->pe_number == IODA_INVALID_PE) 121 return NULL; 122 return &phb->ioda.pe_array[pdn->pe_number]; 123 } 124 #endif /* CONFIG_PCI_MSI */ 125 126 static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) 127 { 128 struct pci_dev *parent; 129 uint8_t bcomp, dcomp, fcomp; 130 long rc, rid_end, rid; 131 132 /* Bus validation ? */ 133 if (pe->pbus) { 134 int count; 135 136 dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER; 137 fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER; 138 parent = pe->pbus->self; 139 if (pe->flags & PNV_IODA_PE_BUS_ALL) 140 count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1; 141 else 142 count = 1; 143 144 switch(count) { 145 case 1: bcomp = OpalPciBusAll; break; 146 case 2: bcomp = OpalPciBus7Bits; break; 147 case 4: bcomp = OpalPciBus6Bits; break; 148 case 8: bcomp = OpalPciBus5Bits; break; 149 case 16: bcomp = OpalPciBus4Bits; break; 150 case 32: bcomp = OpalPciBus3Bits; break; 151 default: 152 pr_err("%s: Number of subordinate busses %d" 153 " unsupported\n", 154 pci_name(pe->pbus->self), count); 155 /* Do an exact match only */ 156 bcomp = OpalPciBusAll; 157 } 158 rid_end = pe->rid + (count << 8); 159 } else { 160 parent = pe->pdev->bus->self; 161 bcomp = OpalPciBusAll; 162 dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER; 163 fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER; 164 rid_end = pe->rid + 1; 165 } 166 167 /* 168 * Associate PE in PELT. We need add the PE into the 169 * corresponding PELT-V as well. Otherwise, the error 170 * originated from the PE might contribute to other 171 * PEs. 172 */ 173 rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid, 174 bcomp, dcomp, fcomp, OPAL_MAP_PE); 175 if (rc) { 176 pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc); 177 return -ENXIO; 178 } 179 180 rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, 181 pe->pe_number, OPAL_ADD_PE_TO_DOMAIN); 182 if (rc) 183 pe_warn(pe, "OPAL error %d adding self to PELTV\n", rc); 184 opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, 185 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); 186 187 /* Add to all parents PELT-V */ 188 while (parent) { 189 struct pci_dn *pdn = pci_get_pdn(parent); 190 if (pdn && pdn->pe_number != IODA_INVALID_PE) { 191 rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, 192 pe->pe_number, OPAL_ADD_PE_TO_DOMAIN); 193 /* XXX What to do in case of error ? */ 194 } 195 parent = parent->bus->self; 196 } 197 /* Setup reverse map */ 198 for (rid = pe->rid; rid < rid_end; rid++) 199 phb->ioda.pe_rmap[rid] = pe->pe_number; 200 201 /* Setup one MVTs on IODA1 */ 202 if (phb->type == PNV_PHB_IODA1) { 203 pe->mve_number = pe->pe_number; 204 rc = opal_pci_set_mve(phb->opal_id, pe->mve_number, 205 pe->pe_number); 206 if (rc) { 207 pe_err(pe, "OPAL error %ld setting up MVE %d\n", 208 rc, pe->mve_number); 209 pe->mve_number = -1; 210 } else { 211 rc = opal_pci_set_mve_enable(phb->opal_id, 212 pe->mve_number, OPAL_ENABLE_MVE); 213 if (rc) { 214 pe_err(pe, "OPAL error %ld enabling MVE %d\n", 215 rc, pe->mve_number); 216 pe->mve_number = -1; 217 } 218 } 219 } else if (phb->type == PNV_PHB_IODA2) 220 pe->mve_number = 0; 221 222 return 0; 223 } 224 225 static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb, 226 struct pnv_ioda_pe *pe) 227 { 228 struct pnv_ioda_pe *lpe; 229 230 list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) { 231 if (lpe->dma_weight < pe->dma_weight) { 232 list_add_tail(&pe->dma_link, &lpe->dma_link); 233 return; 234 } 235 } 236 list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list); 237 } 238 239 static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev) 240 { 241 /* This is quite simplistic. The "base" weight of a device 242 * is 10. 0 means no DMA is to be accounted for it. 243 */ 244 245 /* If it's a bridge, no DMA */ 246 if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) 247 return 0; 248 249 /* Reduce the weight of slow USB controllers */ 250 if (dev->class == PCI_CLASS_SERIAL_USB_UHCI || 251 dev->class == PCI_CLASS_SERIAL_USB_OHCI || 252 dev->class == PCI_CLASS_SERIAL_USB_EHCI) 253 return 3; 254 255 /* Increase the weight of RAID (includes Obsidian) */ 256 if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID) 257 return 15; 258 259 /* Default */ 260 return 10; 261 } 262 263 #if 0 264 static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) 265 { 266 struct pci_controller *hose = pci_bus_to_host(dev->bus); 267 struct pnv_phb *phb = hose->private_data; 268 struct pci_dn *pdn = pci_get_pdn(dev); 269 struct pnv_ioda_pe *pe; 270 int pe_num; 271 272 if (!pdn) { 273 pr_err("%s: Device tree node not associated properly\n", 274 pci_name(dev)); 275 return NULL; 276 } 277 if (pdn->pe_number != IODA_INVALID_PE) 278 return NULL; 279 280 /* PE#0 has been pre-set */ 281 if (dev->bus->number == 0) 282 pe_num = 0; 283 else 284 pe_num = pnv_ioda_alloc_pe(phb); 285 if (pe_num == IODA_INVALID_PE) { 286 pr_warning("%s: Not enough PE# available, disabling device\n", 287 pci_name(dev)); 288 return NULL; 289 } 290 291 /* NOTE: We get only one ref to the pci_dev for the pdn, not for the 292 * pointer in the PE data structure, both should be destroyed at the 293 * same time. However, this needs to be looked at more closely again 294 * once we actually start removing things (Hotplug, SR-IOV, ...) 295 * 296 * At some point we want to remove the PDN completely anyways 297 */ 298 pe = &phb->ioda.pe_array[pe_num]; 299 pci_dev_get(dev); 300 pdn->pcidev = dev; 301 pdn->pe_number = pe_num; 302 pe->pdev = dev; 303 pe->pbus = NULL; 304 pe->tce32_seg = -1; 305 pe->mve_number = -1; 306 pe->rid = dev->bus->number << 8 | pdn->devfn; 307 308 pe_info(pe, "Associated device to PE\n"); 309 310 if (pnv_ioda_configure_pe(phb, pe)) { 311 /* XXX What do we do here ? */ 312 if (pe_num) 313 pnv_ioda_free_pe(phb, pe_num); 314 pdn->pe_number = IODA_INVALID_PE; 315 pe->pdev = NULL; 316 pci_dev_put(dev); 317 return NULL; 318 } 319 320 /* Assign a DMA weight to the device */ 321 pe->dma_weight = pnv_ioda_dma_weight(dev); 322 if (pe->dma_weight != 0) { 323 phb->ioda.dma_weight += pe->dma_weight; 324 phb->ioda.dma_pe_count++; 325 } 326 327 /* Link the PE */ 328 pnv_ioda_link_pe_by_weight(phb, pe); 329 330 return pe; 331 } 332 #endif /* Useful for SRIOV case */ 333 334 static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) 335 { 336 struct pci_dev *dev; 337 338 list_for_each_entry(dev, &bus->devices, bus_list) { 339 struct pci_dn *pdn = pci_get_pdn(dev); 340 341 if (pdn == NULL) { 342 pr_warn("%s: No device node associated with device !\n", 343 pci_name(dev)); 344 continue; 345 } 346 pci_dev_get(dev); 347 pdn->pcidev = dev; 348 pdn->pe_number = pe->pe_number; 349 pe->dma_weight += pnv_ioda_dma_weight(dev); 350 if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) 351 pnv_ioda_setup_same_PE(dev->subordinate, pe); 352 } 353 } 354 355 /* 356 * There're 2 types of PCI bus sensitive PEs: One that is compromised of 357 * single PCI bus. Another one that contains the primary PCI bus and its 358 * subordinate PCI devices and buses. The second type of PE is normally 359 * orgiriated by PCIe-to-PCI bridge or PLX switch downstream ports. 360 */ 361 static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) 362 { 363 struct pci_controller *hose = pci_bus_to_host(bus); 364 struct pnv_phb *phb = hose->private_data; 365 struct pnv_ioda_pe *pe; 366 int pe_num; 367 368 pe_num = pnv_ioda_alloc_pe(phb); 369 if (pe_num == IODA_INVALID_PE) { 370 pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n", 371 __func__, pci_domain_nr(bus), bus->number); 372 return; 373 } 374 375 pe = &phb->ioda.pe_array[pe_num]; 376 pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS); 377 pe->pbus = bus; 378 pe->pdev = NULL; 379 pe->tce32_seg = -1; 380 pe->mve_number = -1; 381 pe->rid = bus->busn_res.start << 8; 382 pe->dma_weight = 0; 383 384 if (all) 385 pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n", 386 bus->busn_res.start, bus->busn_res.end, pe_num); 387 else 388 pe_info(pe, "Secondary bus %d associated with PE#%d\n", 389 bus->busn_res.start, pe_num); 390 391 if (pnv_ioda_configure_pe(phb, pe)) { 392 /* XXX What do we do here ? */ 393 if (pe_num) 394 pnv_ioda_free_pe(phb, pe_num); 395 pe->pbus = NULL; 396 return; 397 } 398 399 /* Associate it with all child devices */ 400 pnv_ioda_setup_same_PE(bus, pe); 401 402 /* Put PE to the list */ 403 list_add_tail(&pe->list, &phb->ioda.pe_list); 404 405 /* Account for one DMA PE if at least one DMA capable device exist 406 * below the bridge 407 */ 408 if (pe->dma_weight != 0) { 409 phb->ioda.dma_weight += pe->dma_weight; 410 phb->ioda.dma_pe_count++; 411 } 412 413 /* Link the PE */ 414 pnv_ioda_link_pe_by_weight(phb, pe); 415 } 416 417 static void pnv_ioda_setup_PEs(struct pci_bus *bus) 418 { 419 struct pci_dev *dev; 420 421 pnv_ioda_setup_bus_PE(bus, 0); 422 423 list_for_each_entry(dev, &bus->devices, bus_list) { 424 if (dev->subordinate) { 425 if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE) 426 pnv_ioda_setup_bus_PE(dev->subordinate, 1); 427 else 428 pnv_ioda_setup_PEs(dev->subordinate); 429 } 430 } 431 } 432 433 /* 434 * Configure PEs so that the downstream PCI buses and devices 435 * could have their associated PE#. Unfortunately, we didn't 436 * figure out the way to identify the PLX bridge yet. So we 437 * simply put the PCI bus and the subordinate behind the root 438 * port to PE# here. The game rule here is expected to be changed 439 * as soon as we can detected PLX bridge correctly. 440 */ 441 static void pnv_pci_ioda_setup_PEs(void) 442 { 443 struct pci_controller *hose, *tmp; 444 445 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 446 pnv_ioda_setup_PEs(hose->bus); 447 } 448 } 449 450 static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev) 451 { 452 struct pci_dn *pdn = pci_get_pdn(pdev); 453 struct pnv_ioda_pe *pe; 454 455 /* 456 * The function can be called while the PE# 457 * hasn't been assigned. Do nothing for the 458 * case. 459 */ 460 if (!pdn || pdn->pe_number == IODA_INVALID_PE) 461 return; 462 463 pe = &phb->ioda.pe_array[pdn->pe_number]; 464 WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); 465 set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table); 466 } 467 468 static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb, 469 struct pci_dev *pdev, u64 dma_mask) 470 { 471 struct pci_dn *pdn = pci_get_pdn(pdev); 472 struct pnv_ioda_pe *pe; 473 uint64_t top; 474 bool bypass = false; 475 476 if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) 477 return -ENODEV;; 478 479 pe = &phb->ioda.pe_array[pdn->pe_number]; 480 if (pe->tce_bypass_enabled) { 481 top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1; 482 bypass = (dma_mask >= top); 483 } 484 485 if (bypass) { 486 dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n"); 487 set_dma_ops(&pdev->dev, &dma_direct_ops); 488 set_dma_offset(&pdev->dev, pe->tce_bypass_base); 489 } else { 490 dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n"); 491 set_dma_ops(&pdev->dev, &dma_iommu_ops); 492 set_iommu_table_base(&pdev->dev, &pe->tce32_table); 493 } 494 return 0; 495 } 496 497 static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) 498 { 499 struct pci_dev *dev; 500 501 list_for_each_entry(dev, &bus->devices, bus_list) { 502 set_iommu_table_base_and_group(&dev->dev, &pe->tce32_table); 503 if (dev->subordinate) 504 pnv_ioda_setup_bus_dma(pe, dev->subordinate); 505 } 506 } 507 508 static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe, 509 struct iommu_table *tbl, 510 __be64 *startp, __be64 *endp, bool rm) 511 { 512 __be64 __iomem *invalidate = rm ? 513 (__be64 __iomem *)pe->tce_inval_reg_phys : 514 (__be64 __iomem *)tbl->it_index; 515 unsigned long start, end, inc; 516 517 start = __pa(startp); 518 end = __pa(endp); 519 520 /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */ 521 if (tbl->it_busno) { 522 start <<= 12; 523 end <<= 12; 524 inc = 128 << 12; 525 start |= tbl->it_busno; 526 end |= tbl->it_busno; 527 } else if (tbl->it_type & TCE_PCI_SWINV_PAIR) { 528 /* p7ioc-style invalidation, 2 TCEs per write */ 529 start |= (1ull << 63); 530 end |= (1ull << 63); 531 inc = 16; 532 } else { 533 /* Default (older HW) */ 534 inc = 128; 535 } 536 537 end |= inc - 1; /* round up end to be different than start */ 538 539 mb(); /* Ensure above stores are visible */ 540 while (start <= end) { 541 if (rm) 542 __raw_rm_writeq(cpu_to_be64(start), invalidate); 543 else 544 __raw_writeq(cpu_to_be64(start), invalidate); 545 start += inc; 546 } 547 548 /* 549 * The iommu layer will do another mb() for us on build() 550 * and we don't care on free() 551 */ 552 } 553 554 static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe, 555 struct iommu_table *tbl, 556 __be64 *startp, __be64 *endp, bool rm) 557 { 558 unsigned long start, end, inc; 559 __be64 __iomem *invalidate = rm ? 560 (__be64 __iomem *)pe->tce_inval_reg_phys : 561 (__be64 __iomem *)tbl->it_index; 562 563 /* We'll invalidate DMA address in PE scope */ 564 start = 0x2ul << 60; 565 start |= (pe->pe_number & 0xFF); 566 end = start; 567 568 /* Figure out the start, end and step */ 569 inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64)); 570 start |= (inc << 12); 571 inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64)); 572 end |= (inc << 12); 573 inc = (0x1ul << 12); 574 mb(); 575 576 while (start <= end) { 577 if (rm) 578 __raw_rm_writeq(cpu_to_be64(start), invalidate); 579 else 580 __raw_writeq(cpu_to_be64(start), invalidate); 581 start += inc; 582 } 583 } 584 585 void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, 586 __be64 *startp, __be64 *endp, bool rm) 587 { 588 struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe, 589 tce32_table); 590 struct pnv_phb *phb = pe->phb; 591 592 if (phb->type == PNV_PHB_IODA1) 593 pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm); 594 else 595 pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm); 596 } 597 598 static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, 599 struct pnv_ioda_pe *pe, unsigned int base, 600 unsigned int segs) 601 { 602 603 struct page *tce_mem = NULL; 604 const __be64 *swinvp; 605 struct iommu_table *tbl; 606 unsigned int i; 607 int64_t rc; 608 void *addr; 609 610 /* 256M DMA window, 4K TCE pages, 8 bytes TCE */ 611 #define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8) 612 613 /* XXX FIXME: Handle 64-bit only DMA devices */ 614 /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */ 615 /* XXX FIXME: Allocate multi-level tables on PHB3 */ 616 617 /* We shouldn't already have a 32-bit DMA associated */ 618 if (WARN_ON(pe->tce32_seg >= 0)) 619 return; 620 621 /* Grab a 32-bit TCE table */ 622 pe->tce32_seg = base; 623 pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n", 624 (base << 28), ((base + segs) << 28) - 1); 625 626 /* XXX Currently, we allocate one big contiguous table for the 627 * TCEs. We only really need one chunk per 256M of TCE space 628 * (ie per segment) but that's an optimization for later, it 629 * requires some added smarts with our get/put_tce implementation 630 */ 631 tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL, 632 get_order(TCE32_TABLE_SIZE * segs)); 633 if (!tce_mem) { 634 pe_err(pe, " Failed to allocate a 32-bit TCE memory\n"); 635 goto fail; 636 } 637 addr = page_address(tce_mem); 638 memset(addr, 0, TCE32_TABLE_SIZE * segs); 639 640 /* Configure HW */ 641 for (i = 0; i < segs; i++) { 642 rc = opal_pci_map_pe_dma_window(phb->opal_id, 643 pe->pe_number, 644 base + i, 1, 645 __pa(addr) + TCE32_TABLE_SIZE * i, 646 TCE32_TABLE_SIZE, 0x1000); 647 if (rc) { 648 pe_err(pe, " Failed to configure 32-bit TCE table," 649 " err %ld\n", rc); 650 goto fail; 651 } 652 } 653 654 /* Setup linux iommu table */ 655 tbl = &pe->tce32_table; 656 pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs, 657 base << 28); 658 659 /* OPAL variant of P7IOC SW invalidated TCEs */ 660 swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); 661 if (swinvp) { 662 /* We need a couple more fields -- an address and a data 663 * to or. Since the bus is only printed out on table free 664 * errors, and on the first pass the data will be a relative 665 * bus number, print that out instead. 666 */ 667 tbl->it_busno = 0; 668 pe->tce_inval_reg_phys = be64_to_cpup(swinvp); 669 tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys, 670 8); 671 tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE | 672 TCE_PCI_SWINV_PAIR; 673 } 674 iommu_init_table(tbl, phb->hose->node); 675 iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number); 676 677 if (pe->pdev) 678 set_iommu_table_base_and_group(&pe->pdev->dev, tbl); 679 else 680 pnv_ioda_setup_bus_dma(pe, pe->pbus); 681 682 return; 683 fail: 684 /* XXX Failure: Try to fallback to 64-bit only ? */ 685 if (pe->tce32_seg >= 0) 686 pe->tce32_seg = -1; 687 if (tce_mem) 688 __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs)); 689 } 690 691 static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) 692 { 693 struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe, 694 tce32_table); 695 uint16_t window_id = (pe->pe_number << 1 ) + 1; 696 int64_t rc; 697 698 pe_info(pe, "%sabling 64-bit DMA bypass\n", enable ? "En" : "Dis"); 699 if (enable) { 700 phys_addr_t top = memblock_end_of_DRAM(); 701 702 top = roundup_pow_of_two(top); 703 rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, 704 pe->pe_number, 705 window_id, 706 pe->tce_bypass_base, 707 top); 708 } else { 709 rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, 710 pe->pe_number, 711 window_id, 712 pe->tce_bypass_base, 713 0); 714 715 /* 716 * We might want to reset the DMA ops of all devices on 717 * this PE. However in theory, that shouldn't be necessary 718 * as this is used for VFIO/KVM pass-through and the device 719 * hasn't yet been returned to its kernel driver 720 */ 721 } 722 if (rc) 723 pe_err(pe, "OPAL error %lld configuring bypass window\n", rc); 724 else 725 pe->tce_bypass_enabled = enable; 726 } 727 728 static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb, 729 struct pnv_ioda_pe *pe) 730 { 731 /* TVE #1 is selected by PCI address bit 59 */ 732 pe->tce_bypass_base = 1ull << 59; 733 734 /* Install set_bypass callback for VFIO */ 735 pe->tce32_table.set_bypass = pnv_pci_ioda2_set_bypass; 736 737 /* Enable bypass by default */ 738 pnv_pci_ioda2_set_bypass(&pe->tce32_table, true); 739 } 740 741 static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, 742 struct pnv_ioda_pe *pe) 743 { 744 struct page *tce_mem = NULL; 745 void *addr; 746 const __be64 *swinvp; 747 struct iommu_table *tbl; 748 unsigned int tce_table_size, end; 749 int64_t rc; 750 751 /* We shouldn't already have a 32-bit DMA associated */ 752 if (WARN_ON(pe->tce32_seg >= 0)) 753 return; 754 755 /* The PE will reserve all possible 32-bits space */ 756 pe->tce32_seg = 0; 757 end = (1 << ilog2(phb->ioda.m32_pci_base)); 758 tce_table_size = (end / 0x1000) * 8; 759 pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n", 760 end); 761 762 /* Allocate TCE table */ 763 tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL, 764 get_order(tce_table_size)); 765 if (!tce_mem) { 766 pe_err(pe, "Failed to allocate a 32-bit TCE memory\n"); 767 goto fail; 768 } 769 addr = page_address(tce_mem); 770 memset(addr, 0, tce_table_size); 771 772 /* 773 * Map TCE table through TVT. The TVE index is the PE number 774 * shifted by 1 bit for 32-bits DMA space. 775 */ 776 rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, 777 pe->pe_number << 1, 1, __pa(addr), 778 tce_table_size, 0x1000); 779 if (rc) { 780 pe_err(pe, "Failed to configure 32-bit TCE table," 781 " err %ld\n", rc); 782 goto fail; 783 } 784 785 /* Setup linux iommu table */ 786 tbl = &pe->tce32_table; 787 pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0); 788 789 /* OPAL variant of PHB3 invalidated TCEs */ 790 swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); 791 if (swinvp) { 792 /* We need a couple more fields -- an address and a data 793 * to or. Since the bus is only printed out on table free 794 * errors, and on the first pass the data will be a relative 795 * bus number, print that out instead. 796 */ 797 tbl->it_busno = 0; 798 pe->tce_inval_reg_phys = be64_to_cpup(swinvp); 799 tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys, 800 8); 801 tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE; 802 } 803 iommu_init_table(tbl, phb->hose->node); 804 iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number); 805 806 if (pe->pdev) 807 set_iommu_table_base_and_group(&pe->pdev->dev, tbl); 808 else 809 pnv_ioda_setup_bus_dma(pe, pe->pbus); 810 811 /* Also create a bypass window */ 812 pnv_pci_ioda2_setup_bypass_pe(phb, pe); 813 return; 814 fail: 815 if (pe->tce32_seg >= 0) 816 pe->tce32_seg = -1; 817 if (tce_mem) 818 __free_pages(tce_mem, get_order(tce_table_size)); 819 } 820 821 static void pnv_ioda_setup_dma(struct pnv_phb *phb) 822 { 823 struct pci_controller *hose = phb->hose; 824 unsigned int residual, remaining, segs, tw, base; 825 struct pnv_ioda_pe *pe; 826 827 /* If we have more PE# than segments available, hand out one 828 * per PE until we run out and let the rest fail. If not, 829 * then we assign at least one segment per PE, plus more based 830 * on the amount of devices under that PE 831 */ 832 if (phb->ioda.dma_pe_count > phb->ioda.tce32_count) 833 residual = 0; 834 else 835 residual = phb->ioda.tce32_count - 836 phb->ioda.dma_pe_count; 837 838 pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n", 839 hose->global_number, phb->ioda.tce32_count); 840 pr_info("PCI: %d PE# for a total weight of %d\n", 841 phb->ioda.dma_pe_count, phb->ioda.dma_weight); 842 843 /* Walk our PE list and configure their DMA segments, hand them 844 * out one base segment plus any residual segments based on 845 * weight 846 */ 847 remaining = phb->ioda.tce32_count; 848 tw = phb->ioda.dma_weight; 849 base = 0; 850 list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) { 851 if (!pe->dma_weight) 852 continue; 853 if (!remaining) { 854 pe_warn(pe, "No DMA32 resources available\n"); 855 continue; 856 } 857 segs = 1; 858 if (residual) { 859 segs += ((pe->dma_weight * residual) + (tw / 2)) / tw; 860 if (segs > remaining) 861 segs = remaining; 862 } 863 864 /* 865 * For IODA2 compliant PHB3, we needn't care about the weight. 866 * The all available 32-bits DMA space will be assigned to 867 * the specific PE. 868 */ 869 if (phb->type == PNV_PHB_IODA1) { 870 pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n", 871 pe->dma_weight, segs); 872 pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs); 873 } else { 874 pe_info(pe, "Assign DMA32 space\n"); 875 segs = 0; 876 pnv_pci_ioda2_setup_dma_pe(phb, pe); 877 } 878 879 remaining -= segs; 880 base += segs; 881 } 882 } 883 884 #ifdef CONFIG_PCI_MSI 885 static void pnv_ioda2_msi_eoi(struct irq_data *d) 886 { 887 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); 888 struct irq_chip *chip = irq_data_get_irq_chip(d); 889 struct pnv_phb *phb = container_of(chip, struct pnv_phb, 890 ioda.irq_chip); 891 int64_t rc; 892 893 rc = opal_pci_msi_eoi(phb->opal_id, hw_irq); 894 WARN_ON_ONCE(rc); 895 896 icp_native_eoi(d); 897 } 898 899 static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, 900 unsigned int hwirq, unsigned int virq, 901 unsigned int is_64, struct msi_msg *msg) 902 { 903 struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev); 904 struct pci_dn *pdn = pci_get_pdn(dev); 905 struct irq_data *idata; 906 struct irq_chip *ichip; 907 unsigned int xive_num = hwirq - phb->msi_base; 908 __be32 data; 909 int rc; 910 911 /* No PE assigned ? bail out ... no MSI for you ! */ 912 if (pe == NULL) 913 return -ENXIO; 914 915 /* Check if we have an MVE */ 916 if (pe->mve_number < 0) 917 return -ENXIO; 918 919 /* Force 32-bit MSI on some broken devices */ 920 if (pdn && pdn->force_32bit_msi) 921 is_64 = 0; 922 923 /* Assign XIVE to PE */ 924 rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num); 925 if (rc) { 926 pr_warn("%s: OPAL error %d setting XIVE %d PE\n", 927 pci_name(dev), rc, xive_num); 928 return -EIO; 929 } 930 931 if (is_64) { 932 __be64 addr64; 933 934 rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1, 935 &addr64, &data); 936 if (rc) { 937 pr_warn("%s: OPAL error %d getting 64-bit MSI data\n", 938 pci_name(dev), rc); 939 return -EIO; 940 } 941 msg->address_hi = be64_to_cpu(addr64) >> 32; 942 msg->address_lo = be64_to_cpu(addr64) & 0xfffffffful; 943 } else { 944 __be32 addr32; 945 946 rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1, 947 &addr32, &data); 948 if (rc) { 949 pr_warn("%s: OPAL error %d getting 32-bit MSI data\n", 950 pci_name(dev), rc); 951 return -EIO; 952 } 953 msg->address_hi = 0; 954 msg->address_lo = be32_to_cpu(addr32); 955 } 956 msg->data = be32_to_cpu(data); 957 958 /* 959 * Change the IRQ chip for the MSI interrupts on PHB3. 960 * The corresponding IRQ chip should be populated for 961 * the first time. 962 */ 963 if (phb->type == PNV_PHB_IODA2) { 964 if (!phb->ioda.irq_chip_init) { 965 idata = irq_get_irq_data(virq); 966 ichip = irq_data_get_irq_chip(idata); 967 phb->ioda.irq_chip_init = 1; 968 phb->ioda.irq_chip = *ichip; 969 phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi; 970 } 971 972 irq_set_chip(virq, &phb->ioda.irq_chip); 973 } 974 975 pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d)," 976 " address=%x_%08x data=%x PE# %d\n", 977 pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num, 978 msg->address_hi, msg->address_lo, data, pe->pe_number); 979 980 return 0; 981 } 982 983 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) 984 { 985 unsigned int count; 986 const __be32 *prop = of_get_property(phb->hose->dn, 987 "ibm,opal-msi-ranges", NULL); 988 if (!prop) { 989 /* BML Fallback */ 990 prop = of_get_property(phb->hose->dn, "msi-ranges", NULL); 991 } 992 if (!prop) 993 return; 994 995 phb->msi_base = be32_to_cpup(prop); 996 count = be32_to_cpup(prop + 1); 997 if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) { 998 pr_err("PCI %d: Failed to allocate MSI bitmap !\n", 999 phb->hose->global_number); 1000 return; 1001 } 1002 1003 phb->msi_setup = pnv_pci_ioda_msi_setup; 1004 phb->msi32_support = 1; 1005 pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n", 1006 count, phb->msi_base); 1007 } 1008 #else 1009 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { } 1010 #endif /* CONFIG_PCI_MSI */ 1011 1012 /* 1013 * This function is supposed to be called on basis of PE from top 1014 * to bottom style. So the the I/O or MMIO segment assigned to 1015 * parent PE could be overrided by its child PEs if necessary. 1016 */ 1017 static void pnv_ioda_setup_pe_seg(struct pci_controller *hose, 1018 struct pnv_ioda_pe *pe) 1019 { 1020 struct pnv_phb *phb = hose->private_data; 1021 struct pci_bus_region region; 1022 struct resource *res; 1023 int i, index; 1024 int rc; 1025 1026 /* 1027 * NOTE: We only care PCI bus based PE for now. For PCI 1028 * device based PE, for example SRIOV sensitive VF should 1029 * be figured out later. 1030 */ 1031 BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))); 1032 1033 pci_bus_for_each_resource(pe->pbus, res, i) { 1034 if (!res || !res->flags || 1035 res->start > res->end) 1036 continue; 1037 1038 if (res->flags & IORESOURCE_IO) { 1039 region.start = res->start - phb->ioda.io_pci_base; 1040 region.end = res->end - phb->ioda.io_pci_base; 1041 index = region.start / phb->ioda.io_segsize; 1042 1043 while (index < phb->ioda.total_pe && 1044 region.start <= region.end) { 1045 phb->ioda.io_segmap[index] = pe->pe_number; 1046 rc = opal_pci_map_pe_mmio_window(phb->opal_id, 1047 pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index); 1048 if (rc != OPAL_SUCCESS) { 1049 pr_err("%s: OPAL error %d when mapping IO " 1050 "segment #%d to PE#%d\n", 1051 __func__, rc, index, pe->pe_number); 1052 break; 1053 } 1054 1055 region.start += phb->ioda.io_segsize; 1056 index++; 1057 } 1058 } else if (res->flags & IORESOURCE_MEM) { 1059 /* WARNING: Assumes M32 is mem region 0 in PHB. We need to 1060 * harden that algorithm when we start supporting M64 1061 */ 1062 region.start = res->start - 1063 hose->mem_offset[0] - 1064 phb->ioda.m32_pci_base; 1065 region.end = res->end - 1066 hose->mem_offset[0] - 1067 phb->ioda.m32_pci_base; 1068 index = region.start / phb->ioda.m32_segsize; 1069 1070 while (index < phb->ioda.total_pe && 1071 region.start <= region.end) { 1072 phb->ioda.m32_segmap[index] = pe->pe_number; 1073 rc = opal_pci_map_pe_mmio_window(phb->opal_id, 1074 pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index); 1075 if (rc != OPAL_SUCCESS) { 1076 pr_err("%s: OPAL error %d when mapping M32 " 1077 "segment#%d to PE#%d", 1078 __func__, rc, index, pe->pe_number); 1079 break; 1080 } 1081 1082 region.start += phb->ioda.m32_segsize; 1083 index++; 1084 } 1085 } 1086 } 1087 } 1088 1089 static void pnv_pci_ioda_setup_seg(void) 1090 { 1091 struct pci_controller *tmp, *hose; 1092 struct pnv_phb *phb; 1093 struct pnv_ioda_pe *pe; 1094 1095 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 1096 phb = hose->private_data; 1097 list_for_each_entry(pe, &phb->ioda.pe_list, list) { 1098 pnv_ioda_setup_pe_seg(hose, pe); 1099 } 1100 } 1101 } 1102 1103 static void pnv_pci_ioda_setup_DMA(void) 1104 { 1105 struct pci_controller *hose, *tmp; 1106 struct pnv_phb *phb; 1107 1108 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 1109 pnv_ioda_setup_dma(hose->private_data); 1110 1111 /* Mark the PHB initialization done */ 1112 phb = hose->private_data; 1113 phb->initialized = 1; 1114 } 1115 } 1116 1117 static void pnv_pci_ioda_create_dbgfs(void) 1118 { 1119 #ifdef CONFIG_DEBUG_FS 1120 struct pci_controller *hose, *tmp; 1121 struct pnv_phb *phb; 1122 char name[16]; 1123 1124 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 1125 phb = hose->private_data; 1126 1127 sprintf(name, "PCI%04x", hose->global_number); 1128 phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root); 1129 if (!phb->dbgfs) 1130 pr_warning("%s: Error on creating debugfs on PHB#%x\n", 1131 __func__, hose->global_number); 1132 } 1133 #endif /* CONFIG_DEBUG_FS */ 1134 } 1135 1136 static void pnv_pci_ioda_fixup(void) 1137 { 1138 pnv_pci_ioda_setup_PEs(); 1139 pnv_pci_ioda_setup_seg(); 1140 pnv_pci_ioda_setup_DMA(); 1141 1142 pnv_pci_ioda_create_dbgfs(); 1143 1144 #ifdef CONFIG_EEH 1145 eeh_probe_mode_set(EEH_PROBE_MODE_DEV); 1146 eeh_addr_cache_build(); 1147 eeh_init(); 1148 #endif 1149 } 1150 1151 /* 1152 * Returns the alignment for I/O or memory windows for P2P 1153 * bridges. That actually depends on how PEs are segmented. 1154 * For now, we return I/O or M32 segment size for PE sensitive 1155 * P2P bridges. Otherwise, the default values (4KiB for I/O, 1156 * 1MiB for memory) will be returned. 1157 * 1158 * The current PCI bus might be put into one PE, which was 1159 * create against the parent PCI bridge. For that case, we 1160 * needn't enlarge the alignment so that we can save some 1161 * resources. 1162 */ 1163 static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, 1164 unsigned long type) 1165 { 1166 struct pci_dev *bridge; 1167 struct pci_controller *hose = pci_bus_to_host(bus); 1168 struct pnv_phb *phb = hose->private_data; 1169 int num_pci_bridges = 0; 1170 1171 bridge = bus->self; 1172 while (bridge) { 1173 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) { 1174 num_pci_bridges++; 1175 if (num_pci_bridges >= 2) 1176 return 1; 1177 } 1178 1179 bridge = bridge->bus->self; 1180 } 1181 1182 /* We need support prefetchable memory window later */ 1183 if (type & IORESOURCE_MEM) 1184 return phb->ioda.m32_segsize; 1185 1186 return phb->ioda.io_segsize; 1187 } 1188 1189 /* Prevent enabling devices for which we couldn't properly 1190 * assign a PE 1191 */ 1192 static int pnv_pci_enable_device_hook(struct pci_dev *dev) 1193 { 1194 struct pci_controller *hose = pci_bus_to_host(dev->bus); 1195 struct pnv_phb *phb = hose->private_data; 1196 struct pci_dn *pdn; 1197 1198 /* The function is probably called while the PEs have 1199 * not be created yet. For example, resource reassignment 1200 * during PCI probe period. We just skip the check if 1201 * PEs isn't ready. 1202 */ 1203 if (!phb->initialized) 1204 return 0; 1205 1206 pdn = pci_get_pdn(dev); 1207 if (!pdn || pdn->pe_number == IODA_INVALID_PE) 1208 return -EINVAL; 1209 1210 return 0; 1211 } 1212 1213 static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus, 1214 u32 devfn) 1215 { 1216 return phb->ioda.pe_rmap[(bus->number << 8) | devfn]; 1217 } 1218 1219 static void pnv_pci_ioda_shutdown(struct pnv_phb *phb) 1220 { 1221 opal_pci_reset(phb->opal_id, OPAL_PCI_IODA_TABLE_RESET, 1222 OPAL_ASSERT_RESET); 1223 } 1224 1225 void __init pnv_pci_init_ioda_phb(struct device_node *np, 1226 u64 hub_id, int ioda_type) 1227 { 1228 struct pci_controller *hose; 1229 struct pnv_phb *phb; 1230 unsigned long size, m32map_off, pemap_off, iomap_off = 0; 1231 const __be64 *prop64; 1232 const __be32 *prop32; 1233 int len; 1234 u64 phb_id; 1235 void *aux; 1236 long rc; 1237 1238 pr_info("Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name); 1239 1240 prop64 = of_get_property(np, "ibm,opal-phbid", NULL); 1241 if (!prop64) { 1242 pr_err(" Missing \"ibm,opal-phbid\" property !\n"); 1243 return; 1244 } 1245 phb_id = be64_to_cpup(prop64); 1246 pr_debug(" PHB-ID : 0x%016llx\n", phb_id); 1247 1248 phb = alloc_bootmem(sizeof(struct pnv_phb)); 1249 if (!phb) { 1250 pr_err(" Out of memory !\n"); 1251 return; 1252 } 1253 1254 /* Allocate PCI controller */ 1255 memset(phb, 0, sizeof(struct pnv_phb)); 1256 phb->hose = hose = pcibios_alloc_controller(np); 1257 if (!phb->hose) { 1258 pr_err(" Can't allocate PCI controller for %s\n", 1259 np->full_name); 1260 free_bootmem((unsigned long)phb, sizeof(struct pnv_phb)); 1261 return; 1262 } 1263 1264 spin_lock_init(&phb->lock); 1265 prop32 = of_get_property(np, "bus-range", &len); 1266 if (prop32 && len == 8) { 1267 hose->first_busno = be32_to_cpu(prop32[0]); 1268 hose->last_busno = be32_to_cpu(prop32[1]); 1269 } else { 1270 pr_warn(" Broken <bus-range> on %s\n", np->full_name); 1271 hose->first_busno = 0; 1272 hose->last_busno = 0xff; 1273 } 1274 hose->private_data = phb; 1275 phb->hub_id = hub_id; 1276 phb->opal_id = phb_id; 1277 phb->type = ioda_type; 1278 1279 /* Detect specific models for error handling */ 1280 if (of_device_is_compatible(np, "ibm,p7ioc-pciex")) 1281 phb->model = PNV_PHB_MODEL_P7IOC; 1282 else if (of_device_is_compatible(np, "ibm,power8-pciex")) 1283 phb->model = PNV_PHB_MODEL_PHB3; 1284 else 1285 phb->model = PNV_PHB_MODEL_UNKNOWN; 1286 1287 /* Parse 32-bit and IO ranges (if any) */ 1288 pci_process_bridge_OF_ranges(hose, np, !hose->global_number); 1289 1290 /* Get registers */ 1291 phb->regs = of_iomap(np, 0); 1292 if (phb->regs == NULL) 1293 pr_err(" Failed to map registers !\n"); 1294 1295 /* Initialize more IODA stuff */ 1296 phb->ioda.total_pe = 1; 1297 prop32 = of_get_property(np, "ibm,opal-num-pes", NULL); 1298 if (prop32) 1299 phb->ioda.total_pe = be32_to_cpup(prop32); 1300 prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL); 1301 if (prop32) 1302 phb->ioda.reserved_pe = be32_to_cpup(prop32); 1303 phb->ioda.m32_size = resource_size(&hose->mem_resources[0]); 1304 /* FW Has already off top 64k of M32 space (MSI space) */ 1305 phb->ioda.m32_size += 0x10000; 1306 1307 phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe; 1308 phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0]; 1309 phb->ioda.io_size = hose->pci_io_size; 1310 phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe; 1311 phb->ioda.io_pci_base = 0; /* XXX calculate this ? */ 1312 1313 /* Allocate aux data & arrays. We don't have IO ports on PHB3 */ 1314 size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long)); 1315 m32map_off = size; 1316 size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]); 1317 if (phb->type == PNV_PHB_IODA1) { 1318 iomap_off = size; 1319 size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]); 1320 } 1321 pemap_off = size; 1322 size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe); 1323 aux = alloc_bootmem(size); 1324 memset(aux, 0, size); 1325 phb->ioda.pe_alloc = aux; 1326 phb->ioda.m32_segmap = aux + m32map_off; 1327 if (phb->type == PNV_PHB_IODA1) 1328 phb->ioda.io_segmap = aux + iomap_off; 1329 phb->ioda.pe_array = aux + pemap_off; 1330 set_bit(phb->ioda.reserved_pe, phb->ioda.pe_alloc); 1331 1332 INIT_LIST_HEAD(&phb->ioda.pe_dma_list); 1333 INIT_LIST_HEAD(&phb->ioda.pe_list); 1334 1335 /* Calculate how many 32-bit TCE segments we have */ 1336 phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28; 1337 1338 /* Clear unusable m64 */ 1339 hose->mem_resources[1].flags = 0; 1340 hose->mem_resources[1].start = 0; 1341 hose->mem_resources[1].end = 0; 1342 hose->mem_resources[2].flags = 0; 1343 hose->mem_resources[2].start = 0; 1344 hose->mem_resources[2].end = 0; 1345 1346 #if 0 /* We should really do that ... */ 1347 rc = opal_pci_set_phb_mem_window(opal->phb_id, 1348 window_type, 1349 window_num, 1350 starting_real_address, 1351 starting_pci_address, 1352 segment_size); 1353 #endif 1354 1355 pr_info(" %d (%d) PE's M32: 0x%x [segment=0x%x]" 1356 " IO: 0x%x [segment=0x%x]\n", 1357 phb->ioda.total_pe, 1358 phb->ioda.reserved_pe, 1359 phb->ioda.m32_size, phb->ioda.m32_segsize, 1360 phb->ioda.io_size, phb->ioda.io_segsize); 1361 1362 phb->hose->ops = &pnv_pci_ops; 1363 #ifdef CONFIG_EEH 1364 phb->eeh_ops = &ioda_eeh_ops; 1365 #endif 1366 1367 /* Setup RID -> PE mapping function */ 1368 phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe; 1369 1370 /* Setup TCEs */ 1371 phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; 1372 phb->dma_set_mask = pnv_pci_ioda_dma_set_mask; 1373 1374 /* Setup shutdown function for kexec */ 1375 phb->shutdown = pnv_pci_ioda_shutdown; 1376 1377 /* Setup MSI support */ 1378 pnv_pci_init_ioda_msis(phb); 1379 1380 /* 1381 * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here 1382 * to let the PCI core do resource assignment. It's supposed 1383 * that the PCI core will do correct I/O and MMIO alignment 1384 * for the P2P bridge bars so that each PCI bus (excluding 1385 * the child P2P bridges) can form individual PE. 1386 */ 1387 ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; 1388 ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook; 1389 ppc_md.pcibios_window_alignment = pnv_pci_window_alignment; 1390 pci_add_flags(PCI_REASSIGN_ALL_RSRC); 1391 1392 /* Reset IODA tables to a clean state */ 1393 rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET); 1394 if (rc) 1395 pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc); 1396 } 1397 1398 void __init pnv_pci_init_ioda2_phb(struct device_node *np) 1399 { 1400 pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2); 1401 } 1402 1403 void __init pnv_pci_init_ioda_hub(struct device_node *np) 1404 { 1405 struct device_node *phbn; 1406 const __be64 *prop64; 1407 u64 hub_id; 1408 1409 pr_info("Probing IODA IO-Hub %s\n", np->full_name); 1410 1411 prop64 = of_get_property(np, "ibm,opal-hubid", NULL); 1412 if (!prop64) { 1413 pr_err(" Missing \"ibm,opal-hubid\" property !\n"); 1414 return; 1415 } 1416 hub_id = be64_to_cpup(prop64); 1417 pr_devel(" HUB-ID : 0x%016llx\n", hub_id); 1418 1419 /* Count child PHBs */ 1420 for_each_child_of_node(np, phbn) { 1421 /* Look for IODA1 PHBs */ 1422 if (of_device_is_compatible(phbn, "ibm,ioda-phb")) 1423 pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1); 1424 } 1425 } 1426