1 /* 2 * Support PCI/PCIe on PowerNV platforms 3 * 4 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp. 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12 #undef DEBUG 13 14 #include <linux/kernel.h> 15 #include <linux/pci.h> 16 #include <linux/crash_dump.h> 17 #include <linux/debugfs.h> 18 #include <linux/delay.h> 19 #include <linux/string.h> 20 #include <linux/init.h> 21 #include <linux/bootmem.h> 22 #include <linux/irq.h> 23 #include <linux/io.h> 24 #include <linux/msi.h> 25 #include <linux/memblock.h> 26 27 #include <asm/sections.h> 28 #include <asm/io.h> 29 #include <asm/prom.h> 30 #include <asm/pci-bridge.h> 31 #include <asm/machdep.h> 32 #include <asm/msi_bitmap.h> 33 #include <asm/ppc-pci.h> 34 #include <asm/opal.h> 35 #include <asm/iommu.h> 36 #include <asm/tce.h> 37 #include <asm/xics.h> 38 #include <asm/debug.h> 39 40 #include "powernv.h" 41 #include "pci.h" 42 43 #define define_pe_printk_level(func, kern_level) \ 44 static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...) \ 45 { \ 46 struct va_format vaf; \ 47 va_list args; \ 48 char pfix[32]; \ 49 int r; \ 50 \ 51 va_start(args, fmt); \ 52 \ 53 vaf.fmt = fmt; \ 54 vaf.va = &args; \ 55 \ 56 if (pe->pdev) \ 57 strlcpy(pfix, dev_name(&pe->pdev->dev), \ 58 sizeof(pfix)); \ 59 else \ 60 sprintf(pfix, "%04x:%02x ", \ 61 pci_domain_nr(pe->pbus), \ 62 pe->pbus->number); \ 63 r = printk(kern_level "pci %s: [PE# %.3d] %pV", \ 64 pfix, pe->pe_number, &vaf); \ 65 \ 66 va_end(args); \ 67 \ 68 return r; \ 69 } \ 70 71 define_pe_printk_level(pe_err, KERN_ERR); 72 define_pe_printk_level(pe_warn, KERN_WARNING); 73 define_pe_printk_level(pe_info, KERN_INFO); 74 75 /* 76 * stdcix is only supposed to be used in hypervisor real mode as per 77 * the architecture spec 78 */ 79 static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr) 80 { 81 __asm__ __volatile__("stdcix %0,0,%1" 82 : : "r" (val), "r" (paddr) : "memory"); 83 } 84 85 static int pnv_ioda_alloc_pe(struct pnv_phb *phb) 86 { 87 unsigned long pe; 88 89 do { 90 pe = find_next_zero_bit(phb->ioda.pe_alloc, 91 phb->ioda.total_pe, 0); 92 if (pe >= phb->ioda.total_pe) 93 return IODA_INVALID_PE; 94 } while(test_and_set_bit(pe, phb->ioda.pe_alloc)); 95 96 phb->ioda.pe_array[pe].phb = phb; 97 phb->ioda.pe_array[pe].pe_number = pe; 98 return pe; 99 } 100 101 static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe) 102 { 103 WARN_ON(phb->ioda.pe_array[pe].pdev); 104 105 memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe)); 106 clear_bit(pe, phb->ioda.pe_alloc); 107 } 108 109 /* Currently those 2 are only used when MSIs are enabled, this will change 110 * but in the meantime, we need to protect them to avoid warnings 111 */ 112 #ifdef CONFIG_PCI_MSI 113 static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev) 114 { 115 struct pci_controller *hose = pci_bus_to_host(dev->bus); 116 struct pnv_phb *phb = hose->private_data; 117 struct pci_dn *pdn = pci_get_pdn(dev); 118 119 if (!pdn) 120 return NULL; 121 if (pdn->pe_number == IODA_INVALID_PE) 122 return NULL; 123 return &phb->ioda.pe_array[pdn->pe_number]; 124 } 125 #endif /* CONFIG_PCI_MSI */ 126 127 static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) 128 { 129 struct pci_dev *parent; 130 uint8_t bcomp, dcomp, fcomp; 131 long rc, rid_end, rid; 132 133 /* Bus validation ? */ 134 if (pe->pbus) { 135 int count; 136 137 dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER; 138 fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER; 139 parent = pe->pbus->self; 140 if (pe->flags & PNV_IODA_PE_BUS_ALL) 141 count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1; 142 else 143 count = 1; 144 145 switch(count) { 146 case 1: bcomp = OpalPciBusAll; break; 147 case 2: bcomp = OpalPciBus7Bits; break; 148 case 4: bcomp = OpalPciBus6Bits; break; 149 case 8: bcomp = OpalPciBus5Bits; break; 150 case 16: bcomp = OpalPciBus4Bits; break; 151 case 32: bcomp = OpalPciBus3Bits; break; 152 default: 153 pr_err("%s: Number of subordinate busses %d" 154 " unsupported\n", 155 pci_name(pe->pbus->self), count); 156 /* Do an exact match only */ 157 bcomp = OpalPciBusAll; 158 } 159 rid_end = pe->rid + (count << 8); 160 } else { 161 parent = pe->pdev->bus->self; 162 bcomp = OpalPciBusAll; 163 dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER; 164 fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER; 165 rid_end = pe->rid + 1; 166 } 167 168 /* 169 * Associate PE in PELT. We need add the PE into the 170 * corresponding PELT-V as well. Otherwise, the error 171 * originated from the PE might contribute to other 172 * PEs. 173 */ 174 rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid, 175 bcomp, dcomp, fcomp, OPAL_MAP_PE); 176 if (rc) { 177 pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc); 178 return -ENXIO; 179 } 180 181 rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, 182 pe->pe_number, OPAL_ADD_PE_TO_DOMAIN); 183 if (rc) 184 pe_warn(pe, "OPAL error %d adding self to PELTV\n", rc); 185 opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, 186 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); 187 188 /* Add to all parents PELT-V */ 189 while (parent) { 190 struct pci_dn *pdn = pci_get_pdn(parent); 191 if (pdn && pdn->pe_number != IODA_INVALID_PE) { 192 rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, 193 pe->pe_number, OPAL_ADD_PE_TO_DOMAIN); 194 /* XXX What to do in case of error ? */ 195 } 196 parent = parent->bus->self; 197 } 198 /* Setup reverse map */ 199 for (rid = pe->rid; rid < rid_end; rid++) 200 phb->ioda.pe_rmap[rid] = pe->pe_number; 201 202 /* Setup one MVTs on IODA1 */ 203 if (phb->type == PNV_PHB_IODA1) { 204 pe->mve_number = pe->pe_number; 205 rc = opal_pci_set_mve(phb->opal_id, pe->mve_number, 206 pe->pe_number); 207 if (rc) { 208 pe_err(pe, "OPAL error %ld setting up MVE %d\n", 209 rc, pe->mve_number); 210 pe->mve_number = -1; 211 } else { 212 rc = opal_pci_set_mve_enable(phb->opal_id, 213 pe->mve_number, OPAL_ENABLE_MVE); 214 if (rc) { 215 pe_err(pe, "OPAL error %ld enabling MVE %d\n", 216 rc, pe->mve_number); 217 pe->mve_number = -1; 218 } 219 } 220 } else if (phb->type == PNV_PHB_IODA2) 221 pe->mve_number = 0; 222 223 return 0; 224 } 225 226 static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb, 227 struct pnv_ioda_pe *pe) 228 { 229 struct pnv_ioda_pe *lpe; 230 231 list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) { 232 if (lpe->dma_weight < pe->dma_weight) { 233 list_add_tail(&pe->dma_link, &lpe->dma_link); 234 return; 235 } 236 } 237 list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list); 238 } 239 240 static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev) 241 { 242 /* This is quite simplistic. The "base" weight of a device 243 * is 10. 0 means no DMA is to be accounted for it. 244 */ 245 246 /* If it's a bridge, no DMA */ 247 if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) 248 return 0; 249 250 /* Reduce the weight of slow USB controllers */ 251 if (dev->class == PCI_CLASS_SERIAL_USB_UHCI || 252 dev->class == PCI_CLASS_SERIAL_USB_OHCI || 253 dev->class == PCI_CLASS_SERIAL_USB_EHCI) 254 return 3; 255 256 /* Increase the weight of RAID (includes Obsidian) */ 257 if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID) 258 return 15; 259 260 /* Default */ 261 return 10; 262 } 263 264 #if 0 265 static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) 266 { 267 struct pci_controller *hose = pci_bus_to_host(dev->bus); 268 struct pnv_phb *phb = hose->private_data; 269 struct pci_dn *pdn = pci_get_pdn(dev); 270 struct pnv_ioda_pe *pe; 271 int pe_num; 272 273 if (!pdn) { 274 pr_err("%s: Device tree node not associated properly\n", 275 pci_name(dev)); 276 return NULL; 277 } 278 if (pdn->pe_number != IODA_INVALID_PE) 279 return NULL; 280 281 /* PE#0 has been pre-set */ 282 if (dev->bus->number == 0) 283 pe_num = 0; 284 else 285 pe_num = pnv_ioda_alloc_pe(phb); 286 if (pe_num == IODA_INVALID_PE) { 287 pr_warning("%s: Not enough PE# available, disabling device\n", 288 pci_name(dev)); 289 return NULL; 290 } 291 292 /* NOTE: We get only one ref to the pci_dev for the pdn, not for the 293 * pointer in the PE data structure, both should be destroyed at the 294 * same time. However, this needs to be looked at more closely again 295 * once we actually start removing things (Hotplug, SR-IOV, ...) 296 * 297 * At some point we want to remove the PDN completely anyways 298 */ 299 pe = &phb->ioda.pe_array[pe_num]; 300 pci_dev_get(dev); 301 pdn->pcidev = dev; 302 pdn->pe_number = pe_num; 303 pe->pdev = dev; 304 pe->pbus = NULL; 305 pe->tce32_seg = -1; 306 pe->mve_number = -1; 307 pe->rid = dev->bus->number << 8 | pdn->devfn; 308 309 pe_info(pe, "Associated device to PE\n"); 310 311 if (pnv_ioda_configure_pe(phb, pe)) { 312 /* XXX What do we do here ? */ 313 if (pe_num) 314 pnv_ioda_free_pe(phb, pe_num); 315 pdn->pe_number = IODA_INVALID_PE; 316 pe->pdev = NULL; 317 pci_dev_put(dev); 318 return NULL; 319 } 320 321 /* Assign a DMA weight to the device */ 322 pe->dma_weight = pnv_ioda_dma_weight(dev); 323 if (pe->dma_weight != 0) { 324 phb->ioda.dma_weight += pe->dma_weight; 325 phb->ioda.dma_pe_count++; 326 } 327 328 /* Link the PE */ 329 pnv_ioda_link_pe_by_weight(phb, pe); 330 331 return pe; 332 } 333 #endif /* Useful for SRIOV case */ 334 335 static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) 336 { 337 struct pci_dev *dev; 338 339 list_for_each_entry(dev, &bus->devices, bus_list) { 340 struct pci_dn *pdn = pci_get_pdn(dev); 341 342 if (pdn == NULL) { 343 pr_warn("%s: No device node associated with device !\n", 344 pci_name(dev)); 345 continue; 346 } 347 pdn->pcidev = dev; 348 pdn->pe_number = pe->pe_number; 349 pe->dma_weight += pnv_ioda_dma_weight(dev); 350 if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) 351 pnv_ioda_setup_same_PE(dev->subordinate, pe); 352 } 353 } 354 355 /* 356 * There're 2 types of PCI bus sensitive PEs: One that is compromised of 357 * single PCI bus. Another one that contains the primary PCI bus and its 358 * subordinate PCI devices and buses. The second type of PE is normally 359 * orgiriated by PCIe-to-PCI bridge or PLX switch downstream ports. 360 */ 361 static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) 362 { 363 struct pci_controller *hose = pci_bus_to_host(bus); 364 struct pnv_phb *phb = hose->private_data; 365 struct pnv_ioda_pe *pe; 366 int pe_num; 367 368 pe_num = pnv_ioda_alloc_pe(phb); 369 if (pe_num == IODA_INVALID_PE) { 370 pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n", 371 __func__, pci_domain_nr(bus), bus->number); 372 return; 373 } 374 375 pe = &phb->ioda.pe_array[pe_num]; 376 pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS); 377 pe->pbus = bus; 378 pe->pdev = NULL; 379 pe->tce32_seg = -1; 380 pe->mve_number = -1; 381 pe->rid = bus->busn_res.start << 8; 382 pe->dma_weight = 0; 383 384 if (all) 385 pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n", 386 bus->busn_res.start, bus->busn_res.end, pe_num); 387 else 388 pe_info(pe, "Secondary bus %d associated with PE#%d\n", 389 bus->busn_res.start, pe_num); 390 391 if (pnv_ioda_configure_pe(phb, pe)) { 392 /* XXX What do we do here ? */ 393 if (pe_num) 394 pnv_ioda_free_pe(phb, pe_num); 395 pe->pbus = NULL; 396 return; 397 } 398 399 /* Associate it with all child devices */ 400 pnv_ioda_setup_same_PE(bus, pe); 401 402 /* Put PE to the list */ 403 list_add_tail(&pe->list, &phb->ioda.pe_list); 404 405 /* Account for one DMA PE if at least one DMA capable device exist 406 * below the bridge 407 */ 408 if (pe->dma_weight != 0) { 409 phb->ioda.dma_weight += pe->dma_weight; 410 phb->ioda.dma_pe_count++; 411 } 412 413 /* Link the PE */ 414 pnv_ioda_link_pe_by_weight(phb, pe); 415 } 416 417 static void pnv_ioda_setup_PEs(struct pci_bus *bus) 418 { 419 struct pci_dev *dev; 420 421 pnv_ioda_setup_bus_PE(bus, 0); 422 423 list_for_each_entry(dev, &bus->devices, bus_list) { 424 if (dev->subordinate) { 425 if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE) 426 pnv_ioda_setup_bus_PE(dev->subordinate, 1); 427 else 428 pnv_ioda_setup_PEs(dev->subordinate); 429 } 430 } 431 } 432 433 /* 434 * Configure PEs so that the downstream PCI buses and devices 435 * could have their associated PE#. Unfortunately, we didn't 436 * figure out the way to identify the PLX bridge yet. So we 437 * simply put the PCI bus and the subordinate behind the root 438 * port to PE# here. The game rule here is expected to be changed 439 * as soon as we can detected PLX bridge correctly. 440 */ 441 static void pnv_pci_ioda_setup_PEs(void) 442 { 443 struct pci_controller *hose, *tmp; 444 445 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 446 pnv_ioda_setup_PEs(hose->bus); 447 } 448 } 449 450 static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev) 451 { 452 struct pci_dn *pdn = pci_get_pdn(pdev); 453 struct pnv_ioda_pe *pe; 454 455 /* 456 * The function can be called while the PE# 457 * hasn't been assigned. Do nothing for the 458 * case. 459 */ 460 if (!pdn || pdn->pe_number == IODA_INVALID_PE) 461 return; 462 463 pe = &phb->ioda.pe_array[pdn->pe_number]; 464 WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); 465 set_iommu_table_base(&pdev->dev, &pe->tce32_table); 466 } 467 468 static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb, 469 struct pci_dev *pdev, u64 dma_mask) 470 { 471 struct pci_dn *pdn = pci_get_pdn(pdev); 472 struct pnv_ioda_pe *pe; 473 uint64_t top; 474 bool bypass = false; 475 476 if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) 477 return -ENODEV;; 478 479 pe = &phb->ioda.pe_array[pdn->pe_number]; 480 if (pe->tce_bypass_enabled) { 481 top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1; 482 bypass = (dma_mask >= top); 483 } 484 485 if (bypass) { 486 dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n"); 487 set_dma_ops(&pdev->dev, &dma_direct_ops); 488 set_dma_offset(&pdev->dev, pe->tce_bypass_base); 489 } else { 490 dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n"); 491 set_dma_ops(&pdev->dev, &dma_iommu_ops); 492 set_iommu_table_base(&pdev->dev, &pe->tce32_table); 493 } 494 return 0; 495 } 496 497 static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) 498 { 499 struct pci_dev *dev; 500 501 list_for_each_entry(dev, &bus->devices, bus_list) { 502 set_iommu_table_base_and_group(&dev->dev, &pe->tce32_table); 503 if (dev->subordinate) 504 pnv_ioda_setup_bus_dma(pe, dev->subordinate); 505 } 506 } 507 508 static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe, 509 struct iommu_table *tbl, 510 __be64 *startp, __be64 *endp, bool rm) 511 { 512 __be64 __iomem *invalidate = rm ? 513 (__be64 __iomem *)pe->tce_inval_reg_phys : 514 (__be64 __iomem *)tbl->it_index; 515 unsigned long start, end, inc; 516 517 start = __pa(startp); 518 end = __pa(endp); 519 520 /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */ 521 if (tbl->it_busno) { 522 start <<= 12; 523 end <<= 12; 524 inc = 128 << 12; 525 start |= tbl->it_busno; 526 end |= tbl->it_busno; 527 } else if (tbl->it_type & TCE_PCI_SWINV_PAIR) { 528 /* p7ioc-style invalidation, 2 TCEs per write */ 529 start |= (1ull << 63); 530 end |= (1ull << 63); 531 inc = 16; 532 } else { 533 /* Default (older HW) */ 534 inc = 128; 535 } 536 537 end |= inc - 1; /* round up end to be different than start */ 538 539 mb(); /* Ensure above stores are visible */ 540 while (start <= end) { 541 if (rm) 542 __raw_rm_writeq(cpu_to_be64(start), invalidate); 543 else 544 __raw_writeq(cpu_to_be64(start), invalidate); 545 start += inc; 546 } 547 548 /* 549 * The iommu layer will do another mb() for us on build() 550 * and we don't care on free() 551 */ 552 } 553 554 static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe, 555 struct iommu_table *tbl, 556 __be64 *startp, __be64 *endp, bool rm) 557 { 558 unsigned long start, end, inc; 559 __be64 __iomem *invalidate = rm ? 560 (__be64 __iomem *)pe->tce_inval_reg_phys : 561 (__be64 __iomem *)tbl->it_index; 562 563 /* We'll invalidate DMA address in PE scope */ 564 start = 0x2ul << 60; 565 start |= (pe->pe_number & 0xFF); 566 end = start; 567 568 /* Figure out the start, end and step */ 569 inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64)); 570 start |= (inc << 12); 571 inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64)); 572 end |= (inc << 12); 573 inc = (0x1ul << 12); 574 mb(); 575 576 while (start <= end) { 577 if (rm) 578 __raw_rm_writeq(cpu_to_be64(start), invalidate); 579 else 580 __raw_writeq(cpu_to_be64(start), invalidate); 581 start += inc; 582 } 583 } 584 585 void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, 586 __be64 *startp, __be64 *endp, bool rm) 587 { 588 struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe, 589 tce32_table); 590 struct pnv_phb *phb = pe->phb; 591 592 if (phb->type == PNV_PHB_IODA1) 593 pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm); 594 else 595 pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm); 596 } 597 598 static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, 599 struct pnv_ioda_pe *pe, unsigned int base, 600 unsigned int segs) 601 { 602 603 struct page *tce_mem = NULL; 604 const __be64 *swinvp; 605 struct iommu_table *tbl; 606 unsigned int i; 607 int64_t rc; 608 void *addr; 609 610 /* 256M DMA window, 4K TCE pages, 8 bytes TCE */ 611 #define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8) 612 613 /* XXX FIXME: Handle 64-bit only DMA devices */ 614 /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */ 615 /* XXX FIXME: Allocate multi-level tables on PHB3 */ 616 617 /* We shouldn't already have a 32-bit DMA associated */ 618 if (WARN_ON(pe->tce32_seg >= 0)) 619 return; 620 621 /* Grab a 32-bit TCE table */ 622 pe->tce32_seg = base; 623 pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n", 624 (base << 28), ((base + segs) << 28) - 1); 625 626 /* XXX Currently, we allocate one big contiguous table for the 627 * TCEs. We only really need one chunk per 256M of TCE space 628 * (ie per segment) but that's an optimization for later, it 629 * requires some added smarts with our get/put_tce implementation 630 */ 631 tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL, 632 get_order(TCE32_TABLE_SIZE * segs)); 633 if (!tce_mem) { 634 pe_err(pe, " Failed to allocate a 32-bit TCE memory\n"); 635 goto fail; 636 } 637 addr = page_address(tce_mem); 638 memset(addr, 0, TCE32_TABLE_SIZE * segs); 639 640 /* Configure HW */ 641 for (i = 0; i < segs; i++) { 642 rc = opal_pci_map_pe_dma_window(phb->opal_id, 643 pe->pe_number, 644 base + i, 1, 645 __pa(addr) + TCE32_TABLE_SIZE * i, 646 TCE32_TABLE_SIZE, 0x1000); 647 if (rc) { 648 pe_err(pe, " Failed to configure 32-bit TCE table," 649 " err %ld\n", rc); 650 goto fail; 651 } 652 } 653 654 /* Setup linux iommu table */ 655 tbl = &pe->tce32_table; 656 pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs, 657 base << 28); 658 659 /* OPAL variant of P7IOC SW invalidated TCEs */ 660 swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); 661 if (swinvp) { 662 /* We need a couple more fields -- an address and a data 663 * to or. Since the bus is only printed out on table free 664 * errors, and on the first pass the data will be a relative 665 * bus number, print that out instead. 666 */ 667 pe->tce_inval_reg_phys = be64_to_cpup(swinvp); 668 tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys, 669 8); 670 tbl->it_type |= (TCE_PCI_SWINV_CREATE | 671 TCE_PCI_SWINV_FREE | 672 TCE_PCI_SWINV_PAIR); 673 } 674 iommu_init_table(tbl, phb->hose->node); 675 iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); 676 677 if (pe->pdev) 678 set_iommu_table_base_and_group(&pe->pdev->dev, tbl); 679 else 680 pnv_ioda_setup_bus_dma(pe, pe->pbus); 681 682 return; 683 fail: 684 /* XXX Failure: Try to fallback to 64-bit only ? */ 685 if (pe->tce32_seg >= 0) 686 pe->tce32_seg = -1; 687 if (tce_mem) 688 __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs)); 689 } 690 691 static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) 692 { 693 struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe, 694 tce32_table); 695 uint16_t window_id = (pe->pe_number << 1 ) + 1; 696 int64_t rc; 697 698 pe_info(pe, "%sabling 64-bit DMA bypass\n", enable ? "En" : "Dis"); 699 if (enable) { 700 phys_addr_t top = memblock_end_of_DRAM(); 701 702 top = roundup_pow_of_two(top); 703 rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, 704 pe->pe_number, 705 window_id, 706 pe->tce_bypass_base, 707 top); 708 } else { 709 rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, 710 pe->pe_number, 711 window_id, 712 pe->tce_bypass_base, 713 0); 714 715 /* 716 * We might want to reset the DMA ops of all devices on 717 * this PE. However in theory, that shouldn't be necessary 718 * as this is used for VFIO/KVM pass-through and the device 719 * hasn't yet been returned to its kernel driver 720 */ 721 } 722 if (rc) 723 pe_err(pe, "OPAL error %lld configuring bypass window\n", rc); 724 else 725 pe->tce_bypass_enabled = enable; 726 } 727 728 static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb, 729 struct pnv_ioda_pe *pe) 730 { 731 /* TVE #1 is selected by PCI address bit 59 */ 732 pe->tce_bypass_base = 1ull << 59; 733 734 /* Install set_bypass callback for VFIO */ 735 pe->tce32_table.set_bypass = pnv_pci_ioda2_set_bypass; 736 737 /* Enable bypass by default */ 738 pnv_pci_ioda2_set_bypass(&pe->tce32_table, true); 739 } 740 741 static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, 742 struct pnv_ioda_pe *pe) 743 { 744 struct page *tce_mem = NULL; 745 void *addr; 746 const __be64 *swinvp; 747 struct iommu_table *tbl; 748 unsigned int tce_table_size, end; 749 int64_t rc; 750 751 /* We shouldn't already have a 32-bit DMA associated */ 752 if (WARN_ON(pe->tce32_seg >= 0)) 753 return; 754 755 /* The PE will reserve all possible 32-bits space */ 756 pe->tce32_seg = 0; 757 end = (1 << ilog2(phb->ioda.m32_pci_base)); 758 tce_table_size = (end / 0x1000) * 8; 759 pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n", 760 end); 761 762 /* Allocate TCE table */ 763 tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL, 764 get_order(tce_table_size)); 765 if (!tce_mem) { 766 pe_err(pe, "Failed to allocate a 32-bit TCE memory\n"); 767 goto fail; 768 } 769 addr = page_address(tce_mem); 770 memset(addr, 0, tce_table_size); 771 772 /* 773 * Map TCE table through TVT. The TVE index is the PE number 774 * shifted by 1 bit for 32-bits DMA space. 775 */ 776 rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, 777 pe->pe_number << 1, 1, __pa(addr), 778 tce_table_size, 0x1000); 779 if (rc) { 780 pe_err(pe, "Failed to configure 32-bit TCE table," 781 " err %ld\n", rc); 782 goto fail; 783 } 784 785 /* Setup linux iommu table */ 786 tbl = &pe->tce32_table; 787 pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0); 788 789 /* OPAL variant of PHB3 invalidated TCEs */ 790 swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); 791 if (swinvp) { 792 /* We need a couple more fields -- an address and a data 793 * to or. Since the bus is only printed out on table free 794 * errors, and on the first pass the data will be a relative 795 * bus number, print that out instead. 796 */ 797 pe->tce_inval_reg_phys = be64_to_cpup(swinvp); 798 tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys, 799 8); 800 tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); 801 } 802 iommu_init_table(tbl, phb->hose->node); 803 iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); 804 805 if (pe->pdev) 806 set_iommu_table_base_and_group(&pe->pdev->dev, tbl); 807 else 808 pnv_ioda_setup_bus_dma(pe, pe->pbus); 809 810 /* Also create a bypass window */ 811 pnv_pci_ioda2_setup_bypass_pe(phb, pe); 812 return; 813 fail: 814 if (pe->tce32_seg >= 0) 815 pe->tce32_seg = -1; 816 if (tce_mem) 817 __free_pages(tce_mem, get_order(tce_table_size)); 818 } 819 820 static void pnv_ioda_setup_dma(struct pnv_phb *phb) 821 { 822 struct pci_controller *hose = phb->hose; 823 unsigned int residual, remaining, segs, tw, base; 824 struct pnv_ioda_pe *pe; 825 826 /* If we have more PE# than segments available, hand out one 827 * per PE until we run out and let the rest fail. If not, 828 * then we assign at least one segment per PE, plus more based 829 * on the amount of devices under that PE 830 */ 831 if (phb->ioda.dma_pe_count > phb->ioda.tce32_count) 832 residual = 0; 833 else 834 residual = phb->ioda.tce32_count - 835 phb->ioda.dma_pe_count; 836 837 pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n", 838 hose->global_number, phb->ioda.tce32_count); 839 pr_info("PCI: %d PE# for a total weight of %d\n", 840 phb->ioda.dma_pe_count, phb->ioda.dma_weight); 841 842 /* Walk our PE list and configure their DMA segments, hand them 843 * out one base segment plus any residual segments based on 844 * weight 845 */ 846 remaining = phb->ioda.tce32_count; 847 tw = phb->ioda.dma_weight; 848 base = 0; 849 list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) { 850 if (!pe->dma_weight) 851 continue; 852 if (!remaining) { 853 pe_warn(pe, "No DMA32 resources available\n"); 854 continue; 855 } 856 segs = 1; 857 if (residual) { 858 segs += ((pe->dma_weight * residual) + (tw / 2)) / tw; 859 if (segs > remaining) 860 segs = remaining; 861 } 862 863 /* 864 * For IODA2 compliant PHB3, we needn't care about the weight. 865 * The all available 32-bits DMA space will be assigned to 866 * the specific PE. 867 */ 868 if (phb->type == PNV_PHB_IODA1) { 869 pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n", 870 pe->dma_weight, segs); 871 pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs); 872 } else { 873 pe_info(pe, "Assign DMA32 space\n"); 874 segs = 0; 875 pnv_pci_ioda2_setup_dma_pe(phb, pe); 876 } 877 878 remaining -= segs; 879 base += segs; 880 } 881 } 882 883 #ifdef CONFIG_PCI_MSI 884 static void pnv_ioda2_msi_eoi(struct irq_data *d) 885 { 886 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); 887 struct irq_chip *chip = irq_data_get_irq_chip(d); 888 struct pnv_phb *phb = container_of(chip, struct pnv_phb, 889 ioda.irq_chip); 890 int64_t rc; 891 892 rc = opal_pci_msi_eoi(phb->opal_id, hw_irq); 893 WARN_ON_ONCE(rc); 894 895 icp_native_eoi(d); 896 } 897 898 static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, 899 unsigned int hwirq, unsigned int virq, 900 unsigned int is_64, struct msi_msg *msg) 901 { 902 struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev); 903 struct pci_dn *pdn = pci_get_pdn(dev); 904 struct irq_data *idata; 905 struct irq_chip *ichip; 906 unsigned int xive_num = hwirq - phb->msi_base; 907 __be32 data; 908 int rc; 909 910 /* No PE assigned ? bail out ... no MSI for you ! */ 911 if (pe == NULL) 912 return -ENXIO; 913 914 /* Check if we have an MVE */ 915 if (pe->mve_number < 0) 916 return -ENXIO; 917 918 /* Force 32-bit MSI on some broken devices */ 919 if (pdn && pdn->force_32bit_msi) 920 is_64 = 0; 921 922 /* Assign XIVE to PE */ 923 rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num); 924 if (rc) { 925 pr_warn("%s: OPAL error %d setting XIVE %d PE\n", 926 pci_name(dev), rc, xive_num); 927 return -EIO; 928 } 929 930 if (is_64) { 931 __be64 addr64; 932 933 rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1, 934 &addr64, &data); 935 if (rc) { 936 pr_warn("%s: OPAL error %d getting 64-bit MSI data\n", 937 pci_name(dev), rc); 938 return -EIO; 939 } 940 msg->address_hi = be64_to_cpu(addr64) >> 32; 941 msg->address_lo = be64_to_cpu(addr64) & 0xfffffffful; 942 } else { 943 __be32 addr32; 944 945 rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1, 946 &addr32, &data); 947 if (rc) { 948 pr_warn("%s: OPAL error %d getting 32-bit MSI data\n", 949 pci_name(dev), rc); 950 return -EIO; 951 } 952 msg->address_hi = 0; 953 msg->address_lo = be32_to_cpu(addr32); 954 } 955 msg->data = be32_to_cpu(data); 956 957 /* 958 * Change the IRQ chip for the MSI interrupts on PHB3. 959 * The corresponding IRQ chip should be populated for 960 * the first time. 961 */ 962 if (phb->type == PNV_PHB_IODA2) { 963 if (!phb->ioda.irq_chip_init) { 964 idata = irq_get_irq_data(virq); 965 ichip = irq_data_get_irq_chip(idata); 966 phb->ioda.irq_chip_init = 1; 967 phb->ioda.irq_chip = *ichip; 968 phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi; 969 } 970 971 irq_set_chip(virq, &phb->ioda.irq_chip); 972 } 973 974 pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d)," 975 " address=%x_%08x data=%x PE# %d\n", 976 pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num, 977 msg->address_hi, msg->address_lo, data, pe->pe_number); 978 979 return 0; 980 } 981 982 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) 983 { 984 unsigned int count; 985 const __be32 *prop = of_get_property(phb->hose->dn, 986 "ibm,opal-msi-ranges", NULL); 987 if (!prop) { 988 /* BML Fallback */ 989 prop = of_get_property(phb->hose->dn, "msi-ranges", NULL); 990 } 991 if (!prop) 992 return; 993 994 phb->msi_base = be32_to_cpup(prop); 995 count = be32_to_cpup(prop + 1); 996 if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) { 997 pr_err("PCI %d: Failed to allocate MSI bitmap !\n", 998 phb->hose->global_number); 999 return; 1000 } 1001 1002 phb->msi_setup = pnv_pci_ioda_msi_setup; 1003 phb->msi32_support = 1; 1004 pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n", 1005 count, phb->msi_base); 1006 } 1007 #else 1008 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { } 1009 #endif /* CONFIG_PCI_MSI */ 1010 1011 /* 1012 * This function is supposed to be called on basis of PE from top 1013 * to bottom style. So the the I/O or MMIO segment assigned to 1014 * parent PE could be overrided by its child PEs if necessary. 1015 */ 1016 static void pnv_ioda_setup_pe_seg(struct pci_controller *hose, 1017 struct pnv_ioda_pe *pe) 1018 { 1019 struct pnv_phb *phb = hose->private_data; 1020 struct pci_bus_region region; 1021 struct resource *res; 1022 int i, index; 1023 int rc; 1024 1025 /* 1026 * NOTE: We only care PCI bus based PE for now. For PCI 1027 * device based PE, for example SRIOV sensitive VF should 1028 * be figured out later. 1029 */ 1030 BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))); 1031 1032 pci_bus_for_each_resource(pe->pbus, res, i) { 1033 if (!res || !res->flags || 1034 res->start > res->end) 1035 continue; 1036 1037 if (res->flags & IORESOURCE_IO) { 1038 region.start = res->start - phb->ioda.io_pci_base; 1039 region.end = res->end - phb->ioda.io_pci_base; 1040 index = region.start / phb->ioda.io_segsize; 1041 1042 while (index < phb->ioda.total_pe && 1043 region.start <= region.end) { 1044 phb->ioda.io_segmap[index] = pe->pe_number; 1045 rc = opal_pci_map_pe_mmio_window(phb->opal_id, 1046 pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index); 1047 if (rc != OPAL_SUCCESS) { 1048 pr_err("%s: OPAL error %d when mapping IO " 1049 "segment #%d to PE#%d\n", 1050 __func__, rc, index, pe->pe_number); 1051 break; 1052 } 1053 1054 region.start += phb->ioda.io_segsize; 1055 index++; 1056 } 1057 } else if (res->flags & IORESOURCE_MEM) { 1058 /* WARNING: Assumes M32 is mem region 0 in PHB. We need to 1059 * harden that algorithm when we start supporting M64 1060 */ 1061 region.start = res->start - 1062 hose->mem_offset[0] - 1063 phb->ioda.m32_pci_base; 1064 region.end = res->end - 1065 hose->mem_offset[0] - 1066 phb->ioda.m32_pci_base; 1067 index = region.start / phb->ioda.m32_segsize; 1068 1069 while (index < phb->ioda.total_pe && 1070 region.start <= region.end) { 1071 phb->ioda.m32_segmap[index] = pe->pe_number; 1072 rc = opal_pci_map_pe_mmio_window(phb->opal_id, 1073 pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index); 1074 if (rc != OPAL_SUCCESS) { 1075 pr_err("%s: OPAL error %d when mapping M32 " 1076 "segment#%d to PE#%d", 1077 __func__, rc, index, pe->pe_number); 1078 break; 1079 } 1080 1081 region.start += phb->ioda.m32_segsize; 1082 index++; 1083 } 1084 } 1085 } 1086 } 1087 1088 static void pnv_pci_ioda_setup_seg(void) 1089 { 1090 struct pci_controller *tmp, *hose; 1091 struct pnv_phb *phb; 1092 struct pnv_ioda_pe *pe; 1093 1094 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 1095 phb = hose->private_data; 1096 list_for_each_entry(pe, &phb->ioda.pe_list, list) { 1097 pnv_ioda_setup_pe_seg(hose, pe); 1098 } 1099 } 1100 } 1101 1102 static void pnv_pci_ioda_setup_DMA(void) 1103 { 1104 struct pci_controller *hose, *tmp; 1105 struct pnv_phb *phb; 1106 1107 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 1108 pnv_ioda_setup_dma(hose->private_data); 1109 1110 /* Mark the PHB initialization done */ 1111 phb = hose->private_data; 1112 phb->initialized = 1; 1113 } 1114 } 1115 1116 static void pnv_pci_ioda_create_dbgfs(void) 1117 { 1118 #ifdef CONFIG_DEBUG_FS 1119 struct pci_controller *hose, *tmp; 1120 struct pnv_phb *phb; 1121 char name[16]; 1122 1123 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 1124 phb = hose->private_data; 1125 1126 sprintf(name, "PCI%04x", hose->global_number); 1127 phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root); 1128 if (!phb->dbgfs) 1129 pr_warning("%s: Error on creating debugfs on PHB#%x\n", 1130 __func__, hose->global_number); 1131 } 1132 #endif /* CONFIG_DEBUG_FS */ 1133 } 1134 1135 static void pnv_pci_ioda_fixup(void) 1136 { 1137 pnv_pci_ioda_setup_PEs(); 1138 pnv_pci_ioda_setup_seg(); 1139 pnv_pci_ioda_setup_DMA(); 1140 1141 pnv_pci_ioda_create_dbgfs(); 1142 1143 #ifdef CONFIG_EEH 1144 eeh_probe_mode_set(EEH_PROBE_MODE_DEV); 1145 eeh_addr_cache_build(); 1146 eeh_init(); 1147 #endif 1148 } 1149 1150 /* 1151 * Returns the alignment for I/O or memory windows for P2P 1152 * bridges. That actually depends on how PEs are segmented. 1153 * For now, we return I/O or M32 segment size for PE sensitive 1154 * P2P bridges. Otherwise, the default values (4KiB for I/O, 1155 * 1MiB for memory) will be returned. 1156 * 1157 * The current PCI bus might be put into one PE, which was 1158 * create against the parent PCI bridge. For that case, we 1159 * needn't enlarge the alignment so that we can save some 1160 * resources. 1161 */ 1162 static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, 1163 unsigned long type) 1164 { 1165 struct pci_dev *bridge; 1166 struct pci_controller *hose = pci_bus_to_host(bus); 1167 struct pnv_phb *phb = hose->private_data; 1168 int num_pci_bridges = 0; 1169 1170 bridge = bus->self; 1171 while (bridge) { 1172 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) { 1173 num_pci_bridges++; 1174 if (num_pci_bridges >= 2) 1175 return 1; 1176 } 1177 1178 bridge = bridge->bus->self; 1179 } 1180 1181 /* We need support prefetchable memory window later */ 1182 if (type & IORESOURCE_MEM) 1183 return phb->ioda.m32_segsize; 1184 1185 return phb->ioda.io_segsize; 1186 } 1187 1188 /* Prevent enabling devices for which we couldn't properly 1189 * assign a PE 1190 */ 1191 static int pnv_pci_enable_device_hook(struct pci_dev *dev) 1192 { 1193 struct pci_controller *hose = pci_bus_to_host(dev->bus); 1194 struct pnv_phb *phb = hose->private_data; 1195 struct pci_dn *pdn; 1196 1197 /* The function is probably called while the PEs have 1198 * not be created yet. For example, resource reassignment 1199 * during PCI probe period. We just skip the check if 1200 * PEs isn't ready. 1201 */ 1202 if (!phb->initialized) 1203 return 0; 1204 1205 pdn = pci_get_pdn(dev); 1206 if (!pdn || pdn->pe_number == IODA_INVALID_PE) 1207 return -EINVAL; 1208 1209 return 0; 1210 } 1211 1212 static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus, 1213 u32 devfn) 1214 { 1215 return phb->ioda.pe_rmap[(bus->number << 8) | devfn]; 1216 } 1217 1218 static void pnv_pci_ioda_shutdown(struct pnv_phb *phb) 1219 { 1220 opal_pci_reset(phb->opal_id, OPAL_PCI_IODA_TABLE_RESET, 1221 OPAL_ASSERT_RESET); 1222 } 1223 1224 void __init pnv_pci_init_ioda_phb(struct device_node *np, 1225 u64 hub_id, int ioda_type) 1226 { 1227 struct pci_controller *hose; 1228 struct pnv_phb *phb; 1229 unsigned long size, m32map_off, pemap_off, iomap_off = 0; 1230 const __be64 *prop64; 1231 const __be32 *prop32; 1232 int len; 1233 u64 phb_id; 1234 void *aux; 1235 long rc; 1236 1237 pr_info("Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name); 1238 1239 prop64 = of_get_property(np, "ibm,opal-phbid", NULL); 1240 if (!prop64) { 1241 pr_err(" Missing \"ibm,opal-phbid\" property !\n"); 1242 return; 1243 } 1244 phb_id = be64_to_cpup(prop64); 1245 pr_debug(" PHB-ID : 0x%016llx\n", phb_id); 1246 1247 phb = alloc_bootmem(sizeof(struct pnv_phb)); 1248 if (!phb) { 1249 pr_err(" Out of memory !\n"); 1250 return; 1251 } 1252 1253 /* Allocate PCI controller */ 1254 memset(phb, 0, sizeof(struct pnv_phb)); 1255 phb->hose = hose = pcibios_alloc_controller(np); 1256 if (!phb->hose) { 1257 pr_err(" Can't allocate PCI controller for %s\n", 1258 np->full_name); 1259 free_bootmem((unsigned long)phb, sizeof(struct pnv_phb)); 1260 return; 1261 } 1262 1263 spin_lock_init(&phb->lock); 1264 prop32 = of_get_property(np, "bus-range", &len); 1265 if (prop32 && len == 8) { 1266 hose->first_busno = be32_to_cpu(prop32[0]); 1267 hose->last_busno = be32_to_cpu(prop32[1]); 1268 } else { 1269 pr_warn(" Broken <bus-range> on %s\n", np->full_name); 1270 hose->first_busno = 0; 1271 hose->last_busno = 0xff; 1272 } 1273 hose->private_data = phb; 1274 phb->hub_id = hub_id; 1275 phb->opal_id = phb_id; 1276 phb->type = ioda_type; 1277 1278 /* Detect specific models for error handling */ 1279 if (of_device_is_compatible(np, "ibm,p7ioc-pciex")) 1280 phb->model = PNV_PHB_MODEL_P7IOC; 1281 else if (of_device_is_compatible(np, "ibm,power8-pciex")) 1282 phb->model = PNV_PHB_MODEL_PHB3; 1283 else 1284 phb->model = PNV_PHB_MODEL_UNKNOWN; 1285 1286 /* Parse 32-bit and IO ranges (if any) */ 1287 pci_process_bridge_OF_ranges(hose, np, !hose->global_number); 1288 1289 /* Get registers */ 1290 phb->regs = of_iomap(np, 0); 1291 if (phb->regs == NULL) 1292 pr_err(" Failed to map registers !\n"); 1293 1294 /* Initialize more IODA stuff */ 1295 phb->ioda.total_pe = 1; 1296 prop32 = of_get_property(np, "ibm,opal-num-pes", NULL); 1297 if (prop32) 1298 phb->ioda.total_pe = be32_to_cpup(prop32); 1299 prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL); 1300 if (prop32) 1301 phb->ioda.reserved_pe = be32_to_cpup(prop32); 1302 phb->ioda.m32_size = resource_size(&hose->mem_resources[0]); 1303 /* FW Has already off top 64k of M32 space (MSI space) */ 1304 phb->ioda.m32_size += 0x10000; 1305 1306 phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe; 1307 phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0]; 1308 phb->ioda.io_size = hose->pci_io_size; 1309 phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe; 1310 phb->ioda.io_pci_base = 0; /* XXX calculate this ? */ 1311 1312 /* Allocate aux data & arrays. We don't have IO ports on PHB3 */ 1313 size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long)); 1314 m32map_off = size; 1315 size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]); 1316 if (phb->type == PNV_PHB_IODA1) { 1317 iomap_off = size; 1318 size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]); 1319 } 1320 pemap_off = size; 1321 size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe); 1322 aux = alloc_bootmem(size); 1323 memset(aux, 0, size); 1324 phb->ioda.pe_alloc = aux; 1325 phb->ioda.m32_segmap = aux + m32map_off; 1326 if (phb->type == PNV_PHB_IODA1) 1327 phb->ioda.io_segmap = aux + iomap_off; 1328 phb->ioda.pe_array = aux + pemap_off; 1329 set_bit(phb->ioda.reserved_pe, phb->ioda.pe_alloc); 1330 1331 INIT_LIST_HEAD(&phb->ioda.pe_dma_list); 1332 INIT_LIST_HEAD(&phb->ioda.pe_list); 1333 1334 /* Calculate how many 32-bit TCE segments we have */ 1335 phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28; 1336 1337 /* Clear unusable m64 */ 1338 hose->mem_resources[1].flags = 0; 1339 hose->mem_resources[1].start = 0; 1340 hose->mem_resources[1].end = 0; 1341 hose->mem_resources[2].flags = 0; 1342 hose->mem_resources[2].start = 0; 1343 hose->mem_resources[2].end = 0; 1344 1345 #if 0 /* We should really do that ... */ 1346 rc = opal_pci_set_phb_mem_window(opal->phb_id, 1347 window_type, 1348 window_num, 1349 starting_real_address, 1350 starting_pci_address, 1351 segment_size); 1352 #endif 1353 1354 pr_info(" %d (%d) PE's M32: 0x%x [segment=0x%x]" 1355 " IO: 0x%x [segment=0x%x]\n", 1356 phb->ioda.total_pe, 1357 phb->ioda.reserved_pe, 1358 phb->ioda.m32_size, phb->ioda.m32_segsize, 1359 phb->ioda.io_size, phb->ioda.io_segsize); 1360 1361 phb->hose->ops = &pnv_pci_ops; 1362 #ifdef CONFIG_EEH 1363 phb->eeh_ops = &ioda_eeh_ops; 1364 #endif 1365 1366 /* Setup RID -> PE mapping function */ 1367 phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe; 1368 1369 /* Setup TCEs */ 1370 phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; 1371 phb->dma_set_mask = pnv_pci_ioda_dma_set_mask; 1372 1373 /* Setup shutdown function for kexec */ 1374 phb->shutdown = pnv_pci_ioda_shutdown; 1375 1376 /* Setup MSI support */ 1377 pnv_pci_init_ioda_msis(phb); 1378 1379 /* 1380 * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here 1381 * to let the PCI core do resource assignment. It's supposed 1382 * that the PCI core will do correct I/O and MMIO alignment 1383 * for the P2P bridge bars so that each PCI bus (excluding 1384 * the child P2P bridges) can form individual PE. 1385 */ 1386 ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; 1387 ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook; 1388 ppc_md.pcibios_window_alignment = pnv_pci_window_alignment; 1389 ppc_md.pcibios_reset_secondary_bus = pnv_pci_reset_secondary_bus; 1390 pci_add_flags(PCI_REASSIGN_ALL_RSRC); 1391 1392 /* Reset IODA tables to a clean state */ 1393 rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET); 1394 if (rc) 1395 pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc); 1396 1397 /* If we're running in kdump kerenl, the previous kerenl never 1398 * shutdown PCI devices correctly. We already got IODA table 1399 * cleaned out. So we have to issue PHB reset to stop all PCI 1400 * transactions from previous kerenl. 1401 */ 1402 if (is_kdump_kernel()) { 1403 pr_info(" Issue PHB reset ...\n"); 1404 ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL); 1405 ioda_eeh_phb_reset(hose, OPAL_DEASSERT_RESET); 1406 } 1407 } 1408 1409 void __init pnv_pci_init_ioda2_phb(struct device_node *np) 1410 { 1411 pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2); 1412 } 1413 1414 void __init pnv_pci_init_ioda_hub(struct device_node *np) 1415 { 1416 struct device_node *phbn; 1417 const __be64 *prop64; 1418 u64 hub_id; 1419 1420 pr_info("Probing IODA IO-Hub %s\n", np->full_name); 1421 1422 prop64 = of_get_property(np, "ibm,opal-hubid", NULL); 1423 if (!prop64) { 1424 pr_err(" Missing \"ibm,opal-hubid\" property !\n"); 1425 return; 1426 } 1427 hub_id = be64_to_cpup(prop64); 1428 pr_devel(" HUB-ID : 0x%016llx\n", hub_id); 1429 1430 /* Count child PHBs */ 1431 for_each_child_of_node(np, phbn) { 1432 /* Look for IODA1 PHBs */ 1433 if (of_device_is_compatible(phbn, "ibm,ioda-phb")) 1434 pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1); 1435 } 1436 } 1437