/*
 * Support PCI/PCIe on PowerNV platforms
 *
 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG

#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/msi.h>

#include <asm/sections.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/msi_bitmap.h>
#include <asm/ppc-pci.h>
#include <asm/opal.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/xics.h>

#include "powernv.h"
#include "pci.h"

/*
 * Generate pe_err()/pe_warn()/pe_info(): printk() wrappers that prefix
 * every message with "pci <name>: [PE# nnn]".  <name> is the device name
 * when the PE maps a single device (pe->pdev set), otherwise the
 * "domain:bus" of the PE's bus.
 */
#define define_pe_printk_level(func, kern_level)	\
static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...)	\
{								\
	struct va_format vaf;					\
	va_list args;						\
	char pfix[32];						\
	int r;							\
								\
	va_start(args, fmt);					\
								\
	vaf.fmt = fmt;						\
	vaf.va = &args;						\
								\
	if (pe->pdev)						\
		strlcpy(pfix, dev_name(&pe->pdev->dev),		\
			sizeof(pfix));				\
	else							\
		sprintf(pfix, "%04x:%02x ",			\
			pci_domain_nr(pe->pbus),		\
			pe->pbus->number);			\
	r = printk(kern_level "pci %s: [PE# %.3d] %pV",		\
		   pfix, pe->pe_number, &vaf);			\
								\
	va_end(args);						\
								\
	return r;						\
}								\

define_pe_printk_level(pe_err, KERN_ERR);
define_pe_printk_level(pe_warn, KERN_WARNING);
define_pe_printk_level(pe_info, KERN_INFO);

/*
 * Allocate the lowest-numbered free PE on @phb.  Returns the PE number,
 * or IODA_INVALID_PE when the allocation bitmap is exhausted.
 */
static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
{
	unsigned long pe;

	do {
		pe = find_next_zero_bit(phb->ioda.pe_alloc,
					phb->ioda.total_pe, 0);
		if (pe >= phb->ioda.total_pe)
			return IODA_INVALID_PE;
		/* Retry if another allocator grabbed the bit first */
	} while(test_and_set_bit(pe, phb->ioda.pe_alloc));

	phb->ioda.pe_array[pe].phb = phb;
	phb->ioda.pe_array[pe].pe_number = pe;
	return pe;
}

/* Release PE number @pe back to @phb's allocation bitmap */
static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
{
	/* The PE should have been unhooked from its device already */
	WARN_ON(phb->ioda.pe_array[pe].pdev);

	memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe));
	clear_bit(pe, phb->ioda.pe_alloc);
}

/* Currently those 2 are only used when MSIs are enabled, this will change
 * but in the meantime, we need to protect them to avoid warnings
 */
#ifdef CONFIG_PCI_MSI
/*
 * Look up the PE a device has been assigned to via its pci_dn, or NULL
 * if the device has no pci_dn or no PE number assigned yet.
 */
static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn = pci_get_pdn(dev);

	if (!pdn)
		return NULL;
	if (pdn->pe_number == IODA_INVALID_PE)
		return NULL;
	return &phb->ioda.pe_array[pdn->pe_number];
}
#endif /* CONFIG_PCI_MSI */

/*
 * Tell the firmware about a PE: map its RID range into the PELT via
 * opal_pci_set_pe(), add it to the PELT-V of every parent PE, populate
 * the RID->PE reverse map, and (on IODA1) set up and enable an MVE for
 * it.  Returns 0 on success or -ENXIO if the PELT mapping fails.
 */
static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
{
	struct pci_dev *parent;
	uint8_t bcomp, dcomp, fcomp;
	long rc, rid_end, rid;

	/* Bus validation ? */
	if (pe->pbus) {
		int count;

		/* Bus PE: ignore device/function, compare some number of
		 * bus bits depending on how many buses the PE spans.
		 */
		dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
		fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
		parent = pe->pbus->self;
		if (pe->flags & PNV_IODA_PE_BUS_ALL)
			count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
		else
			count = 1;

		switch(count) {
		case  1: bcomp = OpalPciBusAll;		break;
		case  2: bcomp = OpalPciBus7Bits;	break;
		case  4: bcomp = OpalPciBus6Bits;	break;
		case  8: bcomp = OpalPciBus5Bits;	break;
		case 16: bcomp = OpalPciBus4Bits;	break;
		case 32: bcomp = OpalPciBus3Bits;	break;
		default:
			pr_err("%s: Number of subordinate busses %d"
			       " unsupported\n",
			       pci_name(pe->pbus->self), count);
			/* Do an exact match only */
			bcomp = OpalPciBusAll;
		}
		rid_end = pe->rid + (count << 8);
	} else {
		/* Device PE: exact bus/device/function match */
		parent = pe->pdev->bus->self;
		bcomp = OpalPciBusAll;
		dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
		fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
		rid_end = pe->rid + 1;
	}

	/* Associate PE in PELT */
	rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
			     bcomp, dcomp, fcomp, OPAL_MAP_PE);
	if (rc) {
		pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
		return -ENXIO;
	}
	opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
				  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);

	/* Add to all parents PELT-V */
	while (parent) {
		struct pci_dn *pdn = pci_get_pdn(parent);
		if (pdn && pdn->pe_number != IODA_INVALID_PE) {
			rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
						pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
			/* XXX What to do in case of error ? */
		}
		parent = parent->bus->self;
	}
	/* Setup reverse map */
	for (rid = pe->rid; rid < rid_end; rid++)
		phb->ioda.pe_rmap[rid] = pe->pe_number;

	/* Setup one MVTs on IODA1 */
	if (phb->type == PNV_PHB_IODA1) {
		/* MVE number mirrors the PE number; -1 marks "no MVE" on
		 * any OPAL failure below.
		 */
		pe->mve_number = pe->pe_number;
		rc = opal_pci_set_mve(phb->opal_id, pe->mve_number,
				      pe->pe_number);
		if (rc) {
			pe_err(pe, "OPAL error %ld setting up MVE %d\n",
			       rc, pe->mve_number);
			pe->mve_number = -1;
		} else {
			rc = opal_pci_set_mve_enable(phb->opal_id,
						     pe->mve_number, OPAL_ENABLE_MVE);
			if (rc) {
				pe_err(pe, "OPAL error %ld enabling MVE %d\n",
				       rc, pe->mve_number);
				pe->mve_number = -1;
			}
		}
	} else if (phb->type == PNV_PHB_IODA2)
		pe->mve_number = 0;

	return 0;
}

/*
 * Insert @pe into @phb's DMA PE list, which is kept sorted by
 * descending dma_weight so that heavier DMA users come first when
 * TCE segments are handed out.
 */
static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
				       struct pnv_ioda_pe *pe)
{
	struct pnv_ioda_pe *lpe;

	list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) {
		if (lpe->dma_weight < pe->dma_weight) {
			list_add_tail(&pe->dma_link, &lpe->dma_link);
			return;
		}
	}
	list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list);
}

/*
 * Heuristic per-device DMA weight used to apportion 32-bit TCE
 * segments among PEs.
 */
static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
{
	/* This is quite simplistic. The "base" weight of a device
	 * is 10. 0 means no DMA is to be accounted for it.
	 */

	/* If it's a bridge, no DMA */
	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
		return 0;

	/* Reduce the weight of slow USB controllers */
	if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
	    dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
	    dev->class == PCI_CLASS_SERIAL_USB_EHCI)
		return 3;

	/* Increase the weight of RAID (includes Obsidian) */
	if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
		return 15;

	/* Default */
	return 10;
}

#if 0
/* Compiled out: per-device PE setup, kept around for the SR-IOV case */
static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn = pci_get_pdn(dev);
	struct pnv_ioda_pe *pe;
	int pe_num;

	if (!pdn) {
		pr_err("%s: Device tree node not associated properly\n",
		       pci_name(dev));
		return NULL;
	}
	if (pdn->pe_number != IODA_INVALID_PE)
		return NULL;

	/* PE#0 has been pre-set */
	if (dev->bus->number == 0)
		pe_num = 0;
	else
		pe_num = pnv_ioda_alloc_pe(phb);
	if (pe_num == IODA_INVALID_PE) {
		pr_warning("%s: Not enough PE# available, disabling device\n",
			   pci_name(dev));
		return NULL;
	}

	/* NOTE: We get only one ref to the pci_dev for the pdn, not for the
	 * pointer in the PE data structure, both should be destroyed at the
	 * same time. However, this needs to be looked at more closely again
	 * once we actually start removing things (Hotplug, SR-IOV, ...)
	 *
	 * At some point we want to remove the PDN completely anyways
	 */
	pe = &phb->ioda.pe_array[pe_num];
	pci_dev_get(dev);
	pdn->pcidev = dev;
	pdn->pe_number = pe_num;
	pe->pdev = dev;
	pe->pbus = NULL;
	pe->tce32_seg = -1;
	pe->mve_number = -1;
	pe->rid = dev->bus->number << 8 | pdn->devfn;

	pe_info(pe, "Associated device to PE\n");

	if (pnv_ioda_configure_pe(phb, pe)) {
		/* XXX What do we do here ? */
		if (pe_num)
			pnv_ioda_free_pe(phb, pe_num);
		pdn->pe_number = IODA_INVALID_PE;
		pe->pdev = NULL;
		pci_dev_put(dev);
		return NULL;
	}

	/* Assign a DMA weight to the device */
	pe->dma_weight = pnv_ioda_dma_weight(dev);
	if (pe->dma_weight != 0) {
		phb->ioda.dma_weight += pe->dma_weight;
		phb->ioda.dma_pe_count++;
	}

	/* Link the PE */
	pnv_ioda_link_pe_by_weight(phb, pe);

	return pe;
}
#endif /* Useful for SRIOV case */

/*
 * Attach every device on @bus (and, for PNV_IODA_PE_BUS_ALL PEs,
 * recursively on all subordinate buses) to @pe, accumulating each
 * device's DMA weight into the PE.
 */
static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		struct pci_dn *pdn = pci_get_pdn(dev);

		if (pdn == NULL) {
			pr_warn("%s: No device node associated with device !\n",
				pci_name(dev));
			continue;
		}
		pci_dev_get(dev);
		pdn->pcidev = dev;
		pdn->pe_number = pe->pe_number;
		pe->dma_weight += pnv_ioda_dma_weight(dev);
		if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
			pnv_ioda_setup_same_PE(dev->subordinate, pe);
	}
}

/*
 * There are 2 types of PCI bus sensitive PEs: One that is comprised of
 * a single PCI bus. Another one that contains the primary PCI bus and its
 * subordinate PCI devices and buses. The second type of PE is normally
 * originated by PCIe-to-PCI bridge or PLX switch downstream ports.
 */
static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
{
	struct pci_controller *hose = pci_bus_to_host(bus);
	struct pnv_phb *phb = hose->private_data;
	struct pnv_ioda_pe *pe;
	int pe_num;

	pe_num = pnv_ioda_alloc_pe(phb);
	if (pe_num == IODA_INVALID_PE) {
		pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
			   __func__, pci_domain_nr(bus), bus->number);
		return;
	}

	pe = &phb->ioda.pe_array[pe_num];
	pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
	pe->pbus = bus;
	pe->pdev = NULL;
	pe->tce32_seg = -1;
	pe->mve_number = -1;
	pe->rid = bus->busn_res.start << 8;
	pe->dma_weight = 0;

	if (all)
		pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n",
			bus->busn_res.start, bus->busn_res.end, pe_num);
	else
		pe_info(pe, "Secondary bus %d associated with PE#%d\n",
			bus->busn_res.start, pe_num);

	if (pnv_ioda_configure_pe(phb, pe)) {
		/* XXX What do we do here ? */
		if (pe_num)
			pnv_ioda_free_pe(phb, pe_num);
		pe->pbus = NULL;
		return;
	}

	/* Associate it with all child devices */
	pnv_ioda_setup_same_PE(bus, pe);

	/* Put PE to the list */
	list_add_tail(&pe->list, &phb->ioda.pe_list);

	/* Account for one DMA PE if at least one DMA capable device exist
	 * below the bridge
	 */
	if (pe->dma_weight != 0) {
		phb->ioda.dma_weight += pe->dma_weight;
		phb->ioda.dma_pe_count++;
	}

	/* Link the PE */
	pnv_ioda_link_pe_by_weight(phb, pe);
}

/*
 * Walk the bus tree below @bus: give @bus its own PE, then give each
 * PCIe-to-PCI bridge's secondary bus a "bus all" PE and recurse into
 * every other bridge.
 */
static void pnv_ioda_setup_PEs(struct pci_bus *bus)
{
	struct pci_dev *dev;

	pnv_ioda_setup_bus_PE(bus, 0);

	list_for_each_entry(dev, &bus->devices, bus_list) {
		if (dev->subordinate) {
			if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE)
				pnv_ioda_setup_bus_PE(dev->subordinate, 1);
			else
				pnv_ioda_setup_PEs(dev->subordinate);
		}
	}
}

/*
 * Configure PEs so that the downstream PCI buses and devices
 * could have their associated PE#. Unfortunately, we didn't
 * figure out the way to identify the PLX bridge yet. So we
 * simply put the PCI bus and the subordinate behind the root
 * port to PE# here. The game rule here is expected to be changed
 * as soon as we can detect PLX bridge correctly.
 */
static void pnv_pci_ioda_setup_PEs(void)
{
	struct pci_controller *hose, *tmp;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		pnv_ioda_setup_PEs(hose->bus);
	}
}

/*
 * Point a device's DMA at its PE's 32-bit TCE table.  Called per
 * device during PCI fixup; harmless no-op when the device has no
 * PE assigned yet.
 */
static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev)
{
	struct pci_dn *pdn = pci_get_pdn(pdev);
	struct pnv_ioda_pe *pe;

	/*
	 * The function can be called while the PE#
	 * hasn't been assigned. Do nothing for the
	 * case.
	 */
	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
		return;

	pe = &phb->ioda.pe_array[pdn->pe_number];
	set_iommu_table_base(&pdev->dev, &pe->tce32_table);
}

/*
 * IODA1-style TCE invalidation: write the physical address range of
 * the modified TCEs to the invalidate register (stashed in
 * tbl->it_index by the table setup code), in the encoding the
 * hardware variant expects.
 */
static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
					 u64 *startp, u64 *endp)
{
	u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index;
	unsigned long start, end, inc;

	start = __pa(startp);
	end = __pa(endp);

	/* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */
	if (tbl->it_busno) {
		start <<= 12;
		end <<= 12;
		inc = 128 << 12;
		start |= tbl->it_busno;
		end |= tbl->it_busno;
	} else if (tbl->it_type & TCE_PCI_SWINV_PAIR) {
		/* p7ioc-style invalidation, 2 TCEs per write */
		start |= (1ull << 63);
		end |= (1ull << 63);
		inc = 16;
	} else {
		/* Default (older HW) */
		inc = 128;
	}

	end |= inc - 1;	/* round up end to be different than start */

	mb(); /* Ensure above stores are visible */
	while (start <= end) {
		__raw_writeq(start, invalidate);
		start += inc;
	}

	/*
	 * The iommu layer will do another mb() for us on build()
	 * and we don't care on free()
	 */
}

/*
 * IODA2 (PHB3)-style TCE invalidation: invalidate by PE number and
 * DMA address range rather than by TCE physical address.
 */
static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
					 struct iommu_table *tbl,
					 u64 *startp, u64 *endp)
{
	unsigned long start, end, inc;
	u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index;

	/* We'll invalidate DMA address in PE scope */
	start = 0x2ul << 60;
	start |= (pe->pe_number & 0xFF);
	end = start;

	/* Figure out the start, end and step */
	inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64));
	start |= (inc << 12);
	inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64));
	end |= (inc << 12);
	inc = (0x1ul << 12);
	mb();

	while (start <= end) {
		__raw_writeq(start, invalidate);
		start += inc;
	}
}

/*
 * Common entry point used by the TCE build/free paths: dispatch to the
 * IODA1 or IODA2 invalidation flavour depending on the PHB type.
 */
void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
				 u64 *startp, u64 *endp)
{
	struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
					      tce32_table);
	struct pnv_phb *phb = pe->phb;

	if (phb->type == PNV_PHB_IODA1)
		pnv_pci_ioda1_tce_invalidate(tbl, startp, endp);
	else
		pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp);
}

/*
 * IODA1: allocate @segs contiguous 32-bit TCE segments for @pe starting
 * at segment @base, map them through OPAL and register the resulting
 * Linux iommu table.
 */
static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
				      struct pnv_ioda_pe *pe, unsigned int base,
				      unsigned int segs)
{

	struct page *tce_mem = NULL;
	const __be64 *swinvp;
	struct iommu_table *tbl;
	unsigned int i;
	int64_t rc;
	void *addr;

	/* 256M DMA window, 4K TCE pages, 8 bytes TCE */
#define TCE32_TABLE_SIZE	((0x10000000 / 0x1000) * 8)

	/* XXX FIXME: Handle 64-bit only DMA devices */
	/* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
	/* XXX FIXME: Allocate multi-level tables on PHB3 */

	/* We shouldn't already have a 32-bit DMA associated */
	if (WARN_ON(pe->tce32_seg >= 0))
		return;

	/* Grab a 32-bit TCE table */
	pe->tce32_seg = base;
	pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
		(base << 28), ((base + segs) << 28) - 1);

	/* XXX Currently, we allocate one big contiguous table for the
	 * TCEs. We only really need one chunk per 256M of TCE space
	 * (ie per segment) but that's an optimization for later, it
	 * requires some added smarts with our get/put_tce implementation
	 */
	tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
				   get_order(TCE32_TABLE_SIZE * segs));
	if (!tce_mem) {
		pe_err(pe, " Failed to allocate a 32-bit TCE memory\n");
		goto fail;
	}
	addr = page_address(tce_mem);
	memset(addr, 0, TCE32_TABLE_SIZE * segs);

	/* Configure HW */
	for (i = 0; i < segs; i++) {
		rc = opal_pci_map_pe_dma_window(phb->opal_id,
					      pe->pe_number,
					      base + i, 1,
					      __pa(addr) + TCE32_TABLE_SIZE * i,
					      TCE32_TABLE_SIZE, 0x1000);
		if (rc) {
			pe_err(pe, " Failed to configure 32-bit TCE table,"
			       " err %ld\n", rc);
			goto fail;
		}
	}

	/* Setup linux iommu table */
	tbl = &pe->tce32_table;
	pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
				  base << 28);

	/* OPAL variant of P7IOC SW invalidated TCEs */
	swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
	if (swinvp) {
		/* We need a couple more fields -- an address and a data
		 * to or. Since the bus is only printed out on table free
		 * errors, and on the first pass the data will be a relative
		 * bus number, print that out instead.
		 */
		tbl->it_busno = 0;
		tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
		tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE |
			       TCE_PCI_SWINV_PAIR;
	}
	iommu_init_table(tbl, phb->hose->node);
	iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number);

	return;
 fail:
	/* XXX Failure: Try to fallback to 64-bit only ? */
	if (pe->tce32_seg >= 0)
		pe->tce32_seg = -1;
	if (tce_mem)
		__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
}

/*
 * IODA2 (PHB3): give @pe a single TCE table spanning the whole 32-bit
 * DMA space below the M32 base and map it through the TVT.
 * NOTE(review): unlike the IODA1 path above, this one does not call
 * iommu_register_group() on the table -- confirm whether that is
 * intentional.
 */
static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
				       struct pnv_ioda_pe *pe)
{
	struct page *tce_mem = NULL;
	void *addr;
	const __be64 *swinvp;
	struct iommu_table *tbl;
	unsigned int tce_table_size, end;
	int64_t rc;

	/* We shouldn't already have a 32-bit DMA associated */
	if (WARN_ON(pe->tce32_seg >= 0))
		return;

	/* The PE will reserve all possible 32-bits space */
	pe->tce32_seg = 0;
	end = (1 << ilog2(phb->ioda.m32_pci_base));
	tce_table_size = (end / 0x1000) * 8;
	pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
		end);

	/* Allocate TCE table */
	tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
				   get_order(tce_table_size));
	if (!tce_mem) {
		pe_err(pe, "Failed to allocate a 32-bit TCE memory\n");
		goto fail;
	}
	addr = page_address(tce_mem);
	memset(addr, 0, tce_table_size);

	/*
	 * Map TCE table through TVT. The TVE index is the PE number
	 * shifted by 1 bit for 32-bits DMA space.
	 */
	rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
					pe->pe_number << 1, 1, __pa(addr),
					tce_table_size, 0x1000);
	if (rc) {
		pe_err(pe, "Failed to configure 32-bit TCE table,"
		       " err %ld\n", rc);
		goto fail;
	}

	/* Setup linux iommu table */
	tbl = &pe->tce32_table;
	pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0);

	/* OPAL variant of PHB3 invalidated TCEs */
	swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
	if (swinvp) {
		/* We need a couple more fields -- an address and a data
		 * to or. Since the bus is only printed out on table free
		 * errors, and on the first pass the data will be a relative
		 * bus number, print that out instead.
		 */
		tbl->it_busno = 0;
		tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
		tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
	}
	iommu_init_table(tbl, phb->hose->node);

	return;
fail:
	if (pe->tce32_seg >= 0)
		pe->tce32_seg = -1;
	if (tce_mem)
		__free_pages(tce_mem, get_order(tce_table_size));
}

/*
 * Distribute the PHB's 32-bit TCE segments over its DMA-capable PEs,
 * proportionally to each PE's DMA weight (IODA1), or hand each PE the
 * full 32-bit space (IODA2).
 */
static void pnv_ioda_setup_dma(struct pnv_phb *phb)
{
	struct pci_controller *hose = phb->hose;
	unsigned int residual, remaining, segs, tw, base;
	struct pnv_ioda_pe *pe;

	/* If we have more PE# than segments available, hand out one
	 * per PE until we run out and let the rest fail. If not,
	 * then we assign at least one segment per PE, plus more based
	 * on the amount of devices under that PE
	 */
	if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
		residual = 0;
	else
		residual = phb->ioda.tce32_count -
			phb->ioda.dma_pe_count;

	pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
		hose->global_number, phb->ioda.tce32_count);
	pr_info("PCI: %d PE# for a total weight of %d\n",
		phb->ioda.dma_pe_count, phb->ioda.dma_weight);

	/* Walk our PE list and configure their DMA segments, hand them
	 * out one base segment plus any residual segments based on
	 * weight
	 */
	remaining = phb->ioda.tce32_count;
	tw = phb->ioda.dma_weight;
	base = 0;
	list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
		if (!pe->dma_weight)
			continue;
		if (!remaining) {
			pe_warn(pe, "No DMA32 resources available\n");
			continue;
		}
		segs = 1;
		if (residual) {
			/* Weighted share of the residual, rounded to nearest */
			segs += ((pe->dma_weight * residual)  + (tw / 2)) / tw;
			if (segs > remaining)
				segs = remaining;
		}

		/*
		 * For IODA2 compliant PHB3, we needn't care about the weight.
		 * The all available 32-bits DMA space will be assigned to
		 * the specific PE.
		 */
		if (phb->type == PNV_PHB_IODA1) {
			pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
				pe->dma_weight, segs);
			pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
		} else {
			pe_info(pe, "Assign DMA32 space\n");
			segs = 0;
			pnv_pci_ioda2_setup_dma_pe(phb, pe);
		}

		remaining -= segs;
		base += segs;
	}
}

#ifdef CONFIG_PCI_MSI
/*
 * PHB3 MSI EOI: notify OPAL that the MSI has been handled, then do the
 * normal XICS EOI.  Installed as irq_eoi of the cloned irq_chip below.
 */
static void pnv_ioda2_msi_eoi(struct irq_data *d)
{
	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
	struct irq_chip *chip = irq_data_get_irq_chip(d);
	struct pnv_phb *phb = container_of(chip, struct pnv_phb,
					   ioda.irq_chip);
	int64_t rc;

	rc = opal_pci_msi_eoi(phb->opal_id, hw_irq);
	WARN_ON_ONCE(rc);

	icp_native_eoi(d);
}

/*
 * Set up one MSI for @dev: bind its XIVE to the device's PE, fetch the
 * MSI address/data from OPAL and fill in @msg.  On PHB3, also swap in
 * an irq_chip whose EOI goes through OPAL.  Returns 0 or a -errno.
 */
static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
				  unsigned int hwirq, unsigned int virq,
				  unsigned int is_64, struct msi_msg *msg)
{
	struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
	struct pci_dn *pdn = pci_get_pdn(dev);
	struct irq_data *idata;
	struct irq_chip *ichip;
	unsigned int xive_num = hwirq - phb->msi_base;
	uint64_t addr64;
	uint32_t addr32, data;
	int rc;

	/* No PE assigned ? bail out ... no MSI for you ! */
	if (pe == NULL)
		return -ENXIO;

	/* Check if we have an MVE */
	if (pe->mve_number < 0)
		return -ENXIO;

	/* Force 32-bit MSI on some broken devices */
	if (pdn && pdn->force_32bit_msi)
		is_64 = 0;

	/* Assign XIVE to PE */
	rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num);
	if (rc) {
		pr_warn("%s: OPAL error %d setting XIVE %d PE\n",
			pci_name(dev), rc, xive_num);
		return -EIO;
	}

	if (is_64) {
		rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1,
				     &addr64, &data);
		if (rc) {
			pr_warn("%s: OPAL error %d getting 64-bit MSI data\n",
				pci_name(dev), rc);
			return -EIO;
		}
		msg->address_hi = addr64 >> 32;
		msg->address_lo = addr64 & 0xfffffffful;
	} else {
		rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1,
				     &addr32, &data);
		if (rc) {
			pr_warn("%s: OPAL error %d getting 32-bit MSI data\n",
				pci_name(dev), rc);
			return -EIO;
		}
		msg->address_hi = 0;
		msg->address_lo = addr32;
	}
	msg->data = data;

	/*
	 * Change the IRQ chip for the MSI interrupts on PHB3.
	 * The corresponding IRQ chip should be populated for
	 * the first time.
	 */
	if (phb->type == PNV_PHB_IODA2) {
		if (!phb->ioda.irq_chip_init) {
			/* Clone the chip of the first MSI virq and override
			 * its EOI to go through OPAL.
			 */
			idata = irq_get_irq_data(virq);
			ichip = irq_data_get_irq_chip(idata);
			phb->ioda.irq_chip_init = 1;
			phb->ioda.irq_chip = *ichip;
			phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi;
		}

		irq_set_chip(virq, &phb->ioda.irq_chip);
	}

	pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
		 " address=%x_%08x data=%x PE# %d\n",
		 pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num,
		 msg->address_hi, msg->address_lo, data, pe->pe_number);

	return 0;
}

/*
 * Read the PHB's MSI range from the device tree and set up the MSI
 * bitmap and the per-PHB msi_setup hook.
 */
static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
{
	unsigned int count;
	const __be32 *prop = of_get_property(phb->hose->dn,
					     "ibm,opal-msi-ranges", NULL);
	if (!prop) {
		/* BML Fallback */
		prop = of_get_property(phb->hose->dn, "msi-ranges", NULL);
	}
	if (!prop)
		return;

	phb->msi_base = be32_to_cpup(prop);
	count = be32_to_cpup(prop + 1);
	if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) {
		pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
		       phb->hose->global_number);
		return;
	}

	phb->msi_setup = pnv_pci_ioda_msi_setup;
	phb->msi32_support = 1;
	pr_info("  Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
		count, phb->msi_base);
}
#else
static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
#endif /* CONFIG_PCI_MSI */

/*
 * This function is supposed to be called on basis of PE from top
 * to bottom style. So the I/O or MMIO segment assigned to a
 * parent PE could be overridden by its child PEs if necessary.
 */
static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
				  struct pnv_ioda_pe *pe)
{
	struct pnv_phb *phb = hose->private_data;
	struct pci_bus_region region;
	struct resource *res;
	int i, index;
	int rc;

	/*
	 * NOTE: We only care PCI bus based PE for now. For PCI
	 * device based PE, for example SRIOV sensitive VF should
	 * be figured out later.
	 */
	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));

	pci_bus_for_each_resource(pe->pbus, res, i) {
		if (!res || !res->flags ||
		    res->start > res->end)
			continue;

		if (res->flags & IORESOURCE_IO) {
			/* Map every I/O segment the window covers to this PE */
			region.start = res->start - phb->ioda.io_pci_base;
			region.end = res->end - phb->ioda.io_pci_base;
			index = region.start / phb->ioda.io_segsize;

			while (index < phb->ioda.total_pe &&
			       region.start <= region.end) {
				phb->ioda.io_segmap[index] = pe->pe_number;
				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
					pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
				if (rc != OPAL_SUCCESS) {
					pr_err("%s: OPAL error %d when mapping IO "
					       "segment #%d to PE#%d\n",
					       __func__, rc, index, pe->pe_number);
					break;
				}

				region.start += phb->ioda.io_segsize;
				index++;
			}
		} else if (res->flags & IORESOURCE_MEM) {
			/* WARNING: Assumes M32 is mem region 0 in PHB. We need to
			 * harden that algorithm when we start supporting M64
			 */
			region.start = res->start -
				       hose->mem_offset[0] -
				       phb->ioda.m32_pci_base;
			region.end = res->end -
				     hose->mem_offset[0] -
				     phb->ioda.m32_pci_base;
			index = region.start / phb->ioda.m32_segsize;

			while (index < phb->ioda.total_pe &&
			       region.start <= region.end) {
				phb->ioda.m32_segmap[index] = pe->pe_number;
				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
					pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
				if (rc != OPAL_SUCCESS) {
					pr_err("%s: OPAL error %d when mapping M32 "
					       "segment#%d to PE#%d",
					       __func__, rc, index, pe->pe_number);
					break;
				}

				region.start += phb->ioda.m32_segsize;
				index++;
			}
		}
	}
}

/* Map I/O and M32 segments for every PE on every PHB */
static void pnv_pci_ioda_setup_seg(void)
{
	struct pci_controller *tmp, *hose;
	struct pnv_phb *phb;
	struct pnv_ioda_pe *pe;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		phb = hose->private_data;
		list_for_each_entry(pe, &phb->ioda.pe_list, list) {
			pnv_ioda_setup_pe_seg(hose, pe);
		}
	}
}

/* Set up DMA (TCE tables) on every PHB and mark them initialized */
static void pnv_pci_ioda_setup_DMA(void)
{
	struct pci_controller *hose, *tmp;
	struct pnv_phb *phb;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		pnv_ioda_setup_dma(hose->private_data);

		/* Mark the PHB initialization done */
		phb = hose->private_data;
		phb->initialized = 1;
	}
}

/*
 * Final PCI fixup: create the PEs, map their segments, configure DMA,
 * then bring up EEH on top of the finished topology.
 */
static void pnv_pci_ioda_fixup(void)
{
	pnv_pci_ioda_setup_PEs();
	pnv_pci_ioda_setup_seg();
	pnv_pci_ioda_setup_DMA();

#ifdef CONFIG_EEH
	eeh_addr_cache_build();
	eeh_init();
#endif
}

/*
 * Returns the alignment for I/O or memory windows for P2P
 * bridges. That actually depends on how PEs are segmented.
 * For now, we return I/O or M32 segment size for PE sensitive
 * P2P bridges. Otherwise, the default values (4KiB for I/O,
 * 1MiB for memory) will be returned.
 *
 * The current PCI bus might be put into one PE, which was
 * created against the parent PCI bridge. For that case, we
 * needn't enlarge the alignment so that we can save some
 * resources.
 */
static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
						unsigned long type)
{
	struct pci_dev *bridge;
	struct pci_controller *hose = pci_bus_to_host(bus);
	struct pnv_phb *phb = hose->private_data;
	int num_pci_bridges = 0;

	bridge = bus->self;
	while (bridge) {
		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) {
			num_pci_bridges++;
			/* Behind a second PCIe-to-PCI bridge the bus shares
			 * its parent's PE, so no extra alignment is needed.
			 */
			if (num_pci_bridges >= 2)
				return 1;
		}

		bridge = bridge->bus->self;
	}

	/* We need support prefetchable memory window later */
	if (type & IORESOURCE_MEM)
		return phb->ioda.m32_segsize;

	return phb->ioda.io_segsize;
}

/* Prevent enabling devices for which we couldn't properly
 * assign a PE
 */
static int pnv_pci_enable_device_hook(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn;

	/* The function is probably called while the PEs have
	 * not be created yet. For example, resource reassignment
	 * during PCI probe period. We just skip the check if
	 * PEs isn't ready.
	 */
	if (!phb->initialized)
		return 0;

	pdn = pci_get_pdn(dev);
	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
		return -EINVAL;

	return 0;
}

/* Reverse-map a bus/devfn (RID) to its PE number */
static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
			       u32 devfn)
{
	return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
}

/* Reset the PHB's IODA tables on shutdown (e.g. kexec) */
static void pnv_pci_ioda_shutdown(struct pnv_phb *phb)
{
	opal_pci_reset(phb->opal_id, OPAL_PCI_IODA_TABLE_RESET,
		       OPAL_ASSERT_RESET);
}

/*
 * Create and initialize a pnv_phb for the IODA PHB described by device
 * tree node @np: read the OPAL PHB id, map registers, size the M32/IO
 * windows and allocate the PE bitmaps, segment maps and PE array.
 */
void __init pnv_pci_init_ioda_phb(struct device_node *np,
				  u64 hub_id, int ioda_type)
{
	struct pci_controller *hose;
	static int primary = 1;
	struct pnv_phb *phb;
	unsigned long size, m32map_off, iomap_off, pemap_off;
	const u64 *prop64;
	const u32 *prop32;
	u64 phb_id;
	void *aux;
	long rc;

	pr_info(" Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name);

	prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
	if (!prop64) {
		pr_err(" Missing \"ibm,opal-phbid\" property !\n");
		return;
	}
	phb_id = be64_to_cpup(prop64);
	pr_debug(" PHB-ID : 0x%016llx\n", phb_id);

	phb = alloc_bootmem(sizeof(struct pnv_phb));
	if (phb) {
		memset(phb, 0, sizeof(struct pnv_phb));
		phb->hose = hose = pcibios_alloc_controller(np);
	}
	if (!phb || !phb->hose) {
		pr_err("PCI: Failed to allocate PCI controller for %s\n",
		       np->full_name);
		return;
	}

	spin_lock_init(&phb->lock);
	/* XXX Use device-tree */
	hose->first_busno = 0;
	hose->last_busno = 0xff;
	hose->private_data = phb;
	phb->hub_id = hub_id;
	phb->opal_id = phb_id;
	phb->type = ioda_type;

	/* Detect specific models for error handling */
	if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
		phb->model = PNV_PHB_MODEL_P7IOC;
	else if (of_device_is_compatible(np, "ibm,power8-pciex"))
		phb->model = PNV_PHB_MODEL_PHB3;
	else
		phb->model = PNV_PHB_MODEL_UNKNOWN;

	/* Parse 32-bit and IO ranges (if any) */
	pci_process_bridge_OF_ranges(phb->hose, np, primary);
	primary = 0;

	/* Get registers */
	phb->regs = of_iomap(np, 0);
	if (phb->regs == NULL)
		pr_err(" Failed to map registers !\n");

	/* Initialize more IODA stuff */
	prop32 = of_get_property(np, "ibm,opal-num-pes", NULL);
	if (!prop32)
		phb->ioda.total_pe = 1;
	else
		phb->ioda.total_pe = *prop32;

	phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
	/* FW has already removed the top 64k of M32 space (MSI space);
	 * add it back so segment sizing covers the full window.
	 */
	phb->ioda.m32_size += 0x10000;

	phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
	phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0];
	phb->ioda.io_size = hose->pci_io_size;
	phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
	phb->ioda.io_pci_base = 0; /* XXX calculate this ? */

	/* Allocate aux data & arrays
	 *
	 * XXX TODO: Don't allocate io segmap on PHB3
	 */
	size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
	m32map_off = size;
	size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
	iomap_off = size;
	size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
	pemap_off = size;
	size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
	aux = alloc_bootmem(size);
	memset(aux, 0, size);
	phb->ioda.pe_alloc = aux;
	phb->ioda.m32_segmap = aux + m32map_off;
	phb->ioda.io_segmap = aux + iomap_off;
	phb->ioda.pe_array = aux + pemap_off;
	/* PE#0 is reserved (pre-set) */
	set_bit(0, phb->ioda.pe_alloc);

	INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
	INIT_LIST_HEAD(&phb->ioda.pe_list);

	/* Calculate how many 32-bit TCE segments we have */
	phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;

	/* Clear unusable m64 */
	hose->mem_resources[1].flags = 0;
	hose->mem_resources[1].start = 0;
	hose->mem_resources[1].end = 0;
	hose->mem_resources[2].flags = 0;
	hose->mem_resources[2].start = 0;
	hose->mem_resources[2].end = 0;

#if 0 /* We should really do that ...
*/ 1169 rc = opal_pci_set_phb_mem_window(opal->phb_id, 1170 window_type, 1171 window_num, 1172 starting_real_address, 1173 starting_pci_address, 1174 segment_size); 1175 #endif 1176 1177 pr_info(" %d PE's M32: 0x%x [segment=0x%x] IO: 0x%x [segment=0x%x]\n", 1178 phb->ioda.total_pe, 1179 phb->ioda.m32_size, phb->ioda.m32_segsize, 1180 phb->ioda.io_size, phb->ioda.io_segsize); 1181 1182 phb->hose->ops = &pnv_pci_ops; 1183 #ifdef CONFIG_EEH 1184 phb->eeh_ops = &ioda_eeh_ops; 1185 #endif 1186 1187 /* Setup RID -> PE mapping function */ 1188 phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe; 1189 1190 /* Setup TCEs */ 1191 phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; 1192 1193 /* Setup shutdown function for kexec */ 1194 phb->shutdown = pnv_pci_ioda_shutdown; 1195 1196 /* Setup MSI support */ 1197 pnv_pci_init_ioda_msis(phb); 1198 1199 /* 1200 * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here 1201 * to let the PCI core do resource assignment. It's supposed 1202 * that the PCI core will do correct I/O and MMIO alignment 1203 * for the P2P bridge bars so that each PCI bus (excluding 1204 * the child P2P bridges) can form individual PE. 
1205 */ 1206 ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; 1207 ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook; 1208 ppc_md.pcibios_window_alignment = pnv_pci_window_alignment; 1209 pci_add_flags(PCI_REASSIGN_ALL_RSRC); 1210 1211 /* Reset IODA tables to a clean state */ 1212 rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET); 1213 if (rc) 1214 pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc); 1215 1216 /* 1217 * On IODA1 map everything to PE#0, on IODA2 we assume the IODA reset 1218 * has cleared the RTT which has the same effect 1219 */ 1220 if (ioda_type == PNV_PHB_IODA1) 1221 opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1 , OPAL_MAP_PE); 1222 } 1223 1224 void pnv_pci_init_ioda2_phb(struct device_node *np) 1225 { 1226 pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2); 1227 } 1228 1229 void __init pnv_pci_init_ioda_hub(struct device_node *np) 1230 { 1231 struct device_node *phbn; 1232 const u64 *prop64; 1233 u64 hub_id; 1234 1235 pr_info("Probing IODA IO-Hub %s\n", np->full_name); 1236 1237 prop64 = of_get_property(np, "ibm,opal-hubid", NULL); 1238 if (!prop64) { 1239 pr_err(" Missing \"ibm,opal-hubid\" property !\n"); 1240 return; 1241 } 1242 hub_id = be64_to_cpup(prop64); 1243 pr_devel(" HUB-ID : 0x%016llx\n", hub_id); 1244 1245 /* Count child PHBs */ 1246 for_each_child_of_node(np, phbn) { 1247 /* Look for IODA1 PHBs */ 1248 if (of_device_is_compatible(phbn, "ibm,ioda-phb")) 1249 pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1); 1250 } 1251 } 1252