1 /* 2 * Support PCI/PCIe on PowerNV platforms 3 * 4 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp. 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12 #undef DEBUG 13 14 #include <linux/kernel.h> 15 #include <linux/pci.h> 16 #include <linux/crash_dump.h> 17 #include <linux/debugfs.h> 18 #include <linux/delay.h> 19 #include <linux/string.h> 20 #include <linux/init.h> 21 #include <linux/bootmem.h> 22 #include <linux/irq.h> 23 #include <linux/io.h> 24 #include <linux/msi.h> 25 #include <linux/memblock.h> 26 27 #include <asm/sections.h> 28 #include <asm/io.h> 29 #include <asm/prom.h> 30 #include <asm/pci-bridge.h> 31 #include <asm/machdep.h> 32 #include <asm/msi_bitmap.h> 33 #include <asm/ppc-pci.h> 34 #include <asm/opal.h> 35 #include <asm/iommu.h> 36 #include <asm/tce.h> 37 #include <asm/xics.h> 38 #include <asm/debug.h> 39 #include <asm/firmware.h> 40 #include <asm/pnv-pci.h> 41 42 #include <misc/cxl.h> 43 44 #include "powernv.h" 45 #include "pci.h" 46 47 static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, 48 const char *fmt, ...) 49 { 50 struct va_format vaf; 51 va_list args; 52 char pfix[32]; 53 54 va_start(args, fmt); 55 56 vaf.fmt = fmt; 57 vaf.va = &args; 58 59 if (pe->pdev) 60 strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix)); 61 else 62 sprintf(pfix, "%04x:%02x ", 63 pci_domain_nr(pe->pbus), pe->pbus->number); 64 65 printk("%spci %s: [PE# %.3d] %pV", 66 level, pfix, pe->pe_number, &vaf); 67 68 va_end(args); 69 } 70 71 #define pe_err(pe, fmt, ...) \ 72 pe_level_printk(pe, KERN_ERR, fmt, ##__VA_ARGS__) 73 #define pe_warn(pe, fmt, ...) \ 74 pe_level_printk(pe, KERN_WARNING, fmt, ##__VA_ARGS__) 75 #define pe_info(pe, fmt, ...) \ 76 pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__) 77 78 /* 79 * stdcix is only supposed to be used in hypervisor real mode as per 80 * the architecture spec 81 */ 82 static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr) 83 { 84 __asm__ __volatile__("stdcix %0,0,%1" 85 : : "r" (val), "r" (paddr) : "memory"); 86 } 87 88 static inline bool pnv_pci_is_mem_pref_64(unsigned long flags) 89 { 90 return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) == 91 (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)); 92 } 93 94 static int pnv_ioda_alloc_pe(struct pnv_phb *phb) 95 { 96 unsigned long pe; 97 98 do { 99 pe = find_next_zero_bit(phb->ioda.pe_alloc, 100 phb->ioda.total_pe, 0); 101 if (pe >= phb->ioda.total_pe) 102 return IODA_INVALID_PE; 103 } while(test_and_set_bit(pe, phb->ioda.pe_alloc)); 104 105 phb->ioda.pe_array[pe].phb = phb; 106 phb->ioda.pe_array[pe].pe_number = pe; 107 return pe; 108 } 109 110 static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe) 111 { 112 WARN_ON(phb->ioda.pe_array[pe].pdev); 113 114 memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe)); 115 clear_bit(pe, phb->ioda.pe_alloc); 116 } 117 118 /* The default M64 BAR is shared by all PEs */ 119 static int pnv_ioda2_init_m64(struct pnv_phb *phb) 120 { 121 const char *desc; 122 struct resource *r; 123 s64 rc; 124 125 /* Configure the default M64 BAR */ 126 rc = opal_pci_set_phb_mem_window(phb->opal_id, 127 OPAL_M64_WINDOW_TYPE, 128 phb->ioda.m64_bar_idx, 129 phb->ioda.m64_base, 130 0, /* unused */ 131 phb->ioda.m64_size); 132 if (rc != OPAL_SUCCESS) { 133 desc = "configuring"; 134 goto fail; 135 } 136 137 /* Enable the default M64 BAR */ 138 rc = opal_pci_phb_mmio_enable(phb->opal_id, 139 OPAL_M64_WINDOW_TYPE, 140 phb->ioda.m64_bar_idx, 141 OPAL_ENABLE_M64_SPLIT); 142 if (rc != OPAL_SUCCESS) { 143 desc = "enabling"; 144 goto fail; 145 } 146 147 /* Mark the M64 BAR assigned */ 148 set_bit(phb->ioda.m64_bar_idx, &phb->ioda.m64_bar_alloc); 149 150 /* 151 * Strip off the segment used by the reserved PE, which is 152 * expected to be 0 or last one of PE capabicity. 153 */ 154 r = &phb->hose->mem_resources[1]; 155 if (phb->ioda.reserved_pe == 0) 156 r->start += phb->ioda.m64_segsize; 157 else if (phb->ioda.reserved_pe == (phb->ioda.total_pe - 1)) 158 r->end -= phb->ioda.m64_segsize; 159 else 160 pr_warn(" Cannot strip M64 segment for reserved PE#%d\n", 161 phb->ioda.reserved_pe); 162 163 return 0; 164 165 fail: 166 pr_warn(" Failure %lld %s M64 BAR#%d\n", 167 rc, desc, phb->ioda.m64_bar_idx); 168 opal_pci_phb_mmio_enable(phb->opal_id, 169 OPAL_M64_WINDOW_TYPE, 170 phb->ioda.m64_bar_idx, 171 OPAL_DISABLE_M64); 172 return -EIO; 173 } 174 175 static void pnv_ioda2_alloc_m64_pe(struct pnv_phb *phb) 176 { 177 resource_size_t sgsz = phb->ioda.m64_segsize; 178 struct pci_dev *pdev; 179 struct resource *r; 180 int base, step, i; 181 182 /* 183 * Root bus always has full M64 range and root port has 184 * M64 range used in reality. So we're checking root port 185 * instead of root bus. 186 */ 187 list_for_each_entry(pdev, &phb->hose->bus->devices, bus_list) { 188 for (i = PCI_BRIDGE_RESOURCES; 189 i <= PCI_BRIDGE_RESOURCE_END; i++) { 190 r = &pdev->resource[i]; 191 if (!r->parent || 192 !pnv_pci_is_mem_pref_64(r->flags)) 193 continue; 194 195 base = (r->start - phb->ioda.m64_base) / sgsz; 196 for (step = 0; step < resource_size(r) / sgsz; step++) 197 set_bit(base + step, phb->ioda.pe_alloc); 198 } 199 } 200 } 201 202 static int pnv_ioda2_pick_m64_pe(struct pnv_phb *phb, 203 struct pci_bus *bus, int all) 204 { 205 resource_size_t segsz = phb->ioda.m64_segsize; 206 struct pci_dev *pdev; 207 struct resource *r; 208 struct pnv_ioda_pe *master_pe, *pe; 209 unsigned long size, *pe_alloc; 210 bool found; 211 int start, i, j; 212 213 /* Root bus shouldn't use M64 */ 214 if (pci_is_root_bus(bus)) 215 return IODA_INVALID_PE; 216 217 /* We support only one M64 window on each bus */ 218 found = false; 219 pci_bus_for_each_resource(bus, r, i) { 220 if (r && r->parent && 221 pnv_pci_is_mem_pref_64(r->flags)) { 222 found = true; 223 break; 224 } 225 } 226 227 /* No M64 window found ? */ 228 if (!found) 229 return IODA_INVALID_PE; 230 231 /* Allocate bitmap */ 232 size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long)); 233 pe_alloc = kzalloc(size, GFP_KERNEL); 234 if (!pe_alloc) { 235 pr_warn("%s: Out of memory !\n", 236 __func__); 237 return IODA_INVALID_PE; 238 } 239 240 /* 241 * Figure out reserved PE numbers by the PE 242 * the its child PEs. 243 */ 244 start = (r->start - phb->ioda.m64_base) / segsz; 245 for (i = 0; i < resource_size(r) / segsz; i++) 246 set_bit(start + i, pe_alloc); 247 248 if (all) 249 goto done; 250 251 /* 252 * If the PE doesn't cover all subordinate buses, 253 * we need subtract from reserved PEs for children. 254 */ 255 list_for_each_entry(pdev, &bus->devices, bus_list) { 256 if (!pdev->subordinate) 257 continue; 258 259 pci_bus_for_each_resource(pdev->subordinate, r, i) { 260 if (!r || !r->parent || 261 !pnv_pci_is_mem_pref_64(r->flags)) 262 continue; 263 264 start = (r->start - phb->ioda.m64_base) / segsz; 265 for (j = 0; j < resource_size(r) / segsz ; j++) 266 clear_bit(start + j, pe_alloc); 267 } 268 } 269 270 /* 271 * the current bus might not own M64 window and that's all 272 * contributed by its child buses. For the case, we needn't 273 * pick M64 dependent PE#. 274 */ 275 if (bitmap_empty(pe_alloc, phb->ioda.total_pe)) { 276 kfree(pe_alloc); 277 return IODA_INVALID_PE; 278 } 279 280 /* 281 * Figure out the master PE and put all slave PEs to master 282 * PE's list to form compound PE. 283 */ 284 done: 285 master_pe = NULL; 286 i = -1; 287 while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) < 288 phb->ioda.total_pe) { 289 pe = &phb->ioda.pe_array[i]; 290 pe->phb = phb; 291 pe->pe_number = i; 292 293 if (!master_pe) { 294 pe->flags |= PNV_IODA_PE_MASTER; 295 INIT_LIST_HEAD(&pe->slaves); 296 master_pe = pe; 297 } else { 298 pe->flags |= PNV_IODA_PE_SLAVE; 299 pe->master = master_pe; 300 list_add_tail(&pe->list, &master_pe->slaves); 301 } 302 } 303 304 kfree(pe_alloc); 305 return master_pe->pe_number; 306 } 307 308 static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) 309 { 310 struct pci_controller *hose = phb->hose; 311 struct device_node *dn = hose->dn; 312 struct resource *res; 313 const u32 *r; 314 u64 pci_addr; 315 316 if (!firmware_has_feature(FW_FEATURE_OPALv3)) { 317 pr_info(" Firmware too old to support M64 window\n"); 318 return; 319 } 320 321 r = of_get_property(dn, "ibm,opal-m64-window", NULL); 322 if (!r) { 323 pr_info(" No <ibm,opal-m64-window> on %s\n", 324 dn->full_name); 325 return; 326 } 327 328 /* FIXME: Support M64 for P7IOC */ 329 if (phb->type != PNV_PHB_IODA2) { 330 pr_info(" Not support M64 window\n"); 331 return; 332 } 333 334 res = &hose->mem_resources[1]; 335 res->start = of_translate_address(dn, r + 2); 336 res->end = res->start + of_read_number(r + 4, 2) - 1; 337 res->flags = (IORESOURCE_MEM | IORESOURCE_MEM_64 | IORESOURCE_PREFETCH); 338 pci_addr = of_read_number(r, 2); 339 hose->mem_offset[1] = res->start - pci_addr; 340 341 phb->ioda.m64_size = resource_size(res); 342 phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe; 343 phb->ioda.m64_base = pci_addr; 344 345 /* Use last M64 BAR to cover M64 window */ 346 phb->ioda.m64_bar_idx = 15; 347 phb->init_m64 = pnv_ioda2_init_m64; 348 phb->alloc_m64_pe = pnv_ioda2_alloc_m64_pe; 349 phb->pick_m64_pe = pnv_ioda2_pick_m64_pe; 350 } 351 352 static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no) 353 { 354 struct pnv_ioda_pe *pe = &phb->ioda.pe_array[pe_no]; 355 struct pnv_ioda_pe *slave; 356 s64 rc; 357 358 /* Fetch master PE */ 359 if (pe->flags & PNV_IODA_PE_SLAVE) { 360 pe = pe->master; 361 WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)); 362 pe_no = pe->pe_number; 363 } 364 365 /* Freeze master PE */ 366 rc = opal_pci_eeh_freeze_set(phb->opal_id, 367 pe_no, 368 OPAL_EEH_ACTION_SET_FREEZE_ALL); 369 if (rc != OPAL_SUCCESS) { 370 pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n", 371 __func__, rc, phb->hose->global_number, pe_no); 372 return; 373 } 374 375 /* Freeze slave PEs */ 376 if (!(pe->flags & PNV_IODA_PE_MASTER)) 377 return; 378 379 list_for_each_entry(slave, &pe->slaves, list) { 380 rc = opal_pci_eeh_freeze_set(phb->opal_id, 381 slave->pe_number, 382 OPAL_EEH_ACTION_SET_FREEZE_ALL); 383 if (rc != OPAL_SUCCESS) 384 pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n", 385 __func__, rc, phb->hose->global_number, 386 slave->pe_number); 387 } 388 } 389 390 static int pnv_ioda_unfreeze_pe(struct pnv_phb *phb, int pe_no, int opt) 391 { 392 struct pnv_ioda_pe *pe, *slave; 393 s64 rc; 394 395 /* Find master PE */ 396 pe = &phb->ioda.pe_array[pe_no]; 397 if (pe->flags & PNV_IODA_PE_SLAVE) { 398 pe = pe->master; 399 WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)); 400 pe_no = pe->pe_number; 401 } 402 403 /* Clear frozen state for master PE */ 404 rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, opt); 405 if (rc != OPAL_SUCCESS) { 406 pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n", 407 __func__, rc, opt, phb->hose->global_number, pe_no); 408 return -EIO; 409 } 410 411 if (!(pe->flags & PNV_IODA_PE_MASTER)) 412 return 0; 413 414 /* Clear frozen state for slave PEs */ 415 list_for_each_entry(slave, &pe->slaves, list) { 416 rc = opal_pci_eeh_freeze_clear(phb->opal_id, 417 slave->pe_number, 418 opt); 419 if (rc != OPAL_SUCCESS) { 420 pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n", 421 __func__, rc, opt, phb->hose->global_number, 422 slave->pe_number); 423 return -EIO; 424 } 425 } 426 427 return 0; 428 } 429 430 static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no) 431 { 432 struct pnv_ioda_pe *slave, *pe; 433 u8 fstate, state; 434 __be16 pcierr; 435 s64 rc; 436 437 /* Sanity check on PE number */ 438 if (pe_no < 0 || pe_no >= phb->ioda.total_pe) 439 return OPAL_EEH_STOPPED_PERM_UNAVAIL; 440 441 /* 442 * Fetch the master PE and the PE instance might be 443 * not initialized yet. 444 */ 445 pe = &phb->ioda.pe_array[pe_no]; 446 if (pe->flags & PNV_IODA_PE_SLAVE) { 447 pe = pe->master; 448 WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)); 449 pe_no = pe->pe_number; 450 } 451 452 /* Check the master PE */ 453 rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, 454 &state, &pcierr, NULL); 455 if (rc != OPAL_SUCCESS) { 456 pr_warn("%s: Failure %lld getting " 457 "PHB#%x-PE#%x state\n", 458 __func__, rc, 459 phb->hose->global_number, pe_no); 460 return OPAL_EEH_STOPPED_TEMP_UNAVAIL; 461 } 462 463 /* Check the slave PE */ 464 if (!(pe->flags & PNV_IODA_PE_MASTER)) 465 return state; 466 467 list_for_each_entry(slave, &pe->slaves, list) { 468 rc = opal_pci_eeh_freeze_status(phb->opal_id, 469 slave->pe_number, 470 &fstate, 471 &pcierr, 472 NULL); 473 if (rc != OPAL_SUCCESS) { 474 pr_warn("%s: Failure %lld getting " 475 "PHB#%x-PE#%x state\n", 476 __func__, rc, 477 phb->hose->global_number, slave->pe_number); 478 return OPAL_EEH_STOPPED_TEMP_UNAVAIL; 479 } 480 481 /* 482 * Override the result based on the ascending 483 * priority. 484 */ 485 if (fstate > state) 486 state = fstate; 487 } 488 489 return state; 490 } 491 492 /* Currently those 2 are only used when MSIs are enabled, this will change 493 * but in the meantime, we need to protect them to avoid warnings 494 */ 495 #ifdef CONFIG_PCI_MSI 496 static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev) 497 { 498 struct pci_controller *hose = pci_bus_to_host(dev->bus); 499 struct pnv_phb *phb = hose->private_data; 500 struct pci_dn *pdn = pci_get_pdn(dev); 501 502 if (!pdn) 503 return NULL; 504 if (pdn->pe_number == IODA_INVALID_PE) 505 return NULL; 506 return &phb->ioda.pe_array[pdn->pe_number]; 507 } 508 #endif /* CONFIG_PCI_MSI */ 509 510 static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) 511 { 512 struct pci_dev *parent; 513 uint8_t bcomp, dcomp, fcomp; 514 long rc, rid_end, rid; 515 516 /* Bus validation ? */ 517 if (pe->pbus) { 518 int count; 519 520 dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER; 521 fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER; 522 parent = pe->pbus->self; 523 if (pe->flags & PNV_IODA_PE_BUS_ALL) 524 count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1; 525 else 526 count = 1; 527 528 switch(count) { 529 case 1: bcomp = OpalPciBusAll; break; 530 case 2: bcomp = OpalPciBus7Bits; break; 531 case 4: bcomp = OpalPciBus6Bits; break; 532 case 8: bcomp = OpalPciBus5Bits; break; 533 case 16: bcomp = OpalPciBus4Bits; break; 534 case 32: bcomp = OpalPciBus3Bits; break; 535 default: 536 pr_err("%s: Number of subordinate busses %d" 537 " unsupported\n", 538 pci_name(pe->pbus->self), count); 539 /* Do an exact match only */ 540 bcomp = OpalPciBusAll; 541 } 542 rid_end = pe->rid + (count << 8); 543 } else { 544 parent = pe->pdev->bus->self; 545 bcomp = OpalPciBusAll; 546 dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER; 547 fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER; 548 rid_end = pe->rid + 1; 549 } 550 551 /* 552 * Associate PE in PELT. We need add the PE into the 553 * corresponding PELT-V as well. Otherwise, the error 554 * originated from the PE might contribute to other 555 * PEs. 556 */ 557 rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid, 558 bcomp, dcomp, fcomp, OPAL_MAP_PE); 559 if (rc) { 560 pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc); 561 return -ENXIO; 562 } 563 564 rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, 565 pe->pe_number, OPAL_ADD_PE_TO_DOMAIN); 566 if (rc) 567 pe_warn(pe, "OPAL error %d adding self to PELTV\n", rc); 568 opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, 569 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); 570 571 /* Add to all parents PELT-V */ 572 while (parent) { 573 struct pci_dn *pdn = pci_get_pdn(parent); 574 if (pdn && pdn->pe_number != IODA_INVALID_PE) { 575 rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, 576 pe->pe_number, OPAL_ADD_PE_TO_DOMAIN); 577 /* XXX What to do in case of error ? */ 578 } 579 parent = parent->bus->self; 580 } 581 /* Setup reverse map */ 582 for (rid = pe->rid; rid < rid_end; rid++) 583 phb->ioda.pe_rmap[rid] = pe->pe_number; 584 585 /* Setup one MVTs on IODA1 */ 586 if (phb->type == PNV_PHB_IODA1) { 587 pe->mve_number = pe->pe_number; 588 rc = opal_pci_set_mve(phb->opal_id, pe->mve_number, 589 pe->pe_number); 590 if (rc) { 591 pe_err(pe, "OPAL error %ld setting up MVE %d\n", 592 rc, pe->mve_number); 593 pe->mve_number = -1; 594 } else { 595 rc = opal_pci_set_mve_enable(phb->opal_id, 596 pe->mve_number, OPAL_ENABLE_MVE); 597 if (rc) { 598 pe_err(pe, "OPAL error %ld enabling MVE %d\n", 599 rc, pe->mve_number); 600 pe->mve_number = -1; 601 } 602 } 603 } else if (phb->type == PNV_PHB_IODA2) 604 pe->mve_number = 0; 605 606 return 0; 607 } 608 609 static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb, 610 struct pnv_ioda_pe *pe) 611 { 612 struct pnv_ioda_pe *lpe; 613 614 list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) { 615 if (lpe->dma_weight < pe->dma_weight) { 616 list_add_tail(&pe->dma_link, &lpe->dma_link); 617 return; 618 } 619 } 620 list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list); 621 } 622 623 static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev) 624 { 625 /* This is quite simplistic. The "base" weight of a device 626 * is 10. 0 means no DMA is to be accounted for it. 627 */ 628 629 /* If it's a bridge, no DMA */ 630 if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) 631 return 0; 632 633 /* Reduce the weight of slow USB controllers */ 634 if (dev->class == PCI_CLASS_SERIAL_USB_UHCI || 635 dev->class == PCI_CLASS_SERIAL_USB_OHCI || 636 dev->class == PCI_CLASS_SERIAL_USB_EHCI) 637 return 3; 638 639 /* Increase the weight of RAID (includes Obsidian) */ 640 if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID) 641 return 15; 642 643 /* Default */ 644 return 10; 645 } 646 647 #if 0 648 static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) 649 { 650 struct pci_controller *hose = pci_bus_to_host(dev->bus); 651 struct pnv_phb *phb = hose->private_data; 652 struct pci_dn *pdn = pci_get_pdn(dev); 653 struct pnv_ioda_pe *pe; 654 int pe_num; 655 656 if (!pdn) { 657 pr_err("%s: Device tree node not associated properly\n", 658 pci_name(dev)); 659 return NULL; 660 } 661 if (pdn->pe_number != IODA_INVALID_PE) 662 return NULL; 663 664 /* PE#0 has been pre-set */ 665 if (dev->bus->number == 0) 666 pe_num = 0; 667 else 668 pe_num = pnv_ioda_alloc_pe(phb); 669 if (pe_num == IODA_INVALID_PE) { 670 pr_warning("%s: Not enough PE# available, disabling device\n", 671 pci_name(dev)); 672 return NULL; 673 } 674 675 /* NOTE: We get only one ref to the pci_dev for the pdn, not for the 676 * pointer in the PE data structure, both should be destroyed at the 677 * same time. However, this needs to be looked at more closely again 678 * once we actually start removing things (Hotplug, SR-IOV, ...) 679 * 680 * At some point we want to remove the PDN completely anyways 681 */ 682 pe = &phb->ioda.pe_array[pe_num]; 683 pci_dev_get(dev); 684 pdn->pcidev = dev; 685 pdn->pe_number = pe_num; 686 pe->pdev = dev; 687 pe->pbus = NULL; 688 pe->tce32_seg = -1; 689 pe->mve_number = -1; 690 pe->rid = dev->bus->number << 8 | pdn->devfn; 691 692 pe_info(pe, "Associated device to PE\n"); 693 694 if (pnv_ioda_configure_pe(phb, pe)) { 695 /* XXX What do we do here ? */ 696 if (pe_num) 697 pnv_ioda_free_pe(phb, pe_num); 698 pdn->pe_number = IODA_INVALID_PE; 699 pe->pdev = NULL; 700 pci_dev_put(dev); 701 return NULL; 702 } 703 704 /* Assign a DMA weight to the device */ 705 pe->dma_weight = pnv_ioda_dma_weight(dev); 706 if (pe->dma_weight != 0) { 707 phb->ioda.dma_weight += pe->dma_weight; 708 phb->ioda.dma_pe_count++; 709 } 710 711 /* Link the PE */ 712 pnv_ioda_link_pe_by_weight(phb, pe); 713 714 return pe; 715 } 716 #endif /* Useful for SRIOV case */ 717 718 static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) 719 { 720 struct pci_dev *dev; 721 722 list_for_each_entry(dev, &bus->devices, bus_list) { 723 struct pci_dn *pdn = pci_get_pdn(dev); 724 725 if (pdn == NULL) { 726 pr_warn("%s: No device node associated with device !\n", 727 pci_name(dev)); 728 continue; 729 } 730 pdn->pcidev = dev; 731 pdn->pe_number = pe->pe_number; 732 pe->dma_weight += pnv_ioda_dma_weight(dev); 733 if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) 734 pnv_ioda_setup_same_PE(dev->subordinate, pe); 735 } 736 } 737 738 /* 739 * There're 2 types of PCI bus sensitive PEs: One that is compromised of 740 * single PCI bus. Another one that contains the primary PCI bus and its 741 * subordinate PCI devices and buses. The second type of PE is normally 742 * orgiriated by PCIe-to-PCI bridge or PLX switch downstream ports. 743 */ 744 static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) 745 { 746 struct pci_controller *hose = pci_bus_to_host(bus); 747 struct pnv_phb *phb = hose->private_data; 748 struct pnv_ioda_pe *pe; 749 int pe_num = IODA_INVALID_PE; 750 751 /* Check if PE is determined by M64 */ 752 if (phb->pick_m64_pe) 753 pe_num = phb->pick_m64_pe(phb, bus, all); 754 755 /* The PE number isn't pinned by M64 */ 756 if (pe_num == IODA_INVALID_PE) 757 pe_num = pnv_ioda_alloc_pe(phb); 758 759 if (pe_num == IODA_INVALID_PE) { 760 pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n", 761 __func__, pci_domain_nr(bus), bus->number); 762 return; 763 } 764 765 pe = &phb->ioda.pe_array[pe_num]; 766 pe->flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS); 767 pe->pbus = bus; 768 pe->pdev = NULL; 769 pe->tce32_seg = -1; 770 pe->mve_number = -1; 771 pe->rid = bus->busn_res.start << 8; 772 pe->dma_weight = 0; 773 774 if (all) 775 pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n", 776 bus->busn_res.start, bus->busn_res.end, pe_num); 777 else 778 pe_info(pe, "Secondary bus %d associated with PE#%d\n", 779 bus->busn_res.start, pe_num); 780 781 if (pnv_ioda_configure_pe(phb, pe)) { 782 /* XXX What do we do here ? */ 783 if (pe_num) 784 pnv_ioda_free_pe(phb, pe_num); 785 pe->pbus = NULL; 786 return; 787 } 788 789 /* Associate it with all child devices */ 790 pnv_ioda_setup_same_PE(bus, pe); 791 792 /* Put PE to the list */ 793 list_add_tail(&pe->list, &phb->ioda.pe_list); 794 795 /* Account for one DMA PE if at least one DMA capable device exist 796 * below the bridge 797 */ 798 if (pe->dma_weight != 0) { 799 phb->ioda.dma_weight += pe->dma_weight; 800 phb->ioda.dma_pe_count++; 801 } 802 803 /* Link the PE */ 804 pnv_ioda_link_pe_by_weight(phb, pe); 805 } 806 807 static void pnv_ioda_setup_PEs(struct pci_bus *bus) 808 { 809 struct pci_dev *dev; 810 811 pnv_ioda_setup_bus_PE(bus, 0); 812 813 list_for_each_entry(dev, &bus->devices, bus_list) { 814 if (dev->subordinate) { 815 if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE) 816 pnv_ioda_setup_bus_PE(dev->subordinate, 1); 817 else 818 pnv_ioda_setup_PEs(dev->subordinate); 819 } 820 } 821 } 822 823 /* 824 * Configure PEs so that the downstream PCI buses and devices 825 * could have their associated PE#. Unfortunately, we didn't 826 * figure out the way to identify the PLX bridge yet. So we 827 * simply put the PCI bus and the subordinate behind the root 828 * port to PE# here. The game rule here is expected to be changed 829 * as soon as we can detected PLX bridge correctly. 830 */ 831 static void pnv_pci_ioda_setup_PEs(void) 832 { 833 struct pci_controller *hose, *tmp; 834 struct pnv_phb *phb; 835 836 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 837 phb = hose->private_data; 838 839 /* M64 layout might affect PE allocation */ 840 if (phb->alloc_m64_pe) 841 phb->alloc_m64_pe(phb); 842 843 pnv_ioda_setup_PEs(hose->bus); 844 } 845 } 846 847 static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev) 848 { 849 struct pci_dn *pdn = pci_get_pdn(pdev); 850 struct pnv_ioda_pe *pe; 851 852 /* 853 * The function can be called while the PE# 854 * hasn't been assigned. Do nothing for the 855 * case. 856 */ 857 if (!pdn || pdn->pe_number == IODA_INVALID_PE) 858 return; 859 860 pe = &phb->ioda.pe_array[pdn->pe_number]; 861 WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); 862 set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table); 863 } 864 865 static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb, 866 struct pci_dev *pdev, u64 dma_mask) 867 { 868 struct pci_dn *pdn = pci_get_pdn(pdev); 869 struct pnv_ioda_pe *pe; 870 uint64_t top; 871 bool bypass = false; 872 873 if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) 874 return -ENODEV;; 875 876 pe = &phb->ioda.pe_array[pdn->pe_number]; 877 if (pe->tce_bypass_enabled) { 878 top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1; 879 bypass = (dma_mask >= top); 880 } 881 882 if (bypass) { 883 dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n"); 884 set_dma_ops(&pdev->dev, &dma_direct_ops); 885 set_dma_offset(&pdev->dev, pe->tce_bypass_base); 886 } else { 887 dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n"); 888 set_dma_ops(&pdev->dev, &dma_iommu_ops); 889 set_iommu_table_base(&pdev->dev, &pe->tce32_table); 890 } 891 *pdev->dev.dma_mask = dma_mask; 892 return 0; 893 } 894 895 static u64 pnv_pci_ioda_dma_get_required_mask(struct pnv_phb *phb, 896 struct pci_dev *pdev) 897 { 898 struct pci_dn *pdn = pci_get_pdn(pdev); 899 struct pnv_ioda_pe *pe; 900 u64 end, mask; 901 902 if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) 903 return 0; 904 905 pe = &phb->ioda.pe_array[pdn->pe_number]; 906 if (!pe->tce_bypass_enabled) 907 return __dma_get_required_mask(&pdev->dev); 908 909 910 end = pe->tce_bypass_base + memblock_end_of_DRAM(); 911 mask = 1ULL << (fls64(end) - 1); 912 mask += mask - 1; 913 914 return mask; 915 } 916 917 static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, 918 struct pci_bus *bus, 919 bool add_to_iommu_group) 920 { 921 struct pci_dev *dev; 922 923 list_for_each_entry(dev, &bus->devices, bus_list) { 924 if (add_to_iommu_group) 925 set_iommu_table_base_and_group(&dev->dev, 926 &pe->tce32_table); 927 else 928 set_iommu_table_base(&dev->dev, &pe->tce32_table); 929 930 if (dev->subordinate) 931 pnv_ioda_setup_bus_dma(pe, dev->subordinate, 932 add_to_iommu_group); 933 } 934 } 935 936 static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe, 937 struct iommu_table *tbl, 938 __be64 *startp, __be64 *endp, bool rm) 939 { 940 __be64 __iomem *invalidate = rm ? 941 (__be64 __iomem *)pe->tce_inval_reg_phys : 942 (__be64 __iomem *)tbl->it_index; 943 unsigned long start, end, inc; 944 const unsigned shift = tbl->it_page_shift; 945 946 start = __pa(startp); 947 end = __pa(endp); 948 949 /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */ 950 if (tbl->it_busno) { 951 start <<= shift; 952 end <<= shift; 953 inc = 128ull << shift; 954 start |= tbl->it_busno; 955 end |= tbl->it_busno; 956 } else if (tbl->it_type & TCE_PCI_SWINV_PAIR) { 957 /* p7ioc-style invalidation, 2 TCEs per write */ 958 start |= (1ull << 63); 959 end |= (1ull << 63); 960 inc = 16; 961 } else { 962 /* Default (older HW) */ 963 inc = 128; 964 } 965 966 end |= inc - 1; /* round up end to be different than start */ 967 968 mb(); /* Ensure above stores are visible */ 969 while (start <= end) { 970 if (rm) 971 __raw_rm_writeq(cpu_to_be64(start), invalidate); 972 else 973 __raw_writeq(cpu_to_be64(start), invalidate); 974 start += inc; 975 } 976 977 /* 978 * The iommu layer will do another mb() for us on build() 979 * and we don't care on free() 980 */ 981 } 982 983 static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe, 984 struct iommu_table *tbl, 985 __be64 *startp, __be64 *endp, bool rm) 986 { 987 unsigned long start, end, inc; 988 __be64 __iomem *invalidate = rm ? 989 (__be64 __iomem *)pe->tce_inval_reg_phys : 990 (__be64 __iomem *)tbl->it_index; 991 const unsigned shift = tbl->it_page_shift; 992 993 /* We'll invalidate DMA address in PE scope */ 994 start = 0x2ull << 60; 995 start |= (pe->pe_number & 0xFF); 996 end = start; 997 998 /* Figure out the start, end and step */ 999 inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64)); 1000 start |= (inc << shift); 1001 inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64)); 1002 end |= (inc << shift); 1003 inc = (0x1ull << shift); 1004 mb(); 1005 1006 while (start <= end) { 1007 if (rm) 1008 __raw_rm_writeq(cpu_to_be64(start), invalidate); 1009 else 1010 __raw_writeq(cpu_to_be64(start), invalidate); 1011 start += inc; 1012 } 1013 } 1014 1015 void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, 1016 __be64 *startp, __be64 *endp, bool rm) 1017 { 1018 struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe, 1019 tce32_table); 1020 struct pnv_phb *phb = pe->phb; 1021 1022 if (phb->type == PNV_PHB_IODA1) 1023 pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm); 1024 else 1025 pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm); 1026 } 1027 1028 static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, 1029 struct pnv_ioda_pe *pe, unsigned int base, 1030 unsigned int segs) 1031 { 1032 1033 struct page *tce_mem = NULL; 1034 const __be64 *swinvp; 1035 struct iommu_table *tbl; 1036 unsigned int i; 1037 int64_t rc; 1038 void *addr; 1039 1040 /* 256M DMA window, 4K TCE pages, 8 bytes TCE */ 1041 #define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8) 1042 1043 /* XXX FIXME: Handle 64-bit only DMA devices */ 1044 /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */ 1045 /* XXX FIXME: Allocate multi-level tables on PHB3 */ 1046 1047 /* We shouldn't already have a 32-bit DMA associated */ 1048 if (WARN_ON(pe->tce32_seg >= 0)) 1049 return; 1050 1051 /* Grab a 32-bit TCE table */ 1052 pe->tce32_seg = base; 1053 pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n", 1054 (base << 28), ((base + segs) << 28) - 1); 1055 1056 /* XXX Currently, we allocate one big contiguous table for the 1057 * TCEs. We only really need one chunk per 256M of TCE space 1058 * (ie per segment) but that's an optimization for later, it 1059 * requires some added smarts with our get/put_tce implementation 1060 */ 1061 tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL, 1062 get_order(TCE32_TABLE_SIZE * segs)); 1063 if (!tce_mem) { 1064 pe_err(pe, " Failed to allocate a 32-bit TCE memory\n"); 1065 goto fail; 1066 } 1067 addr = page_address(tce_mem); 1068 memset(addr, 0, TCE32_TABLE_SIZE * segs); 1069 1070 /* Configure HW */ 1071 for (i = 0; i < segs; i++) { 1072 rc = opal_pci_map_pe_dma_window(phb->opal_id, 1073 pe->pe_number, 1074 base + i, 1, 1075 __pa(addr) + TCE32_TABLE_SIZE * i, 1076 TCE32_TABLE_SIZE, 0x1000); 1077 if (rc) { 1078 pe_err(pe, " Failed to configure 32-bit TCE table," 1079 " err %ld\n", rc); 1080 goto fail; 1081 } 1082 } 1083 1084 /* Setup linux iommu table */ 1085 tbl = &pe->tce32_table; 1086 pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs, 1087 base << 28, IOMMU_PAGE_SHIFT_4K); 1088 1089 /* OPAL variant of P7IOC SW invalidated TCEs */ 1090 swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); 1091 if (swinvp) { 1092 /* We need a couple more fields -- an address and a data 1093 * to or. Since the bus is only printed out on table free 1094 * errors, and on the first pass the data will be a relative 1095 * bus number, print that out instead. 1096 */ 1097 pe->tce_inval_reg_phys = be64_to_cpup(swinvp); 1098 tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys, 1099 8); 1100 tbl->it_type |= (TCE_PCI_SWINV_CREATE | 1101 TCE_PCI_SWINV_FREE | 1102 TCE_PCI_SWINV_PAIR); 1103 } 1104 iommu_init_table(tbl, phb->hose->node); 1105 iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); 1106 1107 if (pe->pdev) 1108 set_iommu_table_base_and_group(&pe->pdev->dev, tbl); 1109 else 1110 pnv_ioda_setup_bus_dma(pe, pe->pbus, true); 1111 1112 return; 1113 fail: 1114 /* XXX Failure: Try to fallback to 64-bit only ? */ 1115 if (pe->tce32_seg >= 0) 1116 pe->tce32_seg = -1; 1117 if (tce_mem) 1118 __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs)); 1119 } 1120 1121 static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) 1122 { 1123 struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe, 1124 tce32_table); 1125 uint16_t window_id = (pe->pe_number << 1 ) + 1; 1126 int64_t rc; 1127 1128 pe_info(pe, "%sabling 64-bit DMA bypass\n", enable ? "En" : "Dis"); 1129 if (enable) { 1130 phys_addr_t top = memblock_end_of_DRAM(); 1131 1132 top = roundup_pow_of_two(top); 1133 rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, 1134 pe->pe_number, 1135 window_id, 1136 pe->tce_bypass_base, 1137 top); 1138 } else { 1139 rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, 1140 pe->pe_number, 1141 window_id, 1142 pe->tce_bypass_base, 1143 0); 1144 1145 /* 1146 * EEH needs the mapping between IOMMU table and group 1147 * of those VFIO/KVM pass-through devices. We can postpone 1148 * resetting DMA ops until the DMA mask is configured in 1149 * host side. 1150 */ 1151 if (pe->pdev) 1152 set_iommu_table_base(&pe->pdev->dev, tbl); 1153 else 1154 pnv_ioda_setup_bus_dma(pe, pe->pbus, false); 1155 } 1156 if (rc) 1157 pe_err(pe, "OPAL error %lld configuring bypass window\n", rc); 1158 else 1159 pe->tce_bypass_enabled = enable; 1160 } 1161 1162 static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb, 1163 struct pnv_ioda_pe *pe) 1164 { 1165 /* TVE #1 is selected by PCI address bit 59 */ 1166 pe->tce_bypass_base = 1ull << 59; 1167 1168 /* Install set_bypass callback for VFIO */ 1169 pe->tce32_table.set_bypass = pnv_pci_ioda2_set_bypass; 1170 1171 /* Enable bypass by default */ 1172 pnv_pci_ioda2_set_bypass(&pe->tce32_table, true); 1173 } 1174 1175 static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, 1176 struct pnv_ioda_pe *pe) 1177 { 1178 struct page *tce_mem = NULL; 1179 void *addr; 1180 const __be64 *swinvp; 1181 struct iommu_table *tbl; 1182 unsigned int tce_table_size, end; 1183 int64_t rc; 1184 1185 /* We shouldn't already have a 32-bit DMA associated */ 1186 if (WARN_ON(pe->tce32_seg >= 0)) 1187 return; 1188 1189 /* The PE will reserve all possible 32-bits space */ 1190 pe->tce32_seg = 0; 1191 end = (1 << ilog2(phb->ioda.m32_pci_base)); 1192 tce_table_size = (end / 0x1000) * 8; 1193 pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n", 1194 end); 1195 1196 /* Allocate TCE table */ 1197 tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL, 1198 get_order(tce_table_size)); 1199 if (!tce_mem) { 1200 pe_err(pe, "Failed to allocate a 32-bit TCE memory\n"); 1201 goto fail; 1202 } 1203 addr = page_address(tce_mem); 1204 memset(addr, 0, tce_table_size); 1205 1206 /* 1207 * Map TCE table through TVT. The TVE index is the PE number 1208 * shifted by 1 bit for 32-bits DMA space. 1209 */ 1210 rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, 1211 pe->pe_number << 1, 1, __pa(addr), 1212 tce_table_size, 0x1000); 1213 if (rc) { 1214 pe_err(pe, "Failed to configure 32-bit TCE table," 1215 " err %ld\n", rc); 1216 goto fail; 1217 } 1218 1219 /* Setup linux iommu table */ 1220 tbl = &pe->tce32_table; 1221 pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0, 1222 IOMMU_PAGE_SHIFT_4K); 1223 1224 /* OPAL variant of PHB3 invalidated TCEs */ 1225 swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); 1226 if (swinvp) { 1227 /* We need a couple more fields -- an address and a data 1228 * to or. Since the bus is only printed out on table free 1229 * errors, and on the first pass the data will be a relative 1230 * bus number, print that out instead. 1231 */ 1232 pe->tce_inval_reg_phys = be64_to_cpup(swinvp); 1233 tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys, 1234 8); 1235 tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); 1236 } 1237 iommu_init_table(tbl, phb->hose->node); 1238 iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); 1239 1240 if (pe->pdev) 1241 set_iommu_table_base_and_group(&pe->pdev->dev, tbl); 1242 else 1243 pnv_ioda_setup_bus_dma(pe, pe->pbus, true); 1244 1245 /* Also create a bypass window */ 1246 pnv_pci_ioda2_setup_bypass_pe(phb, pe); 1247 return; 1248 fail: 1249 if (pe->tce32_seg >= 0) 1250 pe->tce32_seg = -1; 1251 if (tce_mem) 1252 __free_pages(tce_mem, get_order(tce_table_size)); 1253 } 1254 1255 static void pnv_ioda_setup_dma(struct pnv_phb *phb) 1256 { 1257 struct pci_controller *hose = phb->hose; 1258 unsigned int residual, remaining, segs, tw, base; 1259 struct pnv_ioda_pe *pe; 1260 1261 /* If we have more PE# than segments available, hand out one 1262 * per PE until we run out and let the rest fail. If not, 1263 * then we assign at least one segment per PE, plus more based 1264 * on the amount of devices under that PE 1265 */ 1266 if (phb->ioda.dma_pe_count > phb->ioda.tce32_count) 1267 residual = 0; 1268 else 1269 residual = phb->ioda.tce32_count - 1270 phb->ioda.dma_pe_count; 1271 1272 pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n", 1273 hose->global_number, phb->ioda.tce32_count); 1274 pr_info("PCI: %d PE# for a total weight of %d\n", 1275 phb->ioda.dma_pe_count, phb->ioda.dma_weight); 1276 1277 /* Walk our PE list and configure their DMA segments, hand them 1278 * out one base segment plus any residual segments based on 1279 * weight 1280 */ 1281 remaining = phb->ioda.tce32_count; 1282 tw = phb->ioda.dma_weight; 1283 base = 0; 1284 list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) { 1285 if (!pe->dma_weight) 1286 continue; 1287 if (!remaining) { 1288 pe_warn(pe, "No DMA32 resources available\n"); 1289 continue; 1290 } 1291 segs = 1; 1292 if (residual) { 1293 segs += ((pe->dma_weight * residual) + (tw / 2)) / tw; 1294 if (segs > remaining) 1295 segs = remaining; 1296 } 1297 1298 /* 1299 * For IODA2 compliant PHB3, we needn't care about the weight. 1300 * The all available 32-bits DMA space will be assigned to 1301 * the specific PE. 1302 */ 1303 if (phb->type == PNV_PHB_IODA1) { 1304 pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n", 1305 pe->dma_weight, segs); 1306 pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs); 1307 } else { 1308 pe_info(pe, "Assign DMA32 space\n"); 1309 segs = 0; 1310 pnv_pci_ioda2_setup_dma_pe(phb, pe); 1311 } 1312 1313 remaining -= segs; 1314 base += segs; 1315 } 1316 } 1317 1318 #ifdef CONFIG_PCI_MSI 1319 static void pnv_ioda2_msi_eoi(struct irq_data *d) 1320 { 1321 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); 1322 struct irq_chip *chip = irq_data_get_irq_chip(d); 1323 struct pnv_phb *phb = container_of(chip, struct pnv_phb, 1324 ioda.irq_chip); 1325 int64_t rc; 1326 1327 rc = opal_pci_msi_eoi(phb->opal_id, hw_irq); 1328 WARN_ON_ONCE(rc); 1329 1330 icp_native_eoi(d); 1331 } 1332 1333 1334 static void set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) 1335 { 1336 struct irq_data *idata; 1337 struct irq_chip *ichip; 1338 1339 if (phb->type != PNV_PHB_IODA2) 1340 return; 1341 1342 if (!phb->ioda.irq_chip_init) { 1343 /* 1344 * First time we setup an MSI IRQ, we need to setup the 1345 * corresponding IRQ chip to route correctly. 1346 */ 1347 idata = irq_get_irq_data(virq); 1348 ichip = irq_data_get_irq_chip(idata); 1349 phb->ioda.irq_chip_init = 1; 1350 phb->ioda.irq_chip = *ichip; 1351 phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi; 1352 } 1353 irq_set_chip(virq, &phb->ioda.irq_chip); 1354 } 1355 1356 #ifdef CONFIG_CXL_BASE 1357 1358 struct device_node *pnv_pci_to_phb_node(struct pci_dev *dev) 1359 { 1360 struct pci_controller *hose = pci_bus_to_host(dev->bus); 1361 1362 return hose->dn; 1363 } 1364 EXPORT_SYMBOL(pnv_pci_to_phb_node); 1365 1366 int pnv_phb_to_cxl(struct pci_dev *dev) 1367 { 1368 struct pci_controller *hose = pci_bus_to_host(dev->bus); 1369 struct pnv_phb *phb = hose->private_data; 1370 struct pnv_ioda_pe *pe; 1371 int rc; 1372 1373 pe = pnv_ioda_get_pe(dev); 1374 if (!pe) 1375 return -ENODEV; 1376 1377 pe_info(pe, "Switching PHB to CXL\n"); 1378 1379 rc = opal_pci_set_phb_cxl_mode(phb->opal_id, 1, pe->pe_number); 1380 if (rc) 1381 dev_err(&dev->dev, "opal_pci_set_phb_cxl_mode failed: %i\n", rc); 1382 1383 return rc; 1384 } 1385 EXPORT_SYMBOL(pnv_phb_to_cxl); 1386 1387 /* Find PHB for cxl dev and allocate MSI hwirqs? 1388 * Returns the absolute hardware IRQ number 1389 */ 1390 int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num) 1391 { 1392 struct pci_controller *hose = pci_bus_to_host(dev->bus); 1393 struct pnv_phb *phb = hose->private_data; 1394 int hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, num); 1395 1396 if (hwirq < 0) { 1397 dev_warn(&dev->dev, "Failed to find a free MSI\n"); 1398 return -ENOSPC; 1399 } 1400 1401 return phb->msi_base + hwirq; 1402 } 1403 EXPORT_SYMBOL(pnv_cxl_alloc_hwirqs); 1404 1405 void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num) 1406 { 1407 struct pci_controller *hose = pci_bus_to_host(dev->bus); 1408 struct pnv_phb *phb = hose->private_data; 1409 1410 msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, num); 1411 } 1412 EXPORT_SYMBOL(pnv_cxl_release_hwirqs); 1413 1414 void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs, 1415 struct pci_dev *dev) 1416 { 1417 struct pci_controller *hose = pci_bus_to_host(dev->bus); 1418 struct pnv_phb *phb = hose->private_data; 1419 int i, hwirq; 1420 1421 for (i = 1; i < CXL_IRQ_RANGES; i++) { 1422 if (!irqs->range[i]) 1423 continue; 1424 pr_devel("cxl release irq range 0x%x: offset: 0x%lx limit: %ld\n", 1425 i, irqs->offset[i], 1426 irqs->range[i]); 1427 hwirq = irqs->offset[i] - phb->msi_base; 1428 msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, 1429 irqs->range[i]); 1430 } 1431 } 1432 EXPORT_SYMBOL(pnv_cxl_release_hwirq_ranges); 1433 1434 int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs, 1435 struct pci_dev *dev, int num) 1436 { 1437 struct pci_controller *hose = pci_bus_to_host(dev->bus); 1438 struct pnv_phb *phb = hose->private_data; 1439 int i, hwirq, try; 1440 1441 memset(irqs, 0, sizeof(struct cxl_irq_ranges)); 1442 1443 /* 0 is reserved for the multiplexed PSL DSI interrupt */ 1444 for (i = 1; i < CXL_IRQ_RANGES && num; i++) { 1445 try = num; 1446 while (try) { 1447 hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, try); 1448 if (hwirq >= 0) 1449 break; 1450 try /= 2; 1451 } 1452 if (!try) 1453 goto fail; 1454 1455 irqs->offset[i] = phb->msi_base + hwirq; 1456 irqs->range[i] = try; 1457 pr_devel("cxl alloc irq range 0x%x: offset: 0x%lx limit: %li\n", 1458 i, irqs->offset[i], irqs->range[i]); 1459 num -= try; 1460 } 1461 if (num) 1462 goto fail; 1463 1464 return 0; 1465 fail: 1466 pnv_cxl_release_hwirq_ranges(irqs, dev); 1467 return -ENOSPC; 1468 } 1469 EXPORT_SYMBOL(pnv_cxl_alloc_hwirq_ranges); 1470 1471 int pnv_cxl_get_irq_count(struct pci_dev *dev) 1472 { 1473 struct pci_controller *hose = pci_bus_to_host(dev->bus); 1474 struct pnv_phb *phb = hose->private_data; 1475 1476 return phb->msi_bmp.irq_count; 1477 } 1478 EXPORT_SYMBOL(pnv_cxl_get_irq_count); 1479 1480 int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, 1481 unsigned int virq) 1482 { 1483 struct pci_controller *hose = pci_bus_to_host(dev->bus); 1484 struct pnv_phb *phb = hose->private_data; 1485 unsigned int xive_num = hwirq - phb->msi_base; 1486 struct pnv_ioda_pe *pe; 1487 int rc; 1488 1489 if (!(pe = pnv_ioda_get_pe(dev))) 1490 return -ENODEV; 1491 1492 /* Assign XIVE to PE */ 1493 rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num); 1494 if (rc) { 1495 pe_warn(pe, "%s: OPAL error %d setting msi_base 0x%x " 1496 "hwirq 0x%x XIVE 0x%x PE\n", 1497 pci_name(dev), rc, phb->msi_base, hwirq, xive_num); 1498 return -EIO; 1499 } 1500 set_msi_irq_chip(phb, virq); 1501 1502 return 0; 1503 } 1504 EXPORT_SYMBOL(pnv_cxl_ioda_msi_setup); 1505 #endif 1506 1507 static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, 1508 unsigned int hwirq, unsigned int virq, 1509 unsigned int is_64, struct msi_msg *msg) 1510 { 1511 struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev); 1512 struct pci_dn *pdn = pci_get_pdn(dev); 1513 unsigned int xive_num = hwirq - phb->msi_base; 1514 __be32 data; 1515 int rc; 1516 1517 /* No PE assigned ? bail out ... no MSI for you ! */ 1518 if (pe == NULL) 1519 return -ENXIO; 1520 1521 /* Check if we have an MVE */ 1522 if (pe->mve_number < 0) 1523 return -ENXIO; 1524 1525 /* Force 32-bit MSI on some broken devices */ 1526 if (pdn && pdn->force_32bit_msi) 1527 is_64 = 0; 1528 1529 /* Assign XIVE to PE */ 1530 rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num); 1531 if (rc) { 1532 pr_warn("%s: OPAL error %d setting XIVE %d PE\n", 1533 pci_name(dev), rc, xive_num); 1534 return -EIO; 1535 } 1536 1537 if (is_64) { 1538 __be64 addr64; 1539 1540 rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1, 1541 &addr64, &data); 1542 if (rc) { 1543 pr_warn("%s: OPAL error %d getting 64-bit MSI data\n", 1544 pci_name(dev), rc); 1545 return -EIO; 1546 } 1547 msg->address_hi = be64_to_cpu(addr64) >> 32; 1548 msg->address_lo = be64_to_cpu(addr64) & 0xfffffffful; 1549 } else { 1550 __be32 addr32; 1551 1552 rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1, 1553 &addr32, &data); 1554 if (rc) { 1555 pr_warn("%s: OPAL error %d getting 32-bit MSI data\n", 1556 pci_name(dev), rc); 1557 return -EIO; 1558 } 1559 msg->address_hi = 0; 1560 msg->address_lo = be32_to_cpu(addr32); 1561 } 1562 msg->data = be32_to_cpu(data); 1563 1564 set_msi_irq_chip(phb, virq); 1565 1566 pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d)," 1567 " address=%x_%08x data=%x PE# %d\n", 1568 pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num, 1569 msg->address_hi, msg->address_lo, data, pe->pe_number); 1570 1571 return 0; 1572 } 1573 1574 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) 1575 { 1576 unsigned int count; 1577 const __be32 *prop = of_get_property(phb->hose->dn, 1578 "ibm,opal-msi-ranges", NULL); 1579 if (!prop) { 1580 /* BML Fallback */ 1581 prop = of_get_property(phb->hose->dn, "msi-ranges", NULL); 1582 } 1583 if (!prop) 1584 return; 1585 1586 phb->msi_base = be32_to_cpup(prop); 1587 count = be32_to_cpup(prop + 1); 1588 if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) { 1589 pr_err("PCI %d: Failed to allocate MSI bitmap !\n", 1590 phb->hose->global_number); 1591 return; 1592 } 1593 1594 phb->msi_setup = pnv_pci_ioda_msi_setup; 1595 phb->msi32_support = 1; 1596 pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n", 1597 count, phb->msi_base); 1598 } 1599 #else 1600 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { } 1601 #endif /* CONFIG_PCI_MSI */ 1602 1603 /* 1604 * This function is supposed to be called on basis of PE from top 1605 * to bottom style. So the the I/O or MMIO segment assigned to 1606 * parent PE could be overrided by its child PEs if necessary. 1607 */ 1608 static void pnv_ioda_setup_pe_seg(struct pci_controller *hose, 1609 struct pnv_ioda_pe *pe) 1610 { 1611 struct pnv_phb *phb = hose->private_data; 1612 struct pci_bus_region region; 1613 struct resource *res; 1614 int i, index; 1615 int rc; 1616 1617 /* 1618 * NOTE: We only care PCI bus based PE for now. For PCI 1619 * device based PE, for example SRIOV sensitive VF should 1620 * be figured out later. 1621 */ 1622 BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))); 1623 1624 pci_bus_for_each_resource(pe->pbus, res, i) { 1625 if (!res || !res->flags || 1626 res->start > res->end) 1627 continue; 1628 1629 if (res->flags & IORESOURCE_IO) { 1630 region.start = res->start - phb->ioda.io_pci_base; 1631 region.end = res->end - phb->ioda.io_pci_base; 1632 index = region.start / phb->ioda.io_segsize; 1633 1634 while (index < phb->ioda.total_pe && 1635 region.start <= region.end) { 1636 phb->ioda.io_segmap[index] = pe->pe_number; 1637 rc = opal_pci_map_pe_mmio_window(phb->opal_id, 1638 pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index); 1639 if (rc != OPAL_SUCCESS) { 1640 pr_err("%s: OPAL error %d when mapping IO " 1641 "segment #%d to PE#%d\n", 1642 __func__, rc, index, pe->pe_number); 1643 break; 1644 } 1645 1646 region.start += phb->ioda.io_segsize; 1647 index++; 1648 } 1649 } else if (res->flags & IORESOURCE_MEM) { 1650 region.start = res->start - 1651 hose->mem_offset[0] - 1652 phb->ioda.m32_pci_base; 1653 region.end = res->end - 1654 hose->mem_offset[0] - 1655 phb->ioda.m32_pci_base; 1656 index = region.start / phb->ioda.m32_segsize; 1657 1658 while (index < phb->ioda.total_pe && 1659 region.start <= region.end) { 1660 phb->ioda.m32_segmap[index] = pe->pe_number; 1661 rc = opal_pci_map_pe_mmio_window(phb->opal_id, 1662 pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index); 1663 if (rc != OPAL_SUCCESS) { 1664 pr_err("%s: OPAL error %d when mapping M32 " 1665 "segment#%d to PE#%d", 1666 __func__, rc, index, pe->pe_number); 1667 break; 1668 } 1669 1670 region.start += phb->ioda.m32_segsize; 1671 index++; 1672 } 1673 } 1674 } 1675 } 1676 1677 static void pnv_pci_ioda_setup_seg(void) 1678 { 1679 struct pci_controller *tmp, *hose; 1680 struct pnv_phb *phb; 1681 struct pnv_ioda_pe *pe; 1682 1683 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 1684 phb = hose->private_data; 1685 list_for_each_entry(pe, &phb->ioda.pe_list, list) { 1686 pnv_ioda_setup_pe_seg(hose, pe); 1687 } 1688 } 1689 } 1690 1691 static void pnv_pci_ioda_setup_DMA(void) 1692 { 1693 struct pci_controller *hose, *tmp; 1694 struct pnv_phb *phb; 1695 1696 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 1697 pnv_ioda_setup_dma(hose->private_data); 1698 1699 /* Mark the PHB initialization done */ 1700 phb = hose->private_data; 1701 phb->initialized = 1; 1702 } 1703 } 1704 1705 static void pnv_pci_ioda_create_dbgfs(void) 1706 { 1707 #ifdef CONFIG_DEBUG_FS 1708 struct pci_controller *hose, *tmp; 1709 struct pnv_phb *phb; 1710 char name[16]; 1711 1712 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 1713 phb = hose->private_data; 1714 1715 sprintf(name, "PCI%04x", hose->global_number); 1716 phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root); 1717 if (!phb->dbgfs) 1718 pr_warning("%s: Error on creating debugfs on PHB#%x\n", 1719 __func__, hose->global_number); 1720 } 1721 #endif /* CONFIG_DEBUG_FS */ 1722 } 1723 1724 static void pnv_pci_ioda_fixup(void) 1725 { 1726 pnv_pci_ioda_setup_PEs(); 1727 pnv_pci_ioda_setup_seg(); 1728 pnv_pci_ioda_setup_DMA(); 1729 1730 pnv_pci_ioda_create_dbgfs(); 1731 1732 #ifdef CONFIG_EEH 1733 eeh_init(); 1734 eeh_addr_cache_build(); 1735 #endif 1736 } 1737 1738 /* 1739 * Returns the alignment for I/O or memory windows for P2P 1740 * bridges. That actually depends on how PEs are segmented. 1741 * For now, we return I/O or M32 segment size for PE sensitive 1742 * P2P bridges. Otherwise, the default values (4KiB for I/O, 1743 * 1MiB for memory) will be returned. 1744 * 1745 * The current PCI bus might be put into one PE, which was 1746 * create against the parent PCI bridge. For that case, we 1747 * needn't enlarge the alignment so that we can save some 1748 * resources. 1749 */ 1750 static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, 1751 unsigned long type) 1752 { 1753 struct pci_dev *bridge; 1754 struct pci_controller *hose = pci_bus_to_host(bus); 1755 struct pnv_phb *phb = hose->private_data; 1756 int num_pci_bridges = 0; 1757 1758 bridge = bus->self; 1759 while (bridge) { 1760 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) { 1761 num_pci_bridges++; 1762 if (num_pci_bridges >= 2) 1763 return 1; 1764 } 1765 1766 bridge = bridge->bus->self; 1767 } 1768 1769 /* We fail back to M32 if M64 isn't supported */ 1770 if (phb->ioda.m64_segsize && 1771 pnv_pci_is_mem_pref_64(type)) 1772 return phb->ioda.m64_segsize; 1773 if (type & IORESOURCE_MEM) 1774 return phb->ioda.m32_segsize; 1775 1776 return phb->ioda.io_segsize; 1777 } 1778 1779 /* Prevent enabling devices for which we couldn't properly 1780 * assign a PE 1781 */ 1782 static int pnv_pci_enable_device_hook(struct pci_dev *dev) 1783 { 1784 struct pci_controller *hose = pci_bus_to_host(dev->bus); 1785 struct pnv_phb *phb = hose->private_data; 1786 struct pci_dn *pdn; 1787 1788 /* The function is probably called while the PEs have 1789 * not be created yet. For example, resource reassignment 1790 * during PCI probe period. We just skip the check if 1791 * PEs isn't ready. 1792 */ 1793 if (!phb->initialized) 1794 return 0; 1795 1796 pdn = pci_get_pdn(dev); 1797 if (!pdn || pdn->pe_number == IODA_INVALID_PE) 1798 return -EINVAL; 1799 1800 return 0; 1801 } 1802 1803 static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus, 1804 u32 devfn) 1805 { 1806 return phb->ioda.pe_rmap[(bus->number << 8) | devfn]; 1807 } 1808 1809 static void pnv_pci_ioda_shutdown(struct pnv_phb *phb) 1810 { 1811 opal_pci_reset(phb->opal_id, OPAL_RESET_PCI_IODA_TABLE, 1812 OPAL_ASSERT_RESET); 1813 } 1814 1815 static void __init pnv_pci_init_ioda_phb(struct device_node *np, 1816 u64 hub_id, int ioda_type) 1817 { 1818 struct pci_controller *hose; 1819 struct pnv_phb *phb; 1820 unsigned long size, m32map_off, pemap_off, iomap_off = 0; 1821 const __be64 *prop64; 1822 const __be32 *prop32; 1823 int len; 1824 u64 phb_id; 1825 void *aux; 1826 long rc; 1827 1828 pr_info("Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name); 1829 1830 prop64 = of_get_property(np, "ibm,opal-phbid", NULL); 1831 if (!prop64) { 1832 pr_err(" Missing \"ibm,opal-phbid\" property !\n"); 1833 return; 1834 } 1835 phb_id = be64_to_cpup(prop64); 1836 pr_debug(" PHB-ID : 0x%016llx\n", phb_id); 1837 1838 phb = alloc_bootmem(sizeof(struct pnv_phb)); 1839 if (!phb) { 1840 pr_err(" Out of memory !\n"); 1841 return; 1842 } 1843 1844 /* Allocate PCI controller */ 1845 memset(phb, 0, sizeof(struct pnv_phb)); 1846 phb->hose = hose = pcibios_alloc_controller(np); 1847 if (!phb->hose) { 1848 pr_err(" Can't allocate PCI controller for %s\n", 1849 np->full_name); 1850 free_bootmem((unsigned long)phb, sizeof(struct pnv_phb)); 1851 return; 1852 } 1853 1854 spin_lock_init(&phb->lock); 1855 prop32 = of_get_property(np, "bus-range", &len); 1856 if (prop32 && len == 8) { 1857 hose->first_busno = be32_to_cpu(prop32[0]); 1858 hose->last_busno = be32_to_cpu(prop32[1]); 1859 } else { 1860 pr_warn(" Broken <bus-range> on %s\n", np->full_name); 1861 hose->first_busno = 0; 1862 hose->last_busno = 0xff; 1863 } 1864 hose->private_data = phb; 1865 phb->hub_id = hub_id; 1866 phb->opal_id = phb_id; 1867 phb->type = ioda_type; 1868 1869 /* Detect specific models for error handling */ 1870 if (of_device_is_compatible(np, "ibm,p7ioc-pciex")) 1871 phb->model = PNV_PHB_MODEL_P7IOC; 1872 else if (of_device_is_compatible(np, "ibm,power8-pciex")) 1873 phb->model = PNV_PHB_MODEL_PHB3; 1874 else 1875 phb->model = PNV_PHB_MODEL_UNKNOWN; 1876 1877 /* Parse 32-bit and IO ranges (if any) */ 1878 pci_process_bridge_OF_ranges(hose, np, !hose->global_number); 1879 1880 /* Get registers */ 1881 phb->regs = of_iomap(np, 0); 1882 if (phb->regs == NULL) 1883 pr_err(" Failed to map registers !\n"); 1884 1885 /* Initialize more IODA stuff */ 1886 phb->ioda.total_pe = 1; 1887 prop32 = of_get_property(np, "ibm,opal-num-pes", NULL); 1888 if (prop32) 1889 phb->ioda.total_pe = be32_to_cpup(prop32); 1890 prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL); 1891 if (prop32) 1892 phb->ioda.reserved_pe = be32_to_cpup(prop32); 1893 1894 /* Parse 64-bit MMIO range */ 1895 pnv_ioda_parse_m64_window(phb); 1896 1897 phb->ioda.m32_size = resource_size(&hose->mem_resources[0]); 1898 /* FW Has already off top 64k of M32 space (MSI space) */ 1899 phb->ioda.m32_size += 0x10000; 1900 1901 phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe; 1902 phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0]; 1903 phb->ioda.io_size = hose->pci_io_size; 1904 phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe; 1905 phb->ioda.io_pci_base = 0; /* XXX calculate this ? */ 1906 1907 /* Allocate aux data & arrays. We don't have IO ports on PHB3 */ 1908 size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long)); 1909 m32map_off = size; 1910 size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]); 1911 if (phb->type == PNV_PHB_IODA1) { 1912 iomap_off = size; 1913 size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]); 1914 } 1915 pemap_off = size; 1916 size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe); 1917 aux = alloc_bootmem(size); 1918 memset(aux, 0, size); 1919 phb->ioda.pe_alloc = aux; 1920 phb->ioda.m32_segmap = aux + m32map_off; 1921 if (phb->type == PNV_PHB_IODA1) 1922 phb->ioda.io_segmap = aux + iomap_off; 1923 phb->ioda.pe_array = aux + pemap_off; 1924 set_bit(phb->ioda.reserved_pe, phb->ioda.pe_alloc); 1925 1926 INIT_LIST_HEAD(&phb->ioda.pe_dma_list); 1927 INIT_LIST_HEAD(&phb->ioda.pe_list); 1928 1929 /* Calculate how many 32-bit TCE segments we have */ 1930 phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28; 1931 1932 #if 0 /* We should really do that ... */ 1933 rc = opal_pci_set_phb_mem_window(opal->phb_id, 1934 window_type, 1935 window_num, 1936 starting_real_address, 1937 starting_pci_address, 1938 segment_size); 1939 #endif 1940 1941 pr_info(" %03d (%03d) PE's M32: 0x%x [segment=0x%x]\n", 1942 phb->ioda.total_pe, phb->ioda.reserved_pe, 1943 phb->ioda.m32_size, phb->ioda.m32_segsize); 1944 if (phb->ioda.m64_size) 1945 pr_info(" M64: 0x%lx [segment=0x%lx]\n", 1946 phb->ioda.m64_size, phb->ioda.m64_segsize); 1947 if (phb->ioda.io_size) 1948 pr_info(" IO: 0x%x [segment=0x%x]\n", 1949 phb->ioda.io_size, phb->ioda.io_segsize); 1950 1951 1952 phb->hose->ops = &pnv_pci_ops; 1953 phb->get_pe_state = pnv_ioda_get_pe_state; 1954 phb->freeze_pe = pnv_ioda_freeze_pe; 1955 phb->unfreeze_pe = pnv_ioda_unfreeze_pe; 1956 #ifdef CONFIG_EEH 1957 phb->eeh_ops = &ioda_eeh_ops; 1958 #endif 1959 1960 /* Setup RID -> PE mapping function */ 1961 phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe; 1962 1963 /* Setup TCEs */ 1964 phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; 1965 phb->dma_set_mask = pnv_pci_ioda_dma_set_mask; 1966 phb->dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask; 1967 1968 /* Setup shutdown function for kexec */ 1969 phb->shutdown = pnv_pci_ioda_shutdown; 1970 1971 /* Setup MSI support */ 1972 pnv_pci_init_ioda_msis(phb); 1973 1974 /* 1975 * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here 1976 * to let the PCI core do resource assignment. It's supposed 1977 * that the PCI core will do correct I/O and MMIO alignment 1978 * for the P2P bridge bars so that each PCI bus (excluding 1979 * the child P2P bridges) can form individual PE. 1980 */ 1981 ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; 1982 ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook; 1983 ppc_md.pcibios_window_alignment = pnv_pci_window_alignment; 1984 ppc_md.pcibios_reset_secondary_bus = pnv_pci_reset_secondary_bus; 1985 pci_add_flags(PCI_REASSIGN_ALL_RSRC); 1986 1987 /* Reset IODA tables to a clean state */ 1988 rc = opal_pci_reset(phb_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET); 1989 if (rc) 1990 pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc); 1991 1992 /* If we're running in kdump kerenl, the previous kerenl never 1993 * shutdown PCI devices correctly. We already got IODA table 1994 * cleaned out. So we have to issue PHB reset to stop all PCI 1995 * transactions from previous kerenl. 1996 */ 1997 if (is_kdump_kernel()) { 1998 pr_info(" Issue PHB reset ...\n"); 1999 ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL); 2000 ioda_eeh_phb_reset(hose, OPAL_DEASSERT_RESET); 2001 } 2002 2003 /* Configure M64 window */ 2004 if (phb->init_m64 && phb->init_m64(phb)) 2005 hose->mem_resources[1].flags = 0; 2006 } 2007 2008 void __init pnv_pci_init_ioda2_phb(struct device_node *np) 2009 { 2010 pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2); 2011 } 2012 2013 void __init pnv_pci_init_ioda_hub(struct device_node *np) 2014 { 2015 struct device_node *phbn; 2016 const __be64 *prop64; 2017 u64 hub_id; 2018 2019 pr_info("Probing IODA IO-Hub %s\n", np->full_name); 2020 2021 prop64 = of_get_property(np, "ibm,opal-hubid", NULL); 2022 if (!prop64) { 2023 pr_err(" Missing \"ibm,opal-hubid\" property !\n"); 2024 return; 2025 } 2026 hub_id = be64_to_cpup(prop64); 2027 pr_devel(" HUB-ID : 0x%016llx\n", hub_id); 2028 2029 /* Count child PHBs */ 2030 for_each_child_of_node(np, phbn) { 2031 /* Look for IODA1 PHBs */ 2032 if (of_device_is_compatible(phbn, "ibm,ioda-phb")) 2033 pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1); 2034 } 2035 } 2036