/* pci_sun4v.c: SUN4V specific PCI controller support.
 *
 * Copyright (C) 2006, 2007, 2008 David S. Miller (davem@davemloft.net)
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/irq.h>
#include <linux/msi.h>
#include <linux/export.h>
#include <linux/log2.h>
#include <linux/of_device.h>
#include <linux/iommu-common.h>

#include <asm/iommu.h>
#include <asm/irq.h>
#include <asm/hypervisor.h>
#include <asm/prom.h>

#include "pci_impl.h"
#include "iommu_common.h"

#include "pci_sun4v.h"

#define DRIVER_NAME	"pci_sun4v"
#define PFX		DRIVER_NAME ": "

static unsigned long vpci_major;
static unsigned long vpci_minor;

struct vpci_version {
	unsigned long major;
	unsigned long minor;
};

/* Ordered from largest major to lowest */
static struct vpci_version vpci_versions[] = {
	{ .major = 2, .minor = 0 },
	{ .major = 1, .minor = 1 },
};

static unsigned long vatu_major = 1;
static unsigned long vatu_minor = 1;

#define PGLIST_NENTS	(PAGE_SIZE / sizeof(u64))

struct iommu_batch {
	struct device	*dev;		/* Device mapping is for. */
	unsigned long	prot;		/* IOMMU page protections */
	unsigned long	entry;		/* Index into IOTSB. */
	u64		*pglist;	/* List of physical pages */
	unsigned long	npages;		/* Number of pages in list. */
};

static DEFINE_PER_CPU(struct iommu_batch, iommu_batch);
static int iommu_batch_initialized;

/* Interrupts must be disabled. */
static inline void iommu_batch_start(struct device *dev, unsigned long prot, unsigned long entry)
{
	struct iommu_batch *p = this_cpu_ptr(&iommu_batch);

	p->dev = dev;
	p->prot = prot;
	p->entry = entry;
	p->npages = 0;
}

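/* Flush the accumulated per-cpu page list to the hypervisor.  Mappings
 * whose DMA mask fits in 32 bits go through the legacy IOMMU call
 * (pci_sun4v_iommu_map); larger masks go through the ATU IOTSB call
 * (pci_sun4v_iotsb_map).  Either call may map only part of the list,
 * so the loop below retries until every page in the batch is mapped.
 */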
/* Interrupts must be disabled. */
static long iommu_batch_flush(struct iommu_batch *p, u64 mask)
{
	struct pci_pbm_info *pbm = p->dev->archdata.host_controller;
	u64 *pglist = p->pglist;
	u64 index_count;
	unsigned long devhandle = pbm->devhandle;
	unsigned long prot = p->prot;
	unsigned long entry = p->entry;
	unsigned long npages = p->npages;
	unsigned long iotsb_num;
	unsigned long ret;
	long num;

	/* VPCI maj=1, min=[0,1] only supports read and write */
	if (vpci_major < 2)
		prot &= (HV_PCI_MAP_ATTR_READ | HV_PCI_MAP_ATTR_WRITE);

	while (npages != 0) {
		if (mask <= DMA_BIT_MASK(32)) {
			num = pci_sun4v_iommu_map(devhandle,
						  HV_PCI_TSBID(0, entry),
						  npages,
						  prot,
						  __pa(pglist));
			if (unlikely(num < 0)) {
				pr_err_ratelimited("%s: IOMMU map of [%08lx:%08llx:%lx:%lx:%lx] failed with status %ld\n",
						   __func__,
						   devhandle,
						   HV_PCI_TSBID(0, entry),
						   npages, prot, __pa(pglist),
						   num);
				return -1;
			}
		} else {
			index_count = HV_PCI_IOTSB_INDEX_COUNT(npages, entry);
			iotsb_num = pbm->iommu->atu->iotsb->iotsb_num;
			ret = pci_sun4v_iotsb_map(devhandle,
						  iotsb_num,
						  index_count,
						  prot,
						  __pa(pglist),
						  &num);
			if (unlikely(ret != HV_EOK)) {
				pr_err_ratelimited("%s: ATU map of [%08lx:%lx:%llx:%lx:%lx] failed with status %ld\n",
						   __func__,
						   devhandle, iotsb_num,
						   index_count, prot,
						   __pa(pglist), ret);
				return -1;
			}
		}
		entry += num;
		npages -= num;
		pglist += num;
	}

	p->entry = entry;
	p->npages = 0;

	return 0;
}

static inline void iommu_batch_new_entry(unsigned long entry, u64 mask)
{
	struct iommu_batch *p = this_cpu_ptr(&iommu_batch);

	if (p->entry + p->npages == entry)
		return;
	if (p->entry != ~0UL)
		iommu_batch_flush(p, mask);
	p->entry = entry;
}

/* Interrupts must be disabled. */
static inline long iommu_batch_add(u64 phys_page, u64 mask)
{
	struct iommu_batch *p = this_cpu_ptr(&iommu_batch);

	BUG_ON(p->npages >= PGLIST_NENTS);

	p->pglist[p->npages++] = phys_page;
	if (p->npages == PGLIST_NENTS)
		return iommu_batch_flush(p, mask);

	return 0;
}

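/* Complete a batch: push any entries still pending in the per-cpu
 * page list out to the hypervisor.
 */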
/* Interrupts must be disabled. */
static inline long iommu_batch_end(u64 mask)
{
	struct iommu_batch *p = this_cpu_ptr(&iommu_batch);

	BUG_ON(p->npages >= PGLIST_NENTS);

	return iommu_batch_flush(p, mask);
}

static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
				   dma_addr_t *dma_addrp, gfp_t gfp,
				   unsigned long attrs)
{
	u64 mask;
	unsigned long flags, order, first_page, npages, n;
	unsigned long prot = 0;
	struct iommu *iommu;
	struct atu *atu;
	struct iommu_map_table *tbl;
	struct page *page;
	void *ret;
	long entry;
	int nid;

	size = IO_PAGE_ALIGN(size);
	order = get_order(size);
	if (unlikely(order >= MAX_ORDER))
		return NULL;

	npages = size >> IO_PAGE_SHIFT;

	if (attrs & DMA_ATTR_WEAK_ORDERING)
		prot = HV_PCI_MAP_ATTR_RELAXED_ORDER;

	nid = dev->archdata.numa_node;
	page = alloc_pages_node(nid, gfp, order);
	if (unlikely(!page))
		return NULL;

	first_page = (unsigned long) page_address(page);
	memset((char *)first_page, 0, PAGE_SIZE << order);

	iommu = dev->archdata.iommu;
	atu = iommu->atu;

	mask = dev->coherent_dma_mask;
	if (mask <= DMA_BIT_MASK(32))
		tbl = &iommu->tbl;
	else
		tbl = &atu->tbl;

	entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL,
				      (unsigned long)(-1), 0);

	if (unlikely(entry == IOMMU_ERROR_CODE))
		goto range_alloc_fail;

	*dma_addrp = (tbl->table_map_base + (entry << IO_PAGE_SHIFT));
	ret = (void *) first_page;
	first_page = __pa(first_page);

	local_irq_save(flags);

	iommu_batch_start(dev,
			  (HV_PCI_MAP_ATTR_READ | prot |
			   HV_PCI_MAP_ATTR_WRITE),
			  entry);

	for (n = 0; n < npages; n++) {
		long err = iommu_batch_add(first_page + (n * PAGE_SIZE), mask);
		if (unlikely(err < 0L))
			goto iommu_map_fail;
	}

	if (unlikely(iommu_batch_end(mask) < 0L))
		goto iommu_map_fail;

	local_irq_restore(flags);

	return ret;

iommu_map_fail:
	iommu_tbl_range_free(tbl, *dma_addrp, npages, IOMMU_ERROR_CODE);

range_alloc_fail:
	free_pages(first_page, order);
	return NULL;
}

unsigned long dma_4v_iotsb_bind(unsigned long devhandle,
				unsigned long iotsb_num,
				struct pci_bus *bus_dev)
{
	struct pci_dev *pdev;
	unsigned long err;
	unsigned int bus;
	unsigned int device;
	unsigned int fun;

	list_for_each_entry(pdev, &bus_dev->devices, bus_list) {
		if (pdev->subordinate) {
			/* No need to bind pci bridge */
			dma_4v_iotsb_bind(devhandle, iotsb_num,
					  pdev->subordinate);
		} else {
			bus = bus_dev->number;
			device = PCI_SLOT(pdev->devfn);
			fun = PCI_FUNC(pdev->devfn);
			err = pci_sun4v_iotsb_bind(devhandle, iotsb_num,
						   HV_PCI_DEVICE_BUILD(bus,
								       device,
								       fun));

			/* If the bind fails for one device it is going to
			 * fail for the rest of the devices as well, because
			 * we are sharing the IOTSB.  So in case of failure
			 * simply return with the error.
			 */
			if (err)
				return err;
		}
	}

	return 0;
}

static void dma_4v_iommu_demap(struct device *dev, unsigned long devhandle,
			       dma_addr_t dvma, unsigned long iotsb_num,
			       unsigned long entry, unsigned long npages)
{
	unsigned long num, flags;
	unsigned long ret;

	local_irq_save(flags);
	do {
		if (dvma <= DMA_BIT_MASK(32)) {
			num = pci_sun4v_iommu_demap(devhandle,
						    HV_PCI_TSBID(0, entry),
						    npages);
		} else {
			ret = pci_sun4v_iotsb_demap(devhandle, iotsb_num,
						    entry, npages, &num);
			if (unlikely(ret != HV_EOK)) {
				pr_err_ratelimited("pci_iotsb_demap() failed with error: %ld\n",
						   ret);
			}
		}
		entry += num;
		npages -= num;
	} while (npages != 0);
	local_irq_restore(flags);
}

static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
				 dma_addr_t dvma, unsigned long attrs)
{
	struct pci_pbm_info *pbm;
	struct iommu *iommu;
	struct atu *atu;
	struct iommu_map_table *tbl;
	unsigned long order, npages, entry;
	unsigned long iotsb_num;
	u32 devhandle;

	npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
	iommu = dev->archdata.iommu;
	pbm = dev->archdata.host_controller;
	atu = iommu->atu;
	devhandle = pbm->devhandle;

	if (dvma <= DMA_BIT_MASK(32)) {
		tbl = &iommu->tbl;
		iotsb_num = 0; /* we don't care for legacy iommu */
	} else {
		tbl = &atu->tbl;
		iotsb_num = atu->iotsb->iotsb_num;
	}
	entry = ((dvma - tbl->table_map_base) >> IO_PAGE_SHIFT);
	dma_4v_iommu_demap(dev, devhandle, dvma, iotsb_num, entry, npages);
	iommu_tbl_range_free(tbl, dvma, npages, IOMMU_ERROR_CODE);
	order = get_order(size);
	if (order < 10)
		free_pages((unsigned long)cpu, order);
}

static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
				  unsigned long offset, size_t sz,
				  enum dma_data_direction direction,
				  unsigned long attrs)
{
	struct iommu *iommu;
	struct atu *atu;
	struct iommu_map_table *tbl;
	u64 mask;
	unsigned long flags, npages, oaddr;
	unsigned long i, base_paddr;
	unsigned long prot;
	dma_addr_t bus_addr, ret;
	long entry;

	iommu = dev->archdata.iommu;
	atu = iommu->atu;

	if (unlikely(direction == DMA_NONE))
		goto bad;

	oaddr = (unsigned long)(page_address(page) + offset);
	npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
	npages >>= IO_PAGE_SHIFT;

	mask = *dev->dma_mask;
	if (mask <= DMA_BIT_MASK(32))
		tbl = &iommu->tbl;
	else
		tbl = &atu->tbl;

	entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL,
				      (unsigned long)(-1), 0);

	if (unlikely(entry == IOMMU_ERROR_CODE))
		goto bad;

	bus_addr = (tbl->table_map_base + (entry << IO_PAGE_SHIFT));
	ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
	base_paddr = __pa(oaddr & IO_PAGE_MASK);
	prot = HV_PCI_MAP_ATTR_READ;
	if (direction != DMA_TO_DEVICE)
		prot |= HV_PCI_MAP_ATTR_WRITE;

	if (attrs & DMA_ATTR_WEAK_ORDERING)
		prot |= HV_PCI_MAP_ATTR_RELAXED_ORDER;

	local_irq_save(flags);

	iommu_batch_start(dev, prot, entry);

	for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) {
		long err = iommu_batch_add(base_paddr, mask);
		if (unlikely(err < 0L))
			goto iommu_map_fail;
	}
	if (unlikely(iommu_batch_end(mask) < 0L))
		goto iommu_map_fail;

	local_irq_restore(flags);

	return ret;

bad:
	if (printk_ratelimit())
		WARN_ON(1);
	return DMA_ERROR_CODE;

iommu_map_fail:
	iommu_tbl_range_free(tbl, bus_addr, npages, IOMMU_ERROR_CODE);
	return DMA_ERROR_CODE;
}

static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
			      size_t sz, enum dma_data_direction direction,
			      unsigned long attrs)
{
	struct pci_pbm_info *pbm;
	struct iommu *iommu;
	struct atu *atu;
	struct iommu_map_table *tbl;
	unsigned long npages;
	unsigned long iotsb_num;
	long entry;
	u32 devhandle;

	if (unlikely(direction == DMA_NONE)) {
		if (printk_ratelimit())
			WARN_ON(1);
		return;
	}

	iommu = dev->archdata.iommu;
	pbm = dev->archdata.host_controller;
	atu = iommu->atu;
	devhandle = pbm->devhandle;

	npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
	npages >>= IO_PAGE_SHIFT;
	bus_addr &= IO_PAGE_MASK;

	if (bus_addr <= DMA_BIT_MASK(32)) {
		iotsb_num = 0; /* we don't care for legacy iommu */
		tbl = &iommu->tbl;
	} else {
		iotsb_num = atu->iotsb->iotsb_num;
		tbl = &atu->tbl;
	}
	entry = (bus_addr - tbl->table_map_base) >> IO_PAGE_SHIFT;
	dma_4v_iommu_demap(dev, devhandle, bus_addr, iotsb_num, entry, npages);
	iommu_tbl_range_free(tbl, bus_addr, npages, IOMMU_ERROR_CODE);
}

static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
			 int nelems, enum dma_data_direction direction,
			 unsigned long attrs)
{
	struct scatterlist *s, *outs, *segstart;
	unsigned long flags, handle, prot;
	dma_addr_t dma_next = 0, dma_addr;
	unsigned int max_seg_size;
	unsigned long seg_boundary_size;
	int outcount, incount, i;
	struct iommu *iommu;
	struct atu *atu;
	struct iommu_map_table *tbl;
	u64 mask;
	unsigned long base_shift;
	long err;

	BUG_ON(direction == DMA_NONE);

	iommu = dev->archdata.iommu;
	atu = iommu->atu;

	if (nelems == 0 || !iommu)
		return 0;

	prot = HV_PCI_MAP_ATTR_READ;
	if (direction != DMA_TO_DEVICE)
		prot |= HV_PCI_MAP_ATTR_WRITE;

	if (attrs & DMA_ATTR_WEAK_ORDERING)
		prot |= HV_PCI_MAP_ATTR_RELAXED_ORDER;

	outs = s = segstart = &sglist[0];
	outcount = 1;
	incount = nelems;
	handle = 0;

	/* Init first segment length for backout at failure */
	outs->dma_length = 0;

	local_irq_save(flags);

	iommu_batch_start(dev, prot, ~0UL);

	max_seg_size = dma_get_max_seg_size(dev);
	seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
				  IO_PAGE_SIZE) >> IO_PAGE_SHIFT;

	mask = *dev->dma_mask;
	if (mask <= DMA_BIT_MASK(32))
		tbl = &iommu->tbl;
	else
		tbl = &atu->tbl;

	base_shift = tbl->table_map_base >> IO_PAGE_SHIFT;

	for_each_sg(sglist, s, nelems, i) {
		unsigned long paddr, npages, entry, out_entry = 0, slen;

		slen = s->length;
		/* Sanity check */
		if (slen == 0) {
			dma_next = 0;
			continue;
		}
		/* Allocate iommu entries for that segment */
		paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
		npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
		entry = iommu_tbl_range_alloc(dev, tbl, npages,
					      &handle, (unsigned long)(-1), 0);

		/* Handle failure */
		if (unlikely(entry == IOMMU_ERROR_CODE)) {
			pr_err_ratelimited("iommu_alloc failed, iommu %p paddr %lx npages %lx\n",
					   tbl, paddr, npages);
			goto iommu_map_failed;
		}

		iommu_batch_new_entry(entry, mask);

		/* Convert entry to a dma_addr_t */
		dma_addr = tbl->table_map_base + (entry << IO_PAGE_SHIFT);
		dma_addr |= (s->offset & ~IO_PAGE_MASK);

		/* Insert into HW table */
		paddr &= IO_PAGE_MASK;
		while (npages--) {
			err = iommu_batch_add(paddr, mask);
			if (unlikely(err < 0L))
				goto iommu_map_failed;
			paddr += IO_PAGE_SIZE;
		}

		/* If we are in an open segment, try merging */
		if (segstart != s) {
			/* We cannot merge if:
			 * - allocated dma_addr isn't contiguous to previous allocation
			 */
			if ((dma_addr != dma_next) ||
			    (outs->dma_length + s->length > max_seg_size) ||
			    (is_span_boundary(out_entry, base_shift,
					      seg_boundary_size, outs, s))) {
				/* Can't merge: create a new segment */
				segstart = s;
				outcount++;
				outs = sg_next(outs);
			} else {
				outs->dma_length += s->length;
			}
		}

		if (segstart == s) {
			/* This is a new segment, fill entries */
			outs->dma_address = dma_addr;
			outs->dma_length = slen;
			out_entry = entry;
		}

		/* Calculate next page pointer for contiguous check */
		dma_next = dma_addr + slen;
	}

	err = iommu_batch_end(mask);

	if (unlikely(err < 0L))
		goto iommu_map_failed;

	local_irq_restore(flags);

	if (outcount < incount) {
		outs = sg_next(outs);
		outs->dma_address = DMA_ERROR_CODE;
		outs->dma_length = 0;
	}

	return outcount;

iommu_map_failed:
	for_each_sg(sglist, s, nelems, i) {
		if (s->dma_length != 0) {
			unsigned long vaddr, npages;

			vaddr = s->dma_address & IO_PAGE_MASK;
			npages = iommu_num_pages(s->dma_address, s->dma_length,
						 IO_PAGE_SIZE);
			iommu_tbl_range_free(tbl, vaddr, npages,
					     IOMMU_ERROR_CODE);
			/* XXX demap? XXX */
			s->dma_address = DMA_ERROR_CODE;
			s->dma_length = 0;
		}
		if (s == outs)
			break;
	}
	local_irq_restore(flags);

	return 0;
}

static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
			    int nelems, enum dma_data_direction direction,
			    unsigned long attrs)
{
	struct pci_pbm_info *pbm;
	struct scatterlist *sg;
	struct iommu *iommu;
	struct atu *atu;
	unsigned long flags, entry;
	unsigned long iotsb_num;
	u32 devhandle;

	BUG_ON(direction == DMA_NONE);

	iommu = dev->archdata.iommu;
	pbm = dev->archdata.host_controller;
	atu = iommu->atu;
	devhandle = pbm->devhandle;

	local_irq_save(flags);

	sg = sglist;
	while (nelems--) {
		dma_addr_t dma_handle = sg->dma_address;
		unsigned int len = sg->dma_length;
		unsigned long npages;
		struct iommu_map_table *tbl;
		unsigned long shift = IO_PAGE_SHIFT;

		if (!len)
			break;
		npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);

		if (dma_handle <= DMA_BIT_MASK(32)) {
			iotsb_num = 0; /* we don't care for legacy iommu */
			tbl = &iommu->tbl;
		} else {
			iotsb_num = atu->iotsb->iotsb_num;
			tbl = &atu->tbl;
		}
		entry = ((dma_handle - tbl->table_map_base) >> shift);
		dma_4v_iommu_demap(dev, devhandle, dma_handle, iotsb_num,
				   entry, npages);
		iommu_tbl_range_free(tbl, dma_handle, npages,
				     IOMMU_ERROR_CODE);
		sg = sg_next(sg);
	}

	local_irq_restore(flags);
}

static struct dma_map_ops sun4v_dma_ops = {
	.alloc			= dma_4v_alloc_coherent,
	.free			= dma_4v_free_coherent,
	.map_page		= dma_4v_map_page,
	.unmap_page		= dma_4v_unmap_page,
	.map_sg			= dma_4v_map_sg,
	.unmap_sg		= dma_4v_unmap_sg,
};

static void pci_sun4v_scan_bus(struct pci_pbm_info *pbm, struct device *parent)
{
	struct property *prop;
	struct device_node *dp;

	dp = pbm->op->dev.of_node;
	prop = of_find_property(dp, "66mhz-capable", NULL);
	pbm->is_66mhz_capable = (prop != NULL);
	pbm->pci_bus = pci_scan_one_pbm(pbm, parent);

	/* XXX register error interrupt handlers XXX */
}

static unsigned long probe_existing_entries(struct pci_pbm_info *pbm,
					    struct iommu_map_table *iommu)
{
	struct iommu_pool *pool;
	unsigned long i, pool_nr, cnt = 0;
	u32 devhandle;

	devhandle = pbm->devhandle;
	for (pool_nr = 0; pool_nr < iommu->nr_pools; pool_nr++) {
		pool = &(iommu->pools[pool_nr]);
		for (i = pool->start; i <= pool->end; i++) {
			unsigned long ret, io_attrs, ra;

			ret = pci_sun4v_iommu_getmap(devhandle,
						     HV_PCI_TSBID(0, i),
						     &io_attrs, &ra);
			if (ret == HV_EOK) {
				if (page_in_phys_avail(ra)) {
					pci_sun4v_iommu_demap(devhandle,
							      HV_PCI_TSBID(0,
									   i), 1);
				} else {
					cnt++;
					__set_bit(i, iommu->map);
				}
			}
		}
	}
	return cnt;
}

static int pci_sun4v_atu_alloc_iotsb(struct pci_pbm_info *pbm)
{
	struct atu *atu = pbm->iommu->atu;
	struct atu_iotsb *iotsb;
	void *table;
	u64 table_size;
	u64 iotsb_num;
	unsigned long order;
	unsigned long err;

	iotsb = kzalloc(sizeof(*iotsb), GFP_KERNEL);
	if (!iotsb) {
		err = -ENOMEM;
		goto out_err;
	}
	atu->iotsb = iotsb;

	/* calculate size of IOTSB */
	table_size = (atu->size / IO_PAGE_SIZE) * 8;
	order = get_order(table_size);
	table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
	if (!table) {
		err = -ENOMEM;
		goto table_failed;
	}
	iotsb->table = table;
	iotsb->ra = __pa(table);
	iotsb->dvma_size = atu->size;
	iotsb->dvma_base = atu->base;
	iotsb->table_size = table_size;
	iotsb->page_size = IO_PAGE_SIZE;

	/* configure and register IOTSB with HV */
	err = pci_sun4v_iotsb_conf(pbm->devhandle,
				   iotsb->ra,
				   iotsb->table_size,
				   iotsb->page_size,
				   iotsb->dvma_base,
				   &iotsb_num);
	if (err) {
		pr_err(PFX "pci_iotsb_conf failed error: %ld\n", err);
		goto iotsb_conf_failed;
	}
	iotsb->iotsb_num = iotsb_num;

	err = dma_4v_iotsb_bind(pbm->devhandle, iotsb_num, pbm->pci_bus);
	if (err) {
		pr_err(PFX "pci_iotsb_bind failed error: %ld\n", err);
		goto iotsb_conf_failed;
	}

	return 0;

iotsb_conf_failed:
	free_pages((unsigned long)table, order);
table_failed:
	kfree(iotsb);
out_err:
	return err;
}

static int pci_sun4v_atu_init(struct pci_pbm_info *pbm)
{
	struct atu *atu = pbm->iommu->atu;
	unsigned long err;
	const u64 *ranges;
	u64 map_size, num_iotte;
	u64 dma_mask;
	const u32 *page_size;
	int len;

	ranges = of_get_property(pbm->op->dev.of_node, "iommu-address-ranges",
				 &len);
	if (!ranges) {
		pr_err(PFX "No iommu-address-ranges\n");
		return -EINVAL;
	}

	page_size = of_get_property(pbm->op->dev.of_node, "iommu-pagesizes",
				    NULL);
	if (!page_size) {
		pr_err(PFX "No iommu-pagesizes\n");
		return -EINVAL;
	}

	/* There are 4 iommu-address-ranges supported.  Each range is a pair
	 * of {base, size}.  The ranges[0] and ranges[1] are 32bit address
	 * space while ranges[2] and ranges[3] are 64bit space.  We want to
	 * use the 64bit address ranges to support 64bit addressing.  Because
	 * the 'size' for ranges[2] and ranges[3] is the same, we can select
	 * either of them for mapping.  However, because that 'size' is too
	 * large for the OS to allocate the IOTSB, we use a fixed size of 32G
	 * (ATU_64_SPACE_SIZE), which is more than enough for all PCIe
	 * devices to share.
	 */
	atu->ranges = (struct atu_ranges *)ranges;
	atu->base = atu->ranges[3].base;
	atu->size = ATU_64_SPACE_SIZE;

	/* Create IOTSB */
	err = pci_sun4v_atu_alloc_iotsb(pbm);
	if (err) {
		pr_err(PFX "Error creating ATU IOTSB\n");
		return err;
	}

	/* Create ATU iommu map.
	 * One bit represents one iotte in IOTSB table.
	 */
	dma_mask = (roundup_pow_of_two(atu->size) - 1UL);
	num_iotte = atu->size / IO_PAGE_SIZE;
	map_size = num_iotte / 8;
	atu->tbl.table_map_base = atu->base;
	atu->dma_addr_mask = dma_mask;
	atu->tbl.map = kzalloc(map_size, GFP_KERNEL);
	if (!atu->tbl.map)
		return -ENOMEM;

	iommu_tbl_pool_init(&atu->tbl, num_iotte, IO_PAGE_SHIFT,
			    NULL, false /* no large_pool */,
			    0 /* default npools */,
			    false /* want span boundary checking */);

	return 0;
}

static int pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
{
	static const u32 vdma_default[] = { 0x80000000, 0x80000000 };
	struct iommu *iommu = pbm->iommu;
	unsigned long num_tsb_entries, sz;
	u32 dma_mask, dma_offset;
	const u32 *vdma;

	vdma = of_get_property(pbm->op->dev.of_node, "virtual-dma", NULL);
	if (!vdma)
		vdma = vdma_default;

	if ((vdma[0] | vdma[1]) & ~IO_PAGE_MASK) {
		printk(KERN_ERR PFX "Strange virtual-dma[%08x:%08x].\n",
		       vdma[0], vdma[1]);
		return -EINVAL;
	}

	dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL);
	num_tsb_entries = vdma[1] / IO_PAGE_SIZE;

	dma_offset = vdma[0];

	/* Setup initial software IOMMU state. */
	spin_lock_init(&iommu->lock);
	iommu->ctx_lowest_free = 1;
	iommu->tbl.table_map_base = dma_offset;
	iommu->dma_addr_mask = dma_mask;

	/* Allocate and initialize the free area map. */
	sz = (num_tsb_entries + 7) / 8;
	sz = (sz + 7UL) & ~7UL;
	iommu->tbl.map = kzalloc(sz, GFP_KERNEL);
	if (!iommu->tbl.map) {
		printk(KERN_ERR PFX "Error, kmalloc(arena.map) failed.\n");
		return -ENOMEM;
	}
	iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT,
			    NULL, false /* no large_pool */,
			    0 /* default npools */,
			    false /* want span boundary checking */);
	sz = probe_existing_entries(pbm, &iommu->tbl);
	if (sz)
		printk("%s: Imported %lu TSB entries from OBP\n",
		       pbm->name, sz);

	return 0;
}

#ifdef CONFIG_PCI_MSI
struct pci_sun4v_msiq_entry {
	u64		version_type;
#define MSIQ_VERSION_MASK		0xffffffff00000000UL
#define MSIQ_VERSION_SHIFT		32
#define MSIQ_TYPE_MASK			0x00000000000000ffUL
#define MSIQ_TYPE_SHIFT			0
#define MSIQ_TYPE_NONE			0x00
#define MSIQ_TYPE_MSG			0x01
#define MSIQ_TYPE_MSI32			0x02
#define MSIQ_TYPE_MSI64			0x03
#define MSIQ_TYPE_INTX			0x08
#define MSIQ_TYPE_NONE2			0xff

	u64		intx_sysino;
	u64		reserved1;
	u64		stick;
	u64		req_id;	/* bus/device/func */
#define MSIQ_REQID_BUS_MASK		0xff00UL
#define MSIQ_REQID_BUS_SHIFT		8
#define MSIQ_REQID_DEVICE_MASK		0x00f8UL
#define MSIQ_REQID_DEVICE_SHIFT		3
#define MSIQ_REQID_FUNC_MASK		0x0007UL
#define MSIQ_REQID_FUNC_SHIFT		0

	u64		msi_address;

	/* The format of this value is message type dependent.
	 * For MSI bits 15:0 are the data from the MSI packet.
	 * For MSI-X bits 31:0 are the data from the MSI packet.
	 * For MSG, the message code and message routing code where:
	 *	bits 39:32 is the bus/device/fn of the msg target-id
	 *	bits 18:16 is the message routing code
	 *	bits 7:0 is the message code
	 * For INTx the low order 2-bits are:
	 *	00 - INTA
	 *	01 - INTB
	 *	10 - INTC
	 *	11 - INTD
	 */
	u64		msi_data;

	u64		reserved2;
};

static int pci_sun4v_get_head(struct pci_pbm_info *pbm, unsigned long msiqid,
			      unsigned long *head)
{
	unsigned long err, limit;

	err = pci_sun4v_msiq_gethead(pbm->devhandle, msiqid, head);
	if (unlikely(err))
		return -ENXIO;

	limit = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
	if (unlikely(*head >= limit))
		return -EFBIG;

	return 0;
}

static int pci_sun4v_dequeue_msi(struct pci_pbm_info *pbm,
				 unsigned long msiqid, unsigned long *head,
				 unsigned long *msi)
{
	struct pci_sun4v_msiq_entry *ep;
	unsigned long err, type;

	/* Note: void pointer arithmetic, 'head' is a byte offset */
	ep = (pbm->msi_queues + ((msiqid - pbm->msiq_first) *
				 (pbm->msiq_ent_count *
				  sizeof(struct pci_sun4v_msiq_entry))) +
	      *head);

	if ((ep->version_type & MSIQ_TYPE_MASK) == 0)
		return 0;

	type = (ep->version_type & MSIQ_TYPE_MASK) >> MSIQ_TYPE_SHIFT;
	if (unlikely(type != MSIQ_TYPE_MSI32 &&
		     type != MSIQ_TYPE_MSI64))
		return -EINVAL;

	*msi = ep->msi_data;

	err = pci_sun4v_msi_setstate(pbm->devhandle,
				     ep->msi_data /* msi_num */,
				     HV_MSISTATE_IDLE);
	if (unlikely(err))
		return -ENXIO;

	/* Clear the entry. */
	ep->version_type &= ~MSIQ_TYPE_MASK;

	(*head) += sizeof(struct pci_sun4v_msiq_entry);
	if (*head >=
	    (pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry)))
		*head = 0;

	return 1;
}

static int pci_sun4v_set_head(struct pci_pbm_info *pbm, unsigned long msiqid,
			      unsigned long head)
{
	unsigned long err;

	err = pci_sun4v_msiq_sethead(pbm->devhandle, msiqid, head);
	if (unlikely(err))
		return -EINVAL;

	return 0;
}

static int pci_sun4v_msi_setup(struct pci_pbm_info *pbm, unsigned long msiqid,
			       unsigned long msi, int is_msi64)
{
	if (pci_sun4v_msi_setmsiq(pbm->devhandle, msi, msiqid,
				  (is_msi64 ?
				   HV_MSITYPE_MSI64 : HV_MSITYPE_MSI32)))
		return -ENXIO;
	if (pci_sun4v_msi_setstate(pbm->devhandle, msi, HV_MSISTATE_IDLE))
		return -ENXIO;
	if (pci_sun4v_msi_setvalid(pbm->devhandle, msi, HV_MSIVALID_VALID))
		return -ENXIO;
	return 0;
}

static int pci_sun4v_msi_teardown(struct pci_pbm_info *pbm, unsigned long msi)
{
	unsigned long err, msiqid;

	err = pci_sun4v_msi_getmsiq(pbm->devhandle, msi, &msiqid);
	if (err)
		return -ENXIO;

	pci_sun4v_msi_setvalid(pbm->devhandle, msi, HV_MSIVALID_INVALID);

	return 0;
}

static int pci_sun4v_msiq_alloc(struct pci_pbm_info *pbm)
{
	unsigned long q_size, alloc_size, pages, order;
	int i;

	q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
	alloc_size = (pbm->msiq_num * q_size);
	order = get_order(alloc_size);
	pages = __get_free_pages(GFP_KERNEL | __GFP_COMP, order);
	if (pages == 0UL) {
		printk(KERN_ERR "MSI: Cannot allocate MSI queues (o=%lu).\n",
		       order);
		return -ENOMEM;
	}
	memset((char *)pages, 0, PAGE_SIZE << order);
	pbm->msi_queues = (void *) pages;

	for (i = 0; i < pbm->msiq_num; i++) {
		unsigned long err, base = __pa(pages + (i * q_size));
		unsigned long ret1, ret2;

		err = pci_sun4v_msiq_conf(pbm->devhandle,
					  pbm->msiq_first + i,
					  base, pbm->msiq_ent_count);
		if (err) {
			printk(KERN_ERR "MSI: msiq register fails (err=%lu)\n",
			       err);
			goto h_error;
		}

		err = pci_sun4v_msiq_info(pbm->devhandle,
					  pbm->msiq_first + i,
					  &ret1, &ret2);
		if (err) {
			printk(KERN_ERR "MSI: Cannot read msiq (err=%lu)\n",
			       err);
			goto h_error;
		}
		if (ret1 != base || ret2 != pbm->msiq_ent_count) {
			printk(KERN_ERR "MSI: Bogus qconf "
			       "expected[%lx:%x] got[%lx:%lx]\n",
			       base, pbm->msiq_ent_count,
			       ret1, ret2);
			goto h_error;
		}
	}

	return 0;

h_error:
	free_pages(pages, order);
	return -EINVAL;
}

static void pci_sun4v_msiq_free(struct pci_pbm_info *pbm)
{
	unsigned long q_size, alloc_size, pages, order;
	int i;

	for (i = 0; i < pbm->msiq_num; i++) {
		unsigned long msiqid = pbm->msiq_first + i;

		(void) pci_sun4v_msiq_conf(pbm->devhandle, msiqid, 0UL, 0);
	}

	q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
	alloc_size = (pbm->msiq_num * q_size);
	order = get_order(alloc_size);

	pages = (unsigned long) pbm->msi_queues;

	free_pages(pages, order);

	pbm->msi_queues = NULL;
}

static int pci_sun4v_msiq_build_irq(struct pci_pbm_info *pbm,
				    unsigned long msiqid,
				    unsigned long devino)
{
	unsigned int irq = sun4v_build_irq(pbm->devhandle, devino);

	if (!irq)
		return -ENOMEM;

	if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID))
		return -EINVAL;
	if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE))
		return -EINVAL;

	return irq;
}

static const struct sparc64_msiq_ops pci_sun4v_msiq_ops = {
	.get_head	= pci_sun4v_get_head,
	.dequeue_msi	= pci_sun4v_dequeue_msi,
	.set_head	= pci_sun4v_set_head,
	.msi_setup	= pci_sun4v_msi_setup,
	.msi_teardown	= pci_sun4v_msi_teardown,
	.msiq_alloc	= pci_sun4v_msiq_alloc,
	.msiq_free	= pci_sun4v_msiq_free,
	.msiq_build_irq	= pci_sun4v_msiq_build_irq,
};

static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
	sparc64_pbm_msi_init(pbm, &pci_sun4v_msiq_ops);
}
#else /* CONFIG_PCI_MSI */
static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
}
#endif /* !(CONFIG_PCI_MSI) */

static int pci_sun4v_pbm_init(struct pci_pbm_info *pbm,
			      struct platform_device *op, u32 devhandle)
{
	struct device_node *dp = op->dev.of_node;
	int err;

	pbm->numa_node = of_node_to_nid(dp);

	pbm->pci_ops = &sun4v_pci_ops;
	pbm->config_space_reg_bits = 12;

	pbm->index = pci_num_pbms++;

	pbm->op = op;

	pbm->devhandle = devhandle;

	pbm->name = dp->full_name;

	printk("%s: SUN4V PCI Bus Module\n", pbm->name);
	printk("%s: On NUMA node %d\n", pbm->name, pbm->numa_node);

	pci_determine_mem_io_space(pbm);

	pci_get_pbm_props(pbm);

	err = pci_sun4v_iommu_init(pbm);
	if (err)
		return err;

	pci_sun4v_msi_init(pbm);

	pci_sun4v_scan_bus(pbm, &op->dev);

	/* If atu_init fails it is not a complete failure; we can
	 * still continue using the legacy iommu.
	 */
	if (pbm->iommu->atu) {
		err = pci_sun4v_atu_init(pbm);
		if (err) {
			kfree(pbm->iommu->atu);
			pbm->iommu->atu = NULL;
			pr_err(PFX "ATU init failed, err=%d\n", err);
		}
	}

	pbm->next = pci_pbm_root;
	pci_pbm_root = pbm;

	return 0;
}

static int pci_sun4v_probe(struct platform_device *op)
{
	const struct linux_prom64_registers *regs;
	static int hvapi_negotiated = 0;
	struct pci_pbm_info *pbm;
	struct device_node *dp;
	struct iommu *iommu;
	struct atu *atu;
	u32 devhandle;
	int i, err = -ENODEV;
	static bool hv_atu = true;

	dp = op->dev.of_node;

	if (!hvapi_negotiated++) {
		for (i = 0; i < ARRAY_SIZE(vpci_versions); i++) {
			vpci_major = vpci_versions[i].major;
			vpci_minor = vpci_versions[i].minor;

			err = sun4v_hvapi_register(HV_GRP_PCI, vpci_major,
						   &vpci_minor);
			if (!err)
				break;
		}

		if (err) {
			pr_err(PFX "Could not register hvapi, err=%d\n", err);
			return err;
		}
		pr_info(PFX "Registered hvapi major[%lu] minor[%lu]\n",
			vpci_major, vpci_minor);

		err = sun4v_hvapi_register(HV_GRP_ATU, vatu_major, &vatu_minor);
		if (err) {
			/* Don't return an error if we fail to register the
			 * ATU group, but ATU hcalls won't be available.
			 */
			hv_atu = false;
			pr_err(PFX "Could not register hvapi ATU err=%d\n",
			       err);
		} else {
			pr_info(PFX "Registered hvapi ATU major[%lu] minor[%lu]\n",
				vatu_major, vatu_minor);
		}

		dma_ops = &sun4v_dma_ops;
	}

	regs = of_get_property(dp, "reg", NULL);
	err = -ENODEV;
	if (!regs) {
		printk(KERN_ERR PFX "Could not find config registers\n");
		goto out_err;
	}
	devhandle = (regs->phys_addr >> 32UL) & 0x0fffffff;

	err = -ENOMEM;
	if (!iommu_batch_initialized) {
		for_each_possible_cpu(i) {
			unsigned long page = get_zeroed_page(GFP_KERNEL);

			if (!page)
				goto out_err;

			per_cpu(iommu_batch, i).pglist = (u64 *) page;
		}
		iommu_batch_initialized = 1;
	}

	pbm = kzalloc(sizeof(*pbm), GFP_KERNEL);
	if (!pbm) {
		printk(KERN_ERR PFX "Could not allocate pci_pbm_info\n");
		goto out_err;
	}

	iommu = kzalloc(sizeof(struct iommu), GFP_KERNEL);
	if (!iommu) {
		printk(KERN_ERR PFX "Could not allocate pbm iommu\n");
		goto out_free_controller;
	}

	pbm->iommu = iommu;
	iommu->atu = NULL;
	if (hv_atu) {
		atu = kzalloc(sizeof(*atu), GFP_KERNEL);
		if (!atu)
			pr_err(PFX "Could not allocate atu\n");
		else
			iommu->atu = atu;
	}

	err = pci_sun4v_pbm_init(pbm, op, devhandle);
	if (err)
		goto out_free_iommu;

	dev_set_drvdata(&op->dev, pbm);

	return 0;

out_free_iommu:
	kfree(iommu->atu);
	kfree(pbm->iommu);

out_free_controller:
	kfree(pbm);

out_err:
	return err;
}

static const struct of_device_id pci_sun4v_match[] = {
	{
		.name = "pci",
		.compatible = "SUNW,sun4v-pci",
	},
	{},
};

static struct platform_driver pci_sun4v_driver = {
	.driver = {
		.name = DRIVER_NAME,
		.of_match_table = pci_sun4v_match,
	},
	.probe		= pci_sun4v_probe,
};

static int __init pci_sun4v_init(void)
{
	return platform_driver_register(&pci_sun4v_driver);
}

subsys_initcall(pci_sun4v_init);