/*
 * Copyright IBM Corp. 2012
 *
 * Author(s):
 *   Jan Glauber <jang@linux.vnet.ibm.com>
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/iommu-helper.h>
#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>
#include <linux/pci.h>
#include <asm/pci_dma.h>

#define S390_MAPPING_ERROR	(~(dma_addr_t) 0x0)

static struct kmem_cache *dma_region_table_cache;
static struct kmem_cache *dma_page_table_cache;
static int s390_iommu_strict;

static int zpci_refresh_global(struct zpci_dev *zdev)
{
	return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
				  zdev->iommu_pages * PAGE_SIZE);
}

unsigned long *dma_alloc_cpu_table(void)
{
	unsigned long *table, *entry;

	table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC);
	if (!table)
		return NULL;

	for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
		*entry = ZPCI_TABLE_INVALID;
	return table;
}

static void dma_free_cpu_table(void *table)
{
	kmem_cache_free(dma_region_table_cache, table);
}

static unsigned long *dma_alloc_page_table(void)
{
	unsigned long *table, *entry;

	table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC);
	if (!table)
		return NULL;

	for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
		*entry = ZPCI_PTE_INVALID;
	return table;
}

static void dma_free_page_table(void *table)
{
	kmem_cache_free(dma_page_table_cache, table);
}

static unsigned long *dma_get_seg_table_origin(unsigned long *entry)
{
	unsigned long *sto;

	if (reg_entry_isvalid(*entry))
		sto = get_rt_sto(*entry);
	else {
		sto = dma_alloc_cpu_table();
		if (!sto)
			return NULL;

		set_rt_sto(entry, sto);
		validate_rt_entry(entry);
		entry_clr_protected(entry);
	}
	return sto;
}

static unsigned long *dma_get_page_table_origin(unsigned long *entry)
{
	unsigned long *pto;

	if (reg_entry_isvalid(*entry))
		pto = get_st_pto(*entry);
	else {
		pto = dma_alloc_page_table();
		if (!pto)
			return NULL;
		set_st_pto(entry, pto);
		validate_st_entry(entry);
		entry_clr_protected(entry);
	}
	return pto;
}

unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
{
	unsigned long *sto, *pto;
	unsigned int rtx, sx, px;

	rtx = calc_rtx(dma_addr);
	sto = dma_get_seg_table_origin(&rto[rtx]);
	if (!sto)
		return NULL;

	sx = calc_sx(dma_addr);
	pto = dma_get_page_table_origin(&sto[sx]);
	if (!pto)
		return NULL;

	px = calc_px(dma_addr);
	return &pto[px];
}

void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags)
{
	if (flags & ZPCI_PTE_INVALID) {
		invalidate_pt_entry(entry);
	} else {
		set_pt_pfaa(entry, page_addr);
		validate_pt_entry(entry);
	}

	if (flags & ZPCI_TABLE_PROTECTED)
		entry_set_protected(entry);
	else
		entry_clr_protected(entry);
}

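/*
 * Update the CPU-side translation tables for the range [dma_addr,
 * dma_addr + size), one page table entry per page. If a table walk fails
 * while validating entries, the entries written so far are rolled back.
 * Only the tables in memory are touched here; flushing the IOTLB is left
 * to __dma_purge_tlb().
 */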
static int __dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
			      dma_addr_t dma_addr, size_t size, int flags)
{
	unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	u8 *page_addr = (u8 *) (pa & PAGE_MASK);
	unsigned long irq_flags;
	unsigned long *entry;
	int i, rc = 0;

	if (!nr_pages)
		return -EINVAL;

	spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
	if (!zdev->dma_table) {
		rc = -EINVAL;
		goto out_unlock;
	}

	for (i = 0; i < nr_pages; i++) {
		entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
		if (!entry) {
			rc = -ENOMEM;
			goto undo_cpu_trans;
		}
		dma_update_cpu_trans(entry, page_addr, flags);
		page_addr += PAGE_SIZE;
		dma_addr += PAGE_SIZE;
	}

undo_cpu_trans:
	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
		flags = ZPCI_PTE_INVALID;
		while (i-- > 0) {
			page_addr -= PAGE_SIZE;
			dma_addr -= PAGE_SIZE;
			entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
			if (!entry)
				break;
			dma_update_cpu_trans(entry, page_addr, flags);
		}
	}
out_unlock:
	spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
	return rc;
}

static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
			   size_t size, int flags)
{
	/*
	 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
	 * translations when previously invalid translation-table entries are
	 * validated. With lazy unmap, rpcit is skipped for previously valid
	 * entries, but a global rpcit is then required before any address can
	 * be re-used, i.e. after each iommu bitmap wrap-around.
	 */
	if ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID) {
		if (!zdev->tlb_refresh)
			return 0;
	} else {
		if (!s390_iommu_strict)
			return 0;
	}

	return zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
				  PAGE_ALIGN(size));
}

static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
			    dma_addr_t dma_addr, size_t size, int flags)
{
	int rc;

	rc = __dma_update_trans(zdev, pa, dma_addr, size, flags);
	if (rc)
		return rc;

	rc = __dma_purge_tlb(zdev, dma_addr, size, flags);
	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))
		__dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID);

	return rc;
}

void dma_free_seg_table(unsigned long entry)
{
	unsigned long *sto = get_rt_sto(entry);
	int sx;

	for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
		if (reg_entry_isvalid(sto[sx]))
			dma_free_page_table(get_st_pto(sto[sx]));

	dma_free_cpu_table(sto);
}

void dma_cleanup_tables(unsigned long *table)
{
	int rtx;

	if (!table)
		return;

	for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
		if (reg_entry_isvalid(table[rtx]))
			dma_free_seg_table(table[rtx]);

	dma_free_cpu_table(table);
}

static unsigned long __dma_alloc_iommu(struct device *dev,
				       unsigned long start, int size)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long boundary_size;

	boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
			      PAGE_SIZE) >> PAGE_SHIFT;
	return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
				start, size, zdev->start_dma >> PAGE_SHIFT,
				boundary_size, 0);
}

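/*
 * Allocate a contiguous range of 'size' pages of DMA address space from the
 * per-device iommu bitmap, starting the search at zdev->next_bit. On
 * wrap-around in lazy (non-strict) mode, a global RPCIT flush is issued
 * first and the lazily freed addresses are made available again. Returns
 * S390_MAPPING_ERROR if no suitable range could be found.
 */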
static dma_addr_t dma_alloc_address(struct device *dev, int size)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long offset, flags;

	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
	offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
	if (offset == -1) {
		if (!s390_iommu_strict) {
			/* global flush before DMA addresses are reused */
			if (zpci_refresh_global(zdev))
				goto out_error;

			bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
				      zdev->lazy_bitmap, zdev->iommu_pages);
			bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
		}
		/* wrap-around */
		offset = __dma_alloc_iommu(dev, 0, size);
		if (offset == -1)
			goto out_error;
	}
	zdev->next_bit = offset + size;
	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);

	return zdev->start_dma + offset * PAGE_SIZE;

out_error:
	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
	return S390_MAPPING_ERROR;
}

static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long flags, offset;

	offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;

	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
	if (!zdev->iommu_bitmap)
		goto out;

	if (s390_iommu_strict)
		bitmap_clear(zdev->iommu_bitmap, offset, size);
	else
		bitmap_set(zdev->lazy_bitmap, offset, size);

out:
	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
}

static inline void zpci_err_dma(unsigned long rc, unsigned long addr)
{
	struct {
		unsigned long rc;
		unsigned long addr;
	} __packed data = {rc, addr};

	zpci_err_hex(&data, sizeof(data));
}

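/*
 * dma_map_ops.map_page callback: reserve a DMA address range large enough
 * for the buffer, establish the translation table entries and flush the
 * IOTLB where required. DMA_TO_DEVICE and DMA_NONE mappings are created
 * with ZPCI_TABLE_PROTECTED set.
 */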
static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction direction,
				     unsigned long attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long pa = page_to_phys(page) + offset;
	int flags = ZPCI_PTE_VALID;
	unsigned long nr_pages;
	dma_addr_t dma_addr;
	int ret;

	/* This rounds up number of pages based on size and offset */
	nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
	dma_addr = dma_alloc_address(dev, nr_pages);
	if (dma_addr == S390_MAPPING_ERROR) {
		ret = -ENOSPC;
		goto out_err;
	}

	/* Use rounded up size */
	size = nr_pages * PAGE_SIZE;

	if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
		flags |= ZPCI_TABLE_PROTECTED;

	ret = dma_update_trans(zdev, pa, dma_addr, size, flags);
	if (ret)
		goto out_free;

	atomic64_add(nr_pages, &zdev->mapped_pages);
	return dma_addr + (offset & ~PAGE_MASK);

out_free:
	dma_free_address(dev, dma_addr, nr_pages);
out_err:
	zpci_err("map error:\n");
	zpci_err_dma(ret, pa);
	return S390_MAPPING_ERROR;
}

static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
				 size_t size, enum dma_data_direction direction,
				 unsigned long attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	int npages, ret;

	npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
	dma_addr = dma_addr & PAGE_MASK;
	ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
			       ZPCI_PTE_INVALID);
	if (ret) {
		zpci_err("unmap error:\n");
		zpci_err_dma(ret, dma_addr);
		return;
	}

	atomic64_add(npages, &zdev->unmapped_pages);
	dma_free_address(dev, dma_addr, npages);
}

static void *s390_dma_alloc(struct device *dev, size_t size,
			    dma_addr_t *dma_handle, gfp_t flag,
			    unsigned long attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	struct page *page;
	unsigned long pa;
	dma_addr_t map;

	size = PAGE_ALIGN(size);
	page = alloc_pages(flag, get_order(size));
	if (!page)
		return NULL;

	pa = page_to_phys(page);
	map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0);
	if (dma_mapping_error(dev, map)) {
		free_pages(pa, get_order(size));
		return NULL;
	}

	atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
	if (dma_handle)
		*dma_handle = map;
	return (void *) pa;
}

static void s390_dma_free(struct device *dev, size_t size,
			  void *pa, dma_addr_t dma_handle,
			  unsigned long attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));

	size = PAGE_ALIGN(size);
	atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
	s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0);
	free_pages((unsigned long) pa, get_order(size));
}

/* Map a segment into a contiguous dma address area */
static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
			     size_t size, dma_addr_t *handle,
			     enum dma_data_direction dir)
{
	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	dma_addr_t dma_addr_base, dma_addr;
	int flags = ZPCI_PTE_VALID;
	struct scatterlist *s;
	unsigned long pa = 0;
	int ret;

	dma_addr_base = dma_alloc_address(dev, nr_pages);
	if (dma_addr_base == S390_MAPPING_ERROR)
		return -ENOMEM;

	dma_addr = dma_addr_base;
	if (dir == DMA_NONE || dir == DMA_TO_DEVICE)
		flags |= ZPCI_TABLE_PROTECTED;

	for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) {
		pa = page_to_phys(sg_page(s));
		ret = __dma_update_trans(zdev, pa, dma_addr,
					 s->offset + s->length, flags);
		if (ret)
			goto unmap;

		dma_addr += s->offset + s->length;
	}
	ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags);
	if (ret)
		goto unmap;

	*handle = dma_addr_base;
	atomic64_add(nr_pages, &zdev->mapped_pages);

	return ret;

unmap:
	dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base,
			 ZPCI_PTE_INVALID);
	dma_free_address(dev, dma_addr_base, nr_pages);
	zpci_err("map error:\n");
	zpci_err_dma(ret, pa);
	return ret;
}

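/*
 * dma_map_ops.map_sg callback: merge scatterlist elements into as few
 * contiguous DMA address ranges as possible. A new range is started whenever
 * an element does not start at a page boundary, the accumulated size is not
 * a multiple of PAGE_SIZE, or adding the element would exceed the device's
 * maximum segment size.
 */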
static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
			   int nr_elements, enum dma_data_direction dir,
			   unsigned long attrs)
{
	struct scatterlist *s = sg, *start = sg, *dma = sg;
	unsigned int max = dma_get_max_seg_size(dev);
	unsigned int size = s->offset + s->length;
	unsigned int offset = s->offset;
	int count = 0, i;

	for (i = 1; i < nr_elements; i++) {
		s = sg_next(s);

		s->dma_address = S390_MAPPING_ERROR;
		s->dma_length = 0;

		if (s->offset || (size & ~PAGE_MASK) ||
		    size + s->length > max) {
			if (__s390_dma_map_sg(dev, start, size,
					      &dma->dma_address, dir))
				goto unmap;

			dma->dma_address += offset;
			dma->dma_length = size - offset;

			size = offset = s->offset;
			start = s;
			dma = sg_next(dma);
			count++;
		}
		size += s->length;
	}
	if (__s390_dma_map_sg(dev, start, size, &dma->dma_address, dir))
		goto unmap;

	dma->dma_address += offset;
	dma->dma_length = size - offset;

	return count + 1;
unmap:
	for_each_sg(sg, s, count, i)
		s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s),
				     dir, attrs);

	return 0;
}

static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
			      int nr_elements, enum dma_data_direction dir,
			      unsigned long attrs)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nr_elements, i) {
		if (s->dma_length)
			s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
					     dir, attrs);
		s->dma_address = 0;
		s->dma_length = 0;
	}
}

static int s390_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
	return dma_addr == S390_MAPPING_ERROR;
}

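/*
 * Set up DMA translation for a PCI function: allocate the root translation
 * table and the iommu bitmap(s), derive the usable DMA address range and
 * register the translation table with the hardware via zpci_register_ioat().
 */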
int zpci_dma_init_device(struct zpci_dev *zdev)
{
	int rc;

	/*
	 * At this point, if the device is part of an IOMMU domain, this would
	 * be a strong hint towards a bug in the IOMMU API (common) code and/or
	 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
	 */
	WARN_ON(zdev->s390_domain);

	spin_lock_init(&zdev->iommu_bitmap_lock);
	spin_lock_init(&zdev->dma_table_lock);

	zdev->dma_table = dma_alloc_cpu_table();
	if (!zdev->dma_table) {
		rc = -ENOMEM;
		goto out;
	}

	/*
	 * Restrict the iommu bitmap size to the minimum of the following:
	 * - main memory size
	 * - 3-level pagetable address limit minus start_dma offset
	 * - DMA address range allowed by the hardware (clp query pci fn)
	 *
	 * Also set zdev->end_dma to the actual end address of the usable
	 * range, instead of the theoretical maximum as reported by hardware.
	 */
	zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
	zdev->iommu_size = min3((u64) high_memory,
				ZPCI_TABLE_SIZE_RT - zdev->start_dma,
				zdev->end_dma - zdev->start_dma + 1);
	zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1;
	zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
	zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
	if (!zdev->iommu_bitmap) {
		rc = -ENOMEM;
		goto free_dma_table;
	}
	if (!s390_iommu_strict) {
		zdev->lazy_bitmap = vzalloc(zdev->iommu_pages / 8);
		if (!zdev->lazy_bitmap) {
			rc = -ENOMEM;
			goto free_bitmap;
		}

	}
	rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
				(u64) zdev->dma_table);
	if (rc)
		goto free_bitmap;

	return 0;
free_bitmap:
	vfree(zdev->iommu_bitmap);
	zdev->iommu_bitmap = NULL;
	vfree(zdev->lazy_bitmap);
	zdev->lazy_bitmap = NULL;
free_dma_table:
	dma_free_cpu_table(zdev->dma_table);
	zdev->dma_table = NULL;
out:
	return rc;
}

void zpci_dma_exit_device(struct zpci_dev *zdev)
{
	/*
	 * At this point, if the device is part of an IOMMU domain, this would
	 * be a strong hint towards a bug in the IOMMU API (common) code and/or
	 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
	 */
	WARN_ON(zdev->s390_domain);

	if (zpci_unregister_ioat(zdev, 0))
		return;

	dma_cleanup_tables(zdev->dma_table);
	zdev->dma_table = NULL;
	vfree(zdev->iommu_bitmap);
	zdev->iommu_bitmap = NULL;
	vfree(zdev->lazy_bitmap);
	zdev->lazy_bitmap = NULL;

	zdev->next_bit = 0;
}

static int __init dma_alloc_cpu_table_caches(void)
{
	dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
					ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN,
					0, NULL);
	if (!dma_region_table_cache)
		return -ENOMEM;

	dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
					ZPCI_PT_SIZE, ZPCI_PT_ALIGN,
					0, NULL);
	if (!dma_page_table_cache) {
		kmem_cache_destroy(dma_region_table_cache);
		return -ENOMEM;
	}
	return 0;
}

int __init zpci_dma_init(void)
{
	return dma_alloc_cpu_table_caches();
}

void zpci_dma_exit(void)
{
	kmem_cache_destroy(dma_page_table_cache);
	kmem_cache_destroy(dma_region_table_cache);
}

#define PREALLOC_DMA_DEBUG_ENTRIES	(1 << 16)

static int __init dma_debug_do_init(void)
{
	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
	return 0;
}
fs_initcall(dma_debug_do_init);

const struct dma_map_ops s390_pci_dma_ops = {
	.alloc		= s390_dma_alloc,
	.free		= s390_dma_free,
	.map_sg		= s390_dma_map_sg,
	.unmap_sg	= s390_dma_unmap_sg,
	.map_page	= s390_dma_map_pages,
	.unmap_page	= s390_dma_unmap_pages,
	.mapping_error	= s390_mapping_error,
	/* if we support direct DMA this must be conditional */
	.is_phys	= 0,
	/* dma_supported is unconditionally true without a callback */
};
EXPORT_SYMBOL_GPL(s390_pci_dma_ops);

static int __init s390_iommu_setup(char *str)
{
	if (!strncmp(str, "strict", 6))
		s390_iommu_strict = 1;
	return 0;
}

__setup("s390_iommu=", s390_iommu_setup);