/*
 * Copyright IBM Corp. 2012
 *
 * Author(s):
 *   Jan Glauber <jang@linux.vnet.ibm.com>
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/iommu-helper.h>
#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>
#include <linux/pci.h>
#include <asm/pci_dma.h>

static struct kmem_cache *dma_region_table_cache;
static struct kmem_cache *dma_page_table_cache;
static int s390_iommu_strict;

static int zpci_refresh_global(struct zpci_dev *zdev)
{
	return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
				  zdev->iommu_pages * PAGE_SIZE);
}

unsigned long *dma_alloc_cpu_table(void)
{
	unsigned long *table, *entry;

	table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC);
	if (!table)
		return NULL;

	for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
		*entry = ZPCI_TABLE_INVALID;
	return table;
}

static void dma_free_cpu_table(void *table)
{
	kmem_cache_free(dma_region_table_cache, table);
}

static unsigned long *dma_alloc_page_table(void)
{
	unsigned long *table, *entry;

	table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC);
	if (!table)
		return NULL;

	for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
		*entry = ZPCI_PTE_INVALID;
	return table;
}

static void dma_free_page_table(void *table)
{
	kmem_cache_free(dma_page_table_cache, table);
}

static unsigned long *dma_get_seg_table_origin(unsigned long *entry)
{
	unsigned long *sto;

	if (reg_entry_isvalid(*entry))
		sto = get_rt_sto(*entry);
	else {
		sto = dma_alloc_cpu_table();
		if (!sto)
			return NULL;

		set_rt_sto(entry, sto);
		validate_rt_entry(entry);
		entry_clr_protected(entry);
	}
	return sto;
}

static unsigned long *dma_get_page_table_origin(unsigned long *entry)
{
	unsigned long *pto;

	if (reg_entry_isvalid(*entry))
		pto = get_st_pto(*entry);
	else {
		pto = dma_alloc_page_table();
		if (!pto)
			return NULL;
		set_st_pto(entry, pto);
		validate_st_entry(entry);
		entry_clr_protected(entry);
	}
	return pto;
}

unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
{
	unsigned long *sto, *pto;
	unsigned int rtx, sx, px;

	rtx = calc_rtx(dma_addr);
	sto = dma_get_seg_table_origin(&rto[rtx]);
	if (!sto)
		return NULL;

	sx = calc_sx(dma_addr);
	pto = dma_get_page_table_origin(&sto[sx]);
	if (!pto)
		return NULL;

	px = calc_px(dma_addr);
	return &pto[px];
}

void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags)
{
	if (flags & ZPCI_PTE_INVALID) {
		invalidate_pt_entry(entry);
	} else {
		set_pt_pfaa(entry, page_addr);
		validate_pt_entry(entry);
	}

	if (flags & ZPCI_TABLE_PROTECTED)
		entry_set_protected(entry);
	else
		entry_clr_protected(entry);
}

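/*
 * Walk the CPU translation table for each page of the mapping and set or
 * clear the corresponding page-table entries. On failure while validating,
 * roll back the entries that were already updated. No TLB refresh is done
 * here; the caller flushes via __dma_purge_tlb().
 */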
static int __dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
			      dma_addr_t dma_addr, size_t size, int flags)
{
	unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	u8 *page_addr = (u8 *) (pa & PAGE_MASK);
	unsigned long irq_flags;
	unsigned long *entry;
	int i, rc = 0;

	if (!nr_pages)
		return -EINVAL;

	spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
	if (!zdev->dma_table) {
		rc = -EINVAL;
		goto out_unlock;
	}

	for (i = 0; i < nr_pages; i++) {
		entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
		if (!entry) {
			rc = -ENOMEM;
			goto undo_cpu_trans;
		}
		dma_update_cpu_trans(entry, page_addr, flags);
		page_addr += PAGE_SIZE;
		dma_addr += PAGE_SIZE;
	}

undo_cpu_trans:
	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
		flags = ZPCI_PTE_INVALID;
		while (i-- > 0) {
			page_addr -= PAGE_SIZE;
			dma_addr -= PAGE_SIZE;
			entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
			if (!entry)
				break;
			dma_update_cpu_trans(entry, page_addr, flags);
		}
	}
out_unlock:
	spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
	return rc;
}

static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
			   size_t size, int flags)
{
	/*
	 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
	 * translations when previously invalid translation-table entries are
	 * validated. With lazy unmap, rpcit is skipped for previously valid
	 * entries, but a global rpcit is then required before any address can
	 * be re-used, i.e. after each iommu bitmap wrap-around.
	 */
	if ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID) {
		if (!zdev->tlb_refresh)
			return 0;
	} else {
		if (!s390_iommu_strict)
			return 0;
	}

	return zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
				  PAGE_ALIGN(size));
}

static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
			    dma_addr_t dma_addr, size_t size, int flags)
{
	int rc;

	rc = __dma_update_trans(zdev, pa, dma_addr, size, flags);
	if (rc)
		return rc;

	rc = __dma_purge_tlb(zdev, dma_addr, size, flags);
	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))
		__dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID);

	return rc;
}

void dma_free_seg_table(unsigned long entry)
{
	unsigned long *sto = get_rt_sto(entry);
	int sx;

	for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
		if (reg_entry_isvalid(sto[sx]))
			dma_free_page_table(get_st_pto(sto[sx]));

	dma_free_cpu_table(sto);
}

void dma_cleanup_tables(unsigned long *table)
{
	int rtx;

	if (!table)
		return;

	for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
		if (reg_entry_isvalid(table[rtx]))
			dma_free_seg_table(table[rtx]);

	dma_free_cpu_table(table);
}

static unsigned long __dma_alloc_iommu(struct device *dev,
				       unsigned long start, int size)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long boundary_size;

	boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
			      PAGE_SIZE) >> PAGE_SHIFT;
	return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
				start, size, zdev->start_dma >> PAGE_SHIFT,
				boundary_size, 0);
}

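/*
 * Allocate a range of IOMMU pages from the bitmap, starting the search at
 * next_bit. If that fails and lazy unmapping is enabled, do a global TLB
 * flush, return the lazily freed pages to the bitmap and retry from the
 * start of the aperture (wrap-around).
 */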
static dma_addr_t dma_alloc_address(struct device *dev, int size)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long offset, flags;

	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
	offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
	if (offset == -1) {
		if (!s390_iommu_strict) {
			/* global flush before DMA addresses are reused */
			if (zpci_refresh_global(zdev))
				goto out_error;

			bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
				      zdev->lazy_bitmap, zdev->iommu_pages);
			bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
		}
		/* wrap-around */
		offset = __dma_alloc_iommu(dev, 0, size);
		if (offset == -1)
			goto out_error;
	}
	zdev->next_bit = offset + size;
	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);

	return zdev->start_dma + offset * PAGE_SIZE;

out_error:
	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
	return DMA_ERROR_CODE;
}

static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long flags, offset;

	offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;

	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
	if (!zdev->iommu_bitmap)
		goto out;

	if (s390_iommu_strict)
		bitmap_clear(zdev->iommu_bitmap, offset, size);
	else
		bitmap_set(zdev->lazy_bitmap, offset, size);

out:
	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
}

static inline void zpci_err_dma(unsigned long rc, unsigned long addr)
{
	struct {
		unsigned long rc;
		unsigned long addr;
	} __packed data = {rc, addr};

	zpci_err_hex(&data, sizeof(data));
}

static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction direction,
				     unsigned long attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long pa = page_to_phys(page) + offset;
	int flags = ZPCI_PTE_VALID;
	unsigned long nr_pages;
	dma_addr_t dma_addr;
	int ret;

	/* This rounds up number of pages based on size and offset */
	nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
	dma_addr = dma_alloc_address(dev, nr_pages);
	if (dma_addr == DMA_ERROR_CODE) {
		ret = -ENOSPC;
		goto out_err;
	}

	/* Use rounded up size */
	size = nr_pages * PAGE_SIZE;

	if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
		flags |= ZPCI_TABLE_PROTECTED;

	ret = dma_update_trans(zdev, pa, dma_addr, size, flags);
	if (ret)
		goto out_free;

	atomic64_add(nr_pages, &zdev->mapped_pages);
	return dma_addr + (offset & ~PAGE_MASK);

out_free:
	dma_free_address(dev, dma_addr, nr_pages);
out_err:
	zpci_err("map error:\n");
	zpci_err_dma(ret, pa);
	return DMA_ERROR_CODE;
}

static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
				 size_t size, enum dma_data_direction direction,
				 unsigned long attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	int npages, ret;

	npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
	dma_addr = dma_addr & PAGE_MASK;
	ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
			       ZPCI_PTE_INVALID);
	if (ret) {
		zpci_err("unmap error:\n");
		zpci_err_dma(ret, dma_addr);
		return;
	}

	atomic64_add(npages, &zdev->unmapped_pages);
	dma_free_address(dev, dma_addr, npages);
}

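/*
 * Coherent allocation: grab physically contiguous pages and map them
 * DMA_BIDIRECTIONAL through the IOMMU. The CPU pointer returned is the
 * physical address of the pages, which works because kernel memory is
 * mapped 1:1 on s390.
 */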
static void *s390_dma_alloc(struct device *dev, size_t size,
			    dma_addr_t *dma_handle, gfp_t flag,
			    unsigned long attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	struct page *page;
	unsigned long pa;
	dma_addr_t map;

	size = PAGE_ALIGN(size);
	page = alloc_pages(flag, get_order(size));
	if (!page)
		return NULL;

	pa = page_to_phys(page);
	map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0);
	if (dma_mapping_error(dev, map)) {
		free_pages(pa, get_order(size));
		return NULL;
	}

	atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
	if (dma_handle)
		*dma_handle = map;
	return (void *) pa;
}

static void s390_dma_free(struct device *dev, size_t size,
			  void *pa, dma_addr_t dma_handle,
			  unsigned long attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));

	size = PAGE_ALIGN(size);
	atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
	s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0);
	free_pages((unsigned long) pa, get_order(size));
}

/* Map a segment into a contiguous dma address area */
static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
			     size_t size, dma_addr_t *handle,
			     enum dma_data_direction dir)
{
	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	dma_addr_t dma_addr_base, dma_addr;
	int flags = ZPCI_PTE_VALID;
	struct scatterlist *s;
	unsigned long pa = 0;
	int ret;

	dma_addr_base = dma_alloc_address(dev, nr_pages);
	if (dma_addr_base == DMA_ERROR_CODE)
		return -ENOMEM;

	dma_addr = dma_addr_base;
	if (dir == DMA_NONE || dir == DMA_TO_DEVICE)
		flags |= ZPCI_TABLE_PROTECTED;

	for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) {
		pa = page_to_phys(sg_page(s));
		ret = __dma_update_trans(zdev, pa, dma_addr,
					 s->offset + s->length, flags);
		if (ret)
			goto unmap;

		dma_addr += s->offset + s->length;
	}
	ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags);
	if (ret)
		goto unmap;

	*handle = dma_addr_base;
	atomic64_add(nr_pages, &zdev->mapped_pages);

	return ret;

unmap:
	dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base,
			 ZPCI_PTE_INVALID);
	dma_free_address(dev, dma_addr_base, nr_pages);
	zpci_err("map error:\n");
	zpci_err_dma(ret, pa);
	return ret;
}

static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
			   int nr_elements, enum dma_data_direction dir,
			   unsigned long attrs)
{
	struct scatterlist *s = sg, *start = sg, *dma = sg;
	unsigned int max = dma_get_max_seg_size(dev);
	unsigned int size = s->offset + s->length;
	unsigned int offset = s->offset;
	int count = 0, i;

	for (i = 1; i < nr_elements; i++) {
		s = sg_next(s);

		s->dma_address = DMA_ERROR_CODE;
		s->dma_length = 0;

		if (s->offset || (size & ~PAGE_MASK) ||
		    size + s->length > max) {
			if (__s390_dma_map_sg(dev, start, size,
					      &dma->dma_address, dir))
				goto unmap;

			dma->dma_address += offset;
			dma->dma_length = size - offset;

			size = offset = s->offset;
			start = s;
			dma = sg_next(dma);
			count++;
		}
		size += s->length;
	}
	if (__s390_dma_map_sg(dev, start, size, &dma->dma_address, dir))
		goto unmap;

	dma->dma_address += offset;
	dma->dma_length = size - offset;

	return count + 1;
unmap:
	for_each_sg(sg, s, count, i)
		s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s),
				     dir, attrs);

	return 0;
}

static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
			      int nr_elements, enum dma_data_direction dir,
			      unsigned long attrs)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nr_elements, i) {
		if (s->dma_length)
			s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
					     dir, attrs);
		s->dma_address = 0;
		s->dma_length = 0;
	}
}

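/*
 * Set up the per-device DMA state: allocate the translation table and the
 * IOMMU (and, with lazy unmap, lazy) bitmaps, size the DMA aperture, and
 * register the translation-table origin with the hardware.
 */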
int zpci_dma_init_device(struct zpci_dev *zdev)
{
	int rc;

	/*
	 * At this point, if the device is part of an IOMMU domain, this would
	 * be a strong hint towards a bug in the IOMMU API (common) code and/or
	 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
	 */
	WARN_ON(zdev->s390_domain);

	spin_lock_init(&zdev->iommu_bitmap_lock);
	spin_lock_init(&zdev->dma_table_lock);

	zdev->dma_table = dma_alloc_cpu_table();
	if (!zdev->dma_table) {
		rc = -ENOMEM;
		goto out;
	}

	/*
	 * Restrict the iommu bitmap size to the minimum of the following:
	 * - main memory size
	 * - 3-level pagetable address limit minus start_dma offset
	 * - DMA address range allowed by the hardware (clp query pci fn)
	 *
	 * Also set zdev->end_dma to the actual end address of the usable
	 * range, instead of the theoretical maximum as reported by hardware.
	 */
	zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
	zdev->iommu_size = min3((u64) high_memory,
				ZPCI_TABLE_SIZE_RT - zdev->start_dma,
				zdev->end_dma - zdev->start_dma + 1);
	zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1;
	zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
	zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
	if (!zdev->iommu_bitmap) {
		rc = -ENOMEM;
		goto free_dma_table;
	}
	if (!s390_iommu_strict) {
		zdev->lazy_bitmap = vzalloc(zdev->iommu_pages / 8);
		if (!zdev->lazy_bitmap) {
			rc = -ENOMEM;
			goto free_bitmap;
		}
	}
	rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
				(u64) zdev->dma_table);
	if (rc)
		goto free_bitmap;

	return 0;
free_bitmap:
	vfree(zdev->iommu_bitmap);
	zdev->iommu_bitmap = NULL;
	vfree(zdev->lazy_bitmap);
	zdev->lazy_bitmap = NULL;
free_dma_table:
	dma_free_cpu_table(zdev->dma_table);
	zdev->dma_table = NULL;
out:
	return rc;
}

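/*
 * Tear down the per-device DMA state: unregister the translation table
 * from the hardware and free the tables and bitmaps.
 */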
void zpci_dma_exit_device(struct zpci_dev *zdev)
{
	/*
	 * At this point, if the device is part of an IOMMU domain, this would
	 * be a strong hint towards a bug in the IOMMU API (common) code and/or
	 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
	 */
	WARN_ON(zdev->s390_domain);

	zpci_unregister_ioat(zdev, 0);
	dma_cleanup_tables(zdev->dma_table);
	zdev->dma_table = NULL;
	vfree(zdev->iommu_bitmap);
	zdev->iommu_bitmap = NULL;
	vfree(zdev->lazy_bitmap);
	zdev->lazy_bitmap = NULL;

	zdev->next_bit = 0;
}

static int __init dma_alloc_cpu_table_caches(void)
{
	dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
					ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN,
					0, NULL);
	if (!dma_region_table_cache)
		return -ENOMEM;

	dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
					ZPCI_PT_SIZE, ZPCI_PT_ALIGN,
					0, NULL);
	if (!dma_page_table_cache) {
		kmem_cache_destroy(dma_region_table_cache);
		return -ENOMEM;
	}
	return 0;
}

int __init zpci_dma_init(void)
{
	return dma_alloc_cpu_table_caches();
}

void zpci_dma_exit(void)
{
	kmem_cache_destroy(dma_page_table_cache);
	kmem_cache_destroy(dma_region_table_cache);
}

#define PREALLOC_DMA_DEBUG_ENTRIES	(1 << 16)

static int __init dma_debug_do_init(void)
{
	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
	return 0;
}
fs_initcall(dma_debug_do_init);

const struct dma_map_ops s390_pci_dma_ops = {
	.alloc		= s390_dma_alloc,
	.free		= s390_dma_free,
	.map_sg		= s390_dma_map_sg,
	.unmap_sg	= s390_dma_unmap_sg,
	.map_page	= s390_dma_map_pages,
	.unmap_page	= s390_dma_unmap_pages,
	/* if we support direct DMA this must be conditional */
	.is_phys	= 0,
	/* dma_supported is unconditionally true without a callback */
};
EXPORT_SYMBOL_GPL(s390_pci_dma_ops);

static int __init s390_iommu_setup(char *str)
{
	if (!strncmp(str, "strict", 6))
		s390_iommu_strict = 1;
	return 0;
}

__setup("s390_iommu=", s390_iommu_setup);