1 /* 2 * Copyright IBM Corp. 2012 3 * 4 * Author(s): 5 * Jan Glauber <jang@linux.vnet.ibm.com> 6 */ 7 8 #include <linux/kernel.h> 9 #include <linux/slab.h> 10 #include <linux/export.h> 11 #include <linux/iommu-helper.h> 12 #include <linux/dma-mapping.h> 13 #include <linux/vmalloc.h> 14 #include <linux/pci.h> 15 #include <asm/pci_dma.h> 16 17 static struct kmem_cache *dma_region_table_cache; 18 static struct kmem_cache *dma_page_table_cache; 19 static int s390_iommu_strict; 20 21 static int zpci_refresh_global(struct zpci_dev *zdev) 22 { 23 return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma, 24 zdev->iommu_pages * PAGE_SIZE); 25 } 26 27 unsigned long *dma_alloc_cpu_table(void) 28 { 29 unsigned long *table, *entry; 30 31 table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC); 32 if (!table) 33 return NULL; 34 35 for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++) 36 *entry = ZPCI_TABLE_INVALID; 37 return table; 38 } 39 40 static void dma_free_cpu_table(void *table) 41 { 42 kmem_cache_free(dma_region_table_cache, table); 43 } 44 45 static unsigned long *dma_alloc_page_table(void) 46 { 47 unsigned long *table, *entry; 48 49 table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC); 50 if (!table) 51 return NULL; 52 53 for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++) 54 *entry = ZPCI_PTE_INVALID; 55 return table; 56 } 57 58 static void dma_free_page_table(void *table) 59 { 60 kmem_cache_free(dma_page_table_cache, table); 61 } 62 63 static unsigned long *dma_get_seg_table_origin(unsigned long *entry) 64 { 65 unsigned long *sto; 66 67 if (reg_entry_isvalid(*entry)) 68 sto = get_rt_sto(*entry); 69 else { 70 sto = dma_alloc_cpu_table(); 71 if (!sto) 72 return NULL; 73 74 set_rt_sto(entry, sto); 75 validate_rt_entry(entry); 76 entry_clr_protected(entry); 77 } 78 return sto; 79 } 80 81 static unsigned long *dma_get_page_table_origin(unsigned long *entry) 82 { 83 unsigned long *pto; 84 85 if (reg_entry_isvalid(*entry)) 86 pto = get_st_pto(*entry); 87 else { 88 pto = dma_alloc_page_table(); 89 if (!pto) 90 return NULL; 91 set_st_pto(entry, pto); 92 validate_st_entry(entry); 93 entry_clr_protected(entry); 94 } 95 return pto; 96 } 97 98 unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr) 99 { 100 unsigned long *sto, *pto; 101 unsigned int rtx, sx, px; 102 103 rtx = calc_rtx(dma_addr); 104 sto = dma_get_seg_table_origin(&rto[rtx]); 105 if (!sto) 106 return NULL; 107 108 sx = calc_sx(dma_addr); 109 pto = dma_get_page_table_origin(&sto[sx]); 110 if (!pto) 111 return NULL; 112 113 px = calc_px(dma_addr); 114 return &pto[px]; 115 } 116 117 void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags) 118 { 119 if (flags & ZPCI_PTE_INVALID) { 120 invalidate_pt_entry(entry); 121 } else { 122 set_pt_pfaa(entry, page_addr); 123 validate_pt_entry(entry); 124 } 125 126 if (flags & ZPCI_TABLE_PROTECTED) 127 entry_set_protected(entry); 128 else 129 entry_clr_protected(entry); 130 } 131 132 static int __dma_update_trans(struct zpci_dev *zdev, unsigned long pa, 133 dma_addr_t dma_addr, size_t size, int flags) 134 { 135 unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; 136 u8 *page_addr = (u8 *) (pa & PAGE_MASK); 137 unsigned long irq_flags; 138 unsigned long *entry; 139 int i, rc = 0; 140 141 if (!nr_pages) 142 return -EINVAL; 143 144 spin_lock_irqsave(&zdev->dma_table_lock, irq_flags); 145 if (!zdev->dma_table) { 146 rc = -EINVAL; 147 goto out_unlock; 148 } 149 150 for (i = 0; i < nr_pages; i++) { 151 entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); 152 if (!entry) { 153 rc = -ENOMEM; 154 goto undo_cpu_trans; 155 } 156 dma_update_cpu_trans(entry, page_addr, flags); 157 page_addr += PAGE_SIZE; 158 dma_addr += PAGE_SIZE; 159 } 160 161 undo_cpu_trans: 162 if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) { 163 flags = ZPCI_PTE_INVALID; 164 while (i-- > 0) { 165 page_addr -= PAGE_SIZE; 166 dma_addr -= PAGE_SIZE; 167 entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); 168 if (!entry) 169 break; 170 dma_update_cpu_trans(entry, page_addr, flags); 171 } 172 } 173 out_unlock: 174 spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags); 175 return rc; 176 } 177 178 static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr, 179 size_t size, int flags) 180 { 181 /* 182 * With zdev->tlb_refresh == 0, rpcit is not required to establish new 183 * translations when previously invalid translation-table entries are 184 * validated. With lazy unmap, it also is skipped for previously valid 185 * entries, but a global rpcit is then required before any address can 186 * be re-used, i.e. after each iommu bitmap wrap-around. 187 */ 188 if (!zdev->tlb_refresh && 189 (!s390_iommu_strict || 190 ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))) 191 return 0; 192 193 return zpci_refresh_trans((u64) zdev->fh << 32, dma_addr, 194 PAGE_ALIGN(size)); 195 } 196 197 static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, 198 dma_addr_t dma_addr, size_t size, int flags) 199 { 200 int rc; 201 202 rc = __dma_update_trans(zdev, pa, dma_addr, size, flags); 203 if (rc) 204 return rc; 205 206 rc = __dma_purge_tlb(zdev, dma_addr, size, flags); 207 if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) 208 __dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID); 209 210 return rc; 211 } 212 213 void dma_free_seg_table(unsigned long entry) 214 { 215 unsigned long *sto = get_rt_sto(entry); 216 int sx; 217 218 for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++) 219 if (reg_entry_isvalid(sto[sx])) 220 dma_free_page_table(get_st_pto(sto[sx])); 221 222 dma_free_cpu_table(sto); 223 } 224 225 void dma_cleanup_tables(unsigned long *table) 226 { 227 int rtx; 228 229 if (!table) 230 return; 231 232 for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++) 233 if (reg_entry_isvalid(table[rtx])) 234 dma_free_seg_table(table[rtx]); 235 236 dma_free_cpu_table(table); 237 } 238 239 static unsigned long __dma_alloc_iommu(struct device *dev, 240 unsigned long start, int size) 241 { 242 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 243 unsigned long boundary_size; 244 245 boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, 246 PAGE_SIZE) >> PAGE_SHIFT; 247 return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, 248 start, size, zdev->start_dma >> PAGE_SHIFT, 249 boundary_size, 0); 250 } 251 252 static dma_addr_t dma_alloc_address(struct device *dev, int size) 253 { 254 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 255 unsigned long offset, flags; 256 257 spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); 258 offset = __dma_alloc_iommu(dev, zdev->next_bit, size); 259 if (offset == -1) { 260 if (!zdev->tlb_refresh && !s390_iommu_strict) { 261 /* global flush before DMA addresses are reused */ 262 if (zpci_refresh_global(zdev)) 263 goto out_error; 264 265 bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap, 266 zdev->lazy_bitmap, zdev->iommu_pages); 267 bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages); 268 } 269 /* wrap-around */ 270 offset = __dma_alloc_iommu(dev, 0, size); 271 if (offset == -1) 272 goto out_error; 273 } 274 zdev->next_bit = offset + size; 275 spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); 276 277 return zdev->start_dma + offset * PAGE_SIZE; 278 279 out_error: 280 spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); 281 return DMA_ERROR_CODE; 282 } 283 284 static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size) 285 { 286 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 287 unsigned long flags, offset; 288 289 offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT; 290 291 spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); 292 if (!zdev->iommu_bitmap) 293 goto out; 294 295 if (zdev->tlb_refresh || s390_iommu_strict) 296 bitmap_clear(zdev->iommu_bitmap, offset, size); 297 else 298 bitmap_set(zdev->lazy_bitmap, offset, size); 299 300 out: 301 spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); 302 } 303 304 static inline void zpci_err_dma(unsigned long rc, unsigned long addr) 305 { 306 struct { 307 unsigned long rc; 308 unsigned long addr; 309 } __packed data = {rc, addr}; 310 311 zpci_err_hex(&data, sizeof(data)); 312 } 313 314 static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, 315 unsigned long offset, size_t size, 316 enum dma_data_direction direction, 317 unsigned long attrs) 318 { 319 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 320 unsigned long pa = page_to_phys(page) + offset; 321 int flags = ZPCI_PTE_VALID; 322 unsigned long nr_pages; 323 dma_addr_t dma_addr; 324 int ret; 325 326 /* This rounds up number of pages based on size and offset */ 327 nr_pages = iommu_num_pages(pa, size, PAGE_SIZE); 328 dma_addr = dma_alloc_address(dev, nr_pages); 329 if (dma_addr == DMA_ERROR_CODE) { 330 ret = -ENOSPC; 331 goto out_err; 332 } 333 334 /* Use rounded up size */ 335 size = nr_pages * PAGE_SIZE; 336 337 if (direction == DMA_NONE || direction == DMA_TO_DEVICE) 338 flags |= ZPCI_TABLE_PROTECTED; 339 340 ret = dma_update_trans(zdev, pa, dma_addr, size, flags); 341 if (ret) 342 goto out_free; 343 344 atomic64_add(nr_pages, &zdev->mapped_pages); 345 return dma_addr + (offset & ~PAGE_MASK); 346 347 out_free: 348 dma_free_address(dev, dma_addr, nr_pages); 349 out_err: 350 zpci_err("map error:\n"); 351 zpci_err_dma(ret, pa); 352 return DMA_ERROR_CODE; 353 } 354 355 static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, 356 size_t size, enum dma_data_direction direction, 357 unsigned long attrs) 358 { 359 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 360 int npages, ret; 361 362 npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); 363 dma_addr = dma_addr & PAGE_MASK; 364 ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE, 365 ZPCI_PTE_INVALID); 366 if (ret) { 367 zpci_err("unmap error:\n"); 368 zpci_err_dma(ret, dma_addr); 369 return; 370 } 371 372 atomic64_add(npages, &zdev->unmapped_pages); 373 dma_free_address(dev, dma_addr, npages); 374 } 375 376 static void *s390_dma_alloc(struct device *dev, size_t size, 377 dma_addr_t *dma_handle, gfp_t flag, 378 unsigned long attrs) 379 { 380 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 381 struct page *page; 382 unsigned long pa; 383 dma_addr_t map; 384 385 size = PAGE_ALIGN(size); 386 page = alloc_pages(flag, get_order(size)); 387 if (!page) 388 return NULL; 389 390 pa = page_to_phys(page); 391 memset((void *) pa, 0, size); 392 393 map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0); 394 if (dma_mapping_error(dev, map)) { 395 free_pages(pa, get_order(size)); 396 return NULL; 397 } 398 399 atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages); 400 if (dma_handle) 401 *dma_handle = map; 402 return (void *) pa; 403 } 404 405 static void s390_dma_free(struct device *dev, size_t size, 406 void *pa, dma_addr_t dma_handle, 407 unsigned long attrs) 408 { 409 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 410 411 size = PAGE_ALIGN(size); 412 atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages); 413 s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0); 414 free_pages((unsigned long) pa, get_order(size)); 415 } 416 417 /* Map a segment into a contiguous dma address area */ 418 static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg, 419 size_t size, dma_addr_t *handle, 420 enum dma_data_direction dir) 421 { 422 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 423 dma_addr_t dma_addr_base, dma_addr; 424 int flags = ZPCI_PTE_VALID; 425 struct scatterlist *s; 426 unsigned long pa = 0; 427 int ret; 428 429 size = PAGE_ALIGN(size); 430 dma_addr_base = dma_alloc_address(dev, size >> PAGE_SHIFT); 431 if (dma_addr_base == DMA_ERROR_CODE) 432 return -ENOMEM; 433 434 dma_addr = dma_addr_base; 435 if (dir == DMA_NONE || dir == DMA_TO_DEVICE) 436 flags |= ZPCI_TABLE_PROTECTED; 437 438 for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) { 439 pa = page_to_phys(sg_page(s)) + s->offset; 440 ret = __dma_update_trans(zdev, pa, dma_addr, s->length, flags); 441 if (ret) 442 goto unmap; 443 444 dma_addr += s->length; 445 } 446 ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags); 447 if (ret) 448 goto unmap; 449 450 *handle = dma_addr_base; 451 atomic64_add(size >> PAGE_SHIFT, &zdev->mapped_pages); 452 453 return ret; 454 455 unmap: 456 dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base, 457 ZPCI_PTE_INVALID); 458 dma_free_address(dev, dma_addr_base, size >> PAGE_SHIFT); 459 zpci_err("map error:\n"); 460 zpci_err_dma(ret, pa); 461 return ret; 462 } 463 464 static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, 465 int nr_elements, enum dma_data_direction dir, 466 unsigned long attrs) 467 { 468 struct scatterlist *s = sg, *start = sg, *dma = sg; 469 unsigned int max = dma_get_max_seg_size(dev); 470 unsigned int size = s->offset + s->length; 471 unsigned int offset = s->offset; 472 int count = 0, i; 473 474 for (i = 1; i < nr_elements; i++) { 475 s = sg_next(s); 476 477 s->dma_address = DMA_ERROR_CODE; 478 s->dma_length = 0; 479 480 if (s->offset || (size & ~PAGE_MASK) || 481 size + s->length > max) { 482 if (__s390_dma_map_sg(dev, start, size, 483 &dma->dma_address, dir)) 484 goto unmap; 485 486 dma->dma_address += offset; 487 dma->dma_length = size - offset; 488 489 size = offset = s->offset; 490 start = s; 491 dma = sg_next(dma); 492 count++; 493 } 494 size += s->length; 495 } 496 if (__s390_dma_map_sg(dev, start, size, &dma->dma_address, dir)) 497 goto unmap; 498 499 dma->dma_address += offset; 500 dma->dma_length = size - offset; 501 502 return count + 1; 503 unmap: 504 for_each_sg(sg, s, count, i) 505 s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s), 506 dir, attrs); 507 508 return 0; 509 } 510 511 static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, 512 int nr_elements, enum dma_data_direction dir, 513 unsigned long attrs) 514 { 515 struct scatterlist *s; 516 int i; 517 518 for_each_sg(sg, s, nr_elements, i) { 519 if (s->dma_length) 520 s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, 521 dir, attrs); 522 s->dma_address = 0; 523 s->dma_length = 0; 524 } 525 } 526 527 int zpci_dma_init_device(struct zpci_dev *zdev) 528 { 529 int rc; 530 531 /* 532 * At this point, if the device is part of an IOMMU domain, this would 533 * be a strong hint towards a bug in the IOMMU API (common) code and/or 534 * simultaneous access via IOMMU and DMA API. So let's issue a warning. 535 */ 536 WARN_ON(zdev->s390_domain); 537 538 spin_lock_init(&zdev->iommu_bitmap_lock); 539 spin_lock_init(&zdev->dma_table_lock); 540 541 zdev->dma_table = dma_alloc_cpu_table(); 542 if (!zdev->dma_table) { 543 rc = -ENOMEM; 544 goto out; 545 } 546 547 /* 548 * Restrict the iommu bitmap size to the minimum of the following: 549 * - main memory size 550 * - 3-level pagetable address limit minus start_dma offset 551 * - DMA address range allowed by the hardware (clp query pci fn) 552 * 553 * Also set zdev->end_dma to the actual end address of the usable 554 * range, instead of the theoretical maximum as reported by hardware. 555 */ 556 zdev->start_dma = PAGE_ALIGN(zdev->start_dma); 557 zdev->iommu_size = min3((u64) high_memory, 558 ZPCI_TABLE_SIZE_RT - zdev->start_dma, 559 zdev->end_dma - zdev->start_dma + 1); 560 zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1; 561 zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT; 562 zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8); 563 if (!zdev->iommu_bitmap) { 564 rc = -ENOMEM; 565 goto free_dma_table; 566 } 567 if (!zdev->tlb_refresh && !s390_iommu_strict) { 568 zdev->lazy_bitmap = vzalloc(zdev->iommu_pages / 8); 569 if (!zdev->lazy_bitmap) { 570 rc = -ENOMEM; 571 goto free_bitmap; 572 } 573 574 } 575 rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, 576 (u64) zdev->dma_table); 577 if (rc) 578 goto free_bitmap; 579 580 return 0; 581 free_bitmap: 582 vfree(zdev->iommu_bitmap); 583 zdev->iommu_bitmap = NULL; 584 vfree(zdev->lazy_bitmap); 585 zdev->lazy_bitmap = NULL; 586 free_dma_table: 587 dma_free_cpu_table(zdev->dma_table); 588 zdev->dma_table = NULL; 589 out: 590 return rc; 591 } 592 593 void zpci_dma_exit_device(struct zpci_dev *zdev) 594 { 595 /* 596 * At this point, if the device is part of an IOMMU domain, this would 597 * be a strong hint towards a bug in the IOMMU API (common) code and/or 598 * simultaneous access via IOMMU and DMA API. So let's issue a warning. 599 */ 600 WARN_ON(zdev->s390_domain); 601 602 zpci_unregister_ioat(zdev, 0); 603 dma_cleanup_tables(zdev->dma_table); 604 zdev->dma_table = NULL; 605 vfree(zdev->iommu_bitmap); 606 zdev->iommu_bitmap = NULL; 607 vfree(zdev->lazy_bitmap); 608 zdev->lazy_bitmap = NULL; 609 610 zdev->next_bit = 0; 611 } 612 613 static int __init dma_alloc_cpu_table_caches(void) 614 { 615 dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables", 616 ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN, 617 0, NULL); 618 if (!dma_region_table_cache) 619 return -ENOMEM; 620 621 dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables", 622 ZPCI_PT_SIZE, ZPCI_PT_ALIGN, 623 0, NULL); 624 if (!dma_page_table_cache) { 625 kmem_cache_destroy(dma_region_table_cache); 626 return -ENOMEM; 627 } 628 return 0; 629 } 630 631 int __init zpci_dma_init(void) 632 { 633 return dma_alloc_cpu_table_caches(); 634 } 635 636 void zpci_dma_exit(void) 637 { 638 kmem_cache_destroy(dma_page_table_cache); 639 kmem_cache_destroy(dma_region_table_cache); 640 } 641 642 #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) 643 644 static int __init dma_debug_do_init(void) 645 { 646 dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); 647 return 0; 648 } 649 fs_initcall(dma_debug_do_init); 650 651 struct dma_map_ops s390_pci_dma_ops = { 652 .alloc = s390_dma_alloc, 653 .free = s390_dma_free, 654 .map_sg = s390_dma_map_sg, 655 .unmap_sg = s390_dma_unmap_sg, 656 .map_page = s390_dma_map_pages, 657 .unmap_page = s390_dma_unmap_pages, 658 /* if we support direct DMA this must be conditional */ 659 .is_phys = 0, 660 /* dma_supported is unconditionally true without a callback */ 661 }; 662 EXPORT_SYMBOL_GPL(s390_pci_dma_ops); 663 664 static int __init s390_iommu_setup(char *str) 665 { 666 if (!strncmp(str, "strict", 6)) 667 s390_iommu_strict = 1; 668 return 0; 669 } 670 671 __setup("s390_iommu=", s390_iommu_setup); 672