1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright IBM Corp. 2012 4 * 5 * Author(s): 6 * Jan Glauber <jang@linux.vnet.ibm.com> 7 */ 8 9 #include <linux/kernel.h> 10 #include <linux/slab.h> 11 #include <linux/export.h> 12 #include <linux/iommu-helper.h> 13 #include <linux/dma-mapping.h> 14 #include <linux/vmalloc.h> 15 #include <linux/pci.h> 16 #include <asm/pci_dma.h> 17 18 static struct kmem_cache *dma_region_table_cache; 19 static struct kmem_cache *dma_page_table_cache; 20 static int s390_iommu_strict; 21 22 static int zpci_refresh_global(struct zpci_dev *zdev) 23 { 24 return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma, 25 zdev->iommu_pages * PAGE_SIZE); 26 } 27 28 unsigned long *dma_alloc_cpu_table(void) 29 { 30 unsigned long *table, *entry; 31 32 table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC); 33 if (!table) 34 return NULL; 35 36 for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++) 37 *entry = ZPCI_TABLE_INVALID; 38 return table; 39 } 40 41 static void dma_free_cpu_table(void *table) 42 { 43 kmem_cache_free(dma_region_table_cache, table); 44 } 45 46 static unsigned long *dma_alloc_page_table(void) 47 { 48 unsigned long *table, *entry; 49 50 table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC); 51 if (!table) 52 return NULL; 53 54 for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++) 55 *entry = ZPCI_PTE_INVALID; 56 return table; 57 } 58 59 static void dma_free_page_table(void *table) 60 { 61 kmem_cache_free(dma_page_table_cache, table); 62 } 63 64 static unsigned long *dma_get_seg_table_origin(unsigned long *entry) 65 { 66 unsigned long *sto; 67 68 if (reg_entry_isvalid(*entry)) 69 sto = get_rt_sto(*entry); 70 else { 71 sto = dma_alloc_cpu_table(); 72 if (!sto) 73 return NULL; 74 75 set_rt_sto(entry, sto); 76 validate_rt_entry(entry); 77 entry_clr_protected(entry); 78 } 79 return sto; 80 } 81 82 static unsigned long *dma_get_page_table_origin(unsigned long *entry) 83 { 84 unsigned long *pto; 85 86 if (reg_entry_isvalid(*entry)) 87 pto = get_st_pto(*entry); 88 else { 89 pto = dma_alloc_page_table(); 90 if (!pto) 91 return NULL; 92 set_st_pto(entry, pto); 93 validate_st_entry(entry); 94 entry_clr_protected(entry); 95 } 96 return pto; 97 } 98 99 unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr) 100 { 101 unsigned long *sto, *pto; 102 unsigned int rtx, sx, px; 103 104 rtx = calc_rtx(dma_addr); 105 sto = dma_get_seg_table_origin(&rto[rtx]); 106 if (!sto) 107 return NULL; 108 109 sx = calc_sx(dma_addr); 110 pto = dma_get_page_table_origin(&sto[sx]); 111 if (!pto) 112 return NULL; 113 114 px = calc_px(dma_addr); 115 return &pto[px]; 116 } 117 118 void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags) 119 { 120 if (flags & ZPCI_PTE_INVALID) { 121 invalidate_pt_entry(entry); 122 } else { 123 set_pt_pfaa(entry, page_addr); 124 validate_pt_entry(entry); 125 } 126 127 if (flags & ZPCI_TABLE_PROTECTED) 128 entry_set_protected(entry); 129 else 130 entry_clr_protected(entry); 131 } 132 133 static int __dma_update_trans(struct zpci_dev *zdev, unsigned long pa, 134 dma_addr_t dma_addr, size_t size, int flags) 135 { 136 unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; 137 u8 *page_addr = (u8 *) (pa & PAGE_MASK); 138 unsigned long irq_flags; 139 unsigned long *entry; 140 int i, rc = 0; 141 142 if (!nr_pages) 143 return -EINVAL; 144 145 spin_lock_irqsave(&zdev->dma_table_lock, irq_flags); 146 if (!zdev->dma_table) { 147 rc = -EINVAL; 148 goto out_unlock; 149 } 150 151 for (i = 0; i < nr_pages; i++) { 152 entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); 153 if (!entry) { 154 rc = -ENOMEM; 155 goto undo_cpu_trans; 156 } 157 dma_update_cpu_trans(entry, page_addr, flags); 158 page_addr += PAGE_SIZE; 159 dma_addr += PAGE_SIZE; 160 } 161 162 undo_cpu_trans: 163 if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) { 164 flags = ZPCI_PTE_INVALID; 165 while (i-- > 0) { 166 page_addr -= PAGE_SIZE; 167 dma_addr -= PAGE_SIZE; 168 entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); 169 if (!entry) 170 break; 171 dma_update_cpu_trans(entry, page_addr, flags); 172 } 173 } 174 out_unlock: 175 spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags); 176 return rc; 177 } 178 179 static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr, 180 size_t size, int flags) 181 { 182 unsigned long irqflags; 183 int ret; 184 185 /* 186 * With zdev->tlb_refresh == 0, rpcit is not required to establish new 187 * translations when previously invalid translation-table entries are 188 * validated. With lazy unmap, rpcit is skipped for previously valid 189 * entries, but a global rpcit is then required before any address can 190 * be re-used, i.e. after each iommu bitmap wrap-around. 191 */ 192 if ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID) { 193 if (!zdev->tlb_refresh) 194 return 0; 195 } else { 196 if (!s390_iommu_strict) 197 return 0; 198 } 199 200 ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr, 201 PAGE_ALIGN(size)); 202 if (ret == -ENOMEM && !s390_iommu_strict) { 203 /* enable the hypervisor to free some resources */ 204 if (zpci_refresh_global(zdev)) 205 goto out; 206 207 spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags); 208 bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap, 209 zdev->lazy_bitmap, zdev->iommu_pages); 210 bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages); 211 spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags); 212 ret = 0; 213 } 214 out: 215 return ret; 216 } 217 218 static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, 219 dma_addr_t dma_addr, size_t size, int flags) 220 { 221 int rc; 222 223 rc = __dma_update_trans(zdev, pa, dma_addr, size, flags); 224 if (rc) 225 return rc; 226 227 rc = __dma_purge_tlb(zdev, dma_addr, size, flags); 228 if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) 229 __dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID); 230 231 return rc; 232 } 233 234 void dma_free_seg_table(unsigned long entry) 235 { 236 unsigned long *sto = get_rt_sto(entry); 237 int sx; 238 239 for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++) 240 if (reg_entry_isvalid(sto[sx])) 241 dma_free_page_table(get_st_pto(sto[sx])); 242 243 dma_free_cpu_table(sto); 244 } 245 246 void dma_cleanup_tables(unsigned long *table) 247 { 248 int rtx; 249 250 if (!table) 251 return; 252 253 for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++) 254 if (reg_entry_isvalid(table[rtx])) 255 dma_free_seg_table(table[rtx]); 256 257 dma_free_cpu_table(table); 258 } 259 260 static unsigned long __dma_alloc_iommu(struct device *dev, 261 unsigned long start, int size) 262 { 263 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 264 unsigned long boundary_size; 265 266 boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, 267 PAGE_SIZE) >> PAGE_SHIFT; 268 return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, 269 start, size, zdev->start_dma >> PAGE_SHIFT, 270 boundary_size, 0); 271 } 272 273 static dma_addr_t dma_alloc_address(struct device *dev, int size) 274 { 275 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 276 unsigned long offset, flags; 277 278 spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); 279 offset = __dma_alloc_iommu(dev, zdev->next_bit, size); 280 if (offset == -1) { 281 if (!s390_iommu_strict) { 282 /* global flush before DMA addresses are reused */ 283 if (zpci_refresh_global(zdev)) 284 goto out_error; 285 286 bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap, 287 zdev->lazy_bitmap, zdev->iommu_pages); 288 bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages); 289 } 290 /* wrap-around */ 291 offset = __dma_alloc_iommu(dev, 0, size); 292 if (offset == -1) 293 goto out_error; 294 } 295 zdev->next_bit = offset + size; 296 spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); 297 298 return zdev->start_dma + offset * PAGE_SIZE; 299 300 out_error: 301 spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); 302 return DMA_MAPPING_ERROR; 303 } 304 305 static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size) 306 { 307 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 308 unsigned long flags, offset; 309 310 offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT; 311 312 spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); 313 if (!zdev->iommu_bitmap) 314 goto out; 315 316 if (s390_iommu_strict) 317 bitmap_clear(zdev->iommu_bitmap, offset, size); 318 else 319 bitmap_set(zdev->lazy_bitmap, offset, size); 320 321 out: 322 spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); 323 } 324 325 static inline void zpci_err_dma(unsigned long rc, unsigned long addr) 326 { 327 struct { 328 unsigned long rc; 329 unsigned long addr; 330 } __packed data = {rc, addr}; 331 332 zpci_err_hex(&data, sizeof(data)); 333 } 334 335 static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, 336 unsigned long offset, size_t size, 337 enum dma_data_direction direction, 338 unsigned long attrs) 339 { 340 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 341 unsigned long pa = page_to_phys(page) + offset; 342 int flags = ZPCI_PTE_VALID; 343 unsigned long nr_pages; 344 dma_addr_t dma_addr; 345 int ret; 346 347 /* This rounds up number of pages based on size and offset */ 348 nr_pages = iommu_num_pages(pa, size, PAGE_SIZE); 349 dma_addr = dma_alloc_address(dev, nr_pages); 350 if (dma_addr == DMA_MAPPING_ERROR) { 351 ret = -ENOSPC; 352 goto out_err; 353 } 354 355 /* Use rounded up size */ 356 size = nr_pages * PAGE_SIZE; 357 358 if (direction == DMA_NONE || direction == DMA_TO_DEVICE) 359 flags |= ZPCI_TABLE_PROTECTED; 360 361 ret = dma_update_trans(zdev, pa, dma_addr, size, flags); 362 if (ret) 363 goto out_free; 364 365 atomic64_add(nr_pages, &zdev->mapped_pages); 366 return dma_addr + (offset & ~PAGE_MASK); 367 368 out_free: 369 dma_free_address(dev, dma_addr, nr_pages); 370 out_err: 371 zpci_err("map error:\n"); 372 zpci_err_dma(ret, pa); 373 return DMA_MAPPING_ERROR; 374 } 375 376 static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, 377 size_t size, enum dma_data_direction direction, 378 unsigned long attrs) 379 { 380 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 381 int npages, ret; 382 383 npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); 384 dma_addr = dma_addr & PAGE_MASK; 385 ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE, 386 ZPCI_PTE_INVALID); 387 if (ret) { 388 zpci_err("unmap error:\n"); 389 zpci_err_dma(ret, dma_addr); 390 return; 391 } 392 393 atomic64_add(npages, &zdev->unmapped_pages); 394 dma_free_address(dev, dma_addr, npages); 395 } 396 397 static void *s390_dma_alloc(struct device *dev, size_t size, 398 dma_addr_t *dma_handle, gfp_t flag, 399 unsigned long attrs) 400 { 401 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 402 struct page *page; 403 unsigned long pa; 404 dma_addr_t map; 405 406 size = PAGE_ALIGN(size); 407 page = alloc_pages(flag | __GFP_ZERO, get_order(size)); 408 if (!page) 409 return NULL; 410 411 pa = page_to_phys(page); 412 map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0); 413 if (dma_mapping_error(dev, map)) { 414 free_pages(pa, get_order(size)); 415 return NULL; 416 } 417 418 atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages); 419 if (dma_handle) 420 *dma_handle = map; 421 return (void *) pa; 422 } 423 424 static void s390_dma_free(struct device *dev, size_t size, 425 void *pa, dma_addr_t dma_handle, 426 unsigned long attrs) 427 { 428 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 429 430 size = PAGE_ALIGN(size); 431 atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages); 432 s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0); 433 free_pages((unsigned long) pa, get_order(size)); 434 } 435 436 /* Map a segment into a contiguous dma address area */ 437 static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg, 438 size_t size, dma_addr_t *handle, 439 enum dma_data_direction dir) 440 { 441 unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; 442 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 443 dma_addr_t dma_addr_base, dma_addr; 444 int flags = ZPCI_PTE_VALID; 445 struct scatterlist *s; 446 unsigned long pa = 0; 447 int ret; 448 449 dma_addr_base = dma_alloc_address(dev, nr_pages); 450 if (dma_addr_base == DMA_MAPPING_ERROR) 451 return -ENOMEM; 452 453 dma_addr = dma_addr_base; 454 if (dir == DMA_NONE || dir == DMA_TO_DEVICE) 455 flags |= ZPCI_TABLE_PROTECTED; 456 457 for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) { 458 pa = page_to_phys(sg_page(s)); 459 ret = __dma_update_trans(zdev, pa, dma_addr, 460 s->offset + s->length, flags); 461 if (ret) 462 goto unmap; 463 464 dma_addr += s->offset + s->length; 465 } 466 ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags); 467 if (ret) 468 goto unmap; 469 470 *handle = dma_addr_base; 471 atomic64_add(nr_pages, &zdev->mapped_pages); 472 473 return ret; 474 475 unmap: 476 dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base, 477 ZPCI_PTE_INVALID); 478 dma_free_address(dev, dma_addr_base, nr_pages); 479 zpci_err("map error:\n"); 480 zpci_err_dma(ret, pa); 481 return ret; 482 } 483 484 static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, 485 int nr_elements, enum dma_data_direction dir, 486 unsigned long attrs) 487 { 488 struct scatterlist *s = sg, *start = sg, *dma = sg; 489 unsigned int max = dma_get_max_seg_size(dev); 490 unsigned int size = s->offset + s->length; 491 unsigned int offset = s->offset; 492 int count = 0, i; 493 494 for (i = 1; i < nr_elements; i++) { 495 s = sg_next(s); 496 497 s->dma_address = DMA_MAPPING_ERROR; 498 s->dma_length = 0; 499 500 if (s->offset || (size & ~PAGE_MASK) || 501 size + s->length > max) { 502 if (__s390_dma_map_sg(dev, start, size, 503 &dma->dma_address, dir)) 504 goto unmap; 505 506 dma->dma_address += offset; 507 dma->dma_length = size - offset; 508 509 size = offset = s->offset; 510 start = s; 511 dma = sg_next(dma); 512 count++; 513 } 514 size += s->length; 515 } 516 if (__s390_dma_map_sg(dev, start, size, &dma->dma_address, dir)) 517 goto unmap; 518 519 dma->dma_address += offset; 520 dma->dma_length = size - offset; 521 522 return count + 1; 523 unmap: 524 for_each_sg(sg, s, count, i) 525 s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s), 526 dir, attrs); 527 528 return 0; 529 } 530 531 static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, 532 int nr_elements, enum dma_data_direction dir, 533 unsigned long attrs) 534 { 535 struct scatterlist *s; 536 int i; 537 538 for_each_sg(sg, s, nr_elements, i) { 539 if (s->dma_length) 540 s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, 541 dir, attrs); 542 s->dma_address = 0; 543 s->dma_length = 0; 544 } 545 } 546 547 int zpci_dma_init_device(struct zpci_dev *zdev) 548 { 549 int rc; 550 551 /* 552 * At this point, if the device is part of an IOMMU domain, this would 553 * be a strong hint towards a bug in the IOMMU API (common) code and/or 554 * simultaneous access via IOMMU and DMA API. So let's issue a warning. 555 */ 556 WARN_ON(zdev->s390_domain); 557 558 spin_lock_init(&zdev->iommu_bitmap_lock); 559 spin_lock_init(&zdev->dma_table_lock); 560 561 zdev->dma_table = dma_alloc_cpu_table(); 562 if (!zdev->dma_table) { 563 rc = -ENOMEM; 564 goto out; 565 } 566 567 /* 568 * Restrict the iommu bitmap size to the minimum of the following: 569 * - main memory size 570 * - 3-level pagetable address limit minus start_dma offset 571 * - DMA address range allowed by the hardware (clp query pci fn) 572 * 573 * Also set zdev->end_dma to the actual end address of the usable 574 * range, instead of the theoretical maximum as reported by hardware. 575 */ 576 zdev->start_dma = PAGE_ALIGN(zdev->start_dma); 577 zdev->iommu_size = min3((u64) high_memory, 578 ZPCI_TABLE_SIZE_RT - zdev->start_dma, 579 zdev->end_dma - zdev->start_dma + 1); 580 zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1; 581 zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT; 582 zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8); 583 if (!zdev->iommu_bitmap) { 584 rc = -ENOMEM; 585 goto free_dma_table; 586 } 587 if (!s390_iommu_strict) { 588 zdev->lazy_bitmap = vzalloc(zdev->iommu_pages / 8); 589 if (!zdev->lazy_bitmap) { 590 rc = -ENOMEM; 591 goto free_bitmap; 592 } 593 594 } 595 rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, 596 (u64) zdev->dma_table); 597 if (rc) 598 goto free_bitmap; 599 600 return 0; 601 free_bitmap: 602 vfree(zdev->iommu_bitmap); 603 zdev->iommu_bitmap = NULL; 604 vfree(zdev->lazy_bitmap); 605 zdev->lazy_bitmap = NULL; 606 free_dma_table: 607 dma_free_cpu_table(zdev->dma_table); 608 zdev->dma_table = NULL; 609 out: 610 return rc; 611 } 612 613 void zpci_dma_exit_device(struct zpci_dev *zdev) 614 { 615 /* 616 * At this point, if the device is part of an IOMMU domain, this would 617 * be a strong hint towards a bug in the IOMMU API (common) code and/or 618 * simultaneous access via IOMMU and DMA API. So let's issue a warning. 619 */ 620 WARN_ON(zdev->s390_domain); 621 622 if (zpci_unregister_ioat(zdev, 0)) 623 return; 624 625 dma_cleanup_tables(zdev->dma_table); 626 zdev->dma_table = NULL; 627 vfree(zdev->iommu_bitmap); 628 zdev->iommu_bitmap = NULL; 629 vfree(zdev->lazy_bitmap); 630 zdev->lazy_bitmap = NULL; 631 632 zdev->next_bit = 0; 633 } 634 635 static int __init dma_alloc_cpu_table_caches(void) 636 { 637 dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables", 638 ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN, 639 0, NULL); 640 if (!dma_region_table_cache) 641 return -ENOMEM; 642 643 dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables", 644 ZPCI_PT_SIZE, ZPCI_PT_ALIGN, 645 0, NULL); 646 if (!dma_page_table_cache) { 647 kmem_cache_destroy(dma_region_table_cache); 648 return -ENOMEM; 649 } 650 return 0; 651 } 652 653 int __init zpci_dma_init(void) 654 { 655 return dma_alloc_cpu_table_caches(); 656 } 657 658 void zpci_dma_exit(void) 659 { 660 kmem_cache_destroy(dma_page_table_cache); 661 kmem_cache_destroy(dma_region_table_cache); 662 } 663 664 const struct dma_map_ops s390_pci_dma_ops = { 665 .alloc = s390_dma_alloc, 666 .free = s390_dma_free, 667 .map_sg = s390_dma_map_sg, 668 .unmap_sg = s390_dma_unmap_sg, 669 .map_page = s390_dma_map_pages, 670 .unmap_page = s390_dma_unmap_pages, 671 .mmap = dma_common_mmap, 672 .get_sgtable = dma_common_get_sgtable, 673 /* dma_supported is unconditionally true without a callback */ 674 }; 675 EXPORT_SYMBOL_GPL(s390_pci_dma_ops); 676 677 static int __init s390_iommu_setup(char *str) 678 { 679 if (!strcmp(str, "strict")) 680 s390_iommu_strict = 1; 681 return 1; 682 } 683 684 __setup("s390_iommu=", s390_iommu_setup); 685