1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright IBM Corp. 2012 4 * 5 * Author(s): 6 * Jan Glauber <jang@linux.vnet.ibm.com> 7 */ 8 9 #include <linux/kernel.h> 10 #include <linux/slab.h> 11 #include <linux/export.h> 12 #include <linux/iommu-helper.h> 13 #include <linux/dma-mapping.h> 14 #include <linux/vmalloc.h> 15 #include <linux/pci.h> 16 #include <asm/pci_dma.h> 17 18 #define S390_MAPPING_ERROR (~(dma_addr_t) 0x0) 19 20 static struct kmem_cache *dma_region_table_cache; 21 static struct kmem_cache *dma_page_table_cache; 22 static int s390_iommu_strict; 23 24 static int zpci_refresh_global(struct zpci_dev *zdev) 25 { 26 return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma, 27 zdev->iommu_pages * PAGE_SIZE); 28 } 29 30 unsigned long *dma_alloc_cpu_table(void) 31 { 32 unsigned long *table, *entry; 33 34 table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC); 35 if (!table) 36 return NULL; 37 38 for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++) 39 *entry = ZPCI_TABLE_INVALID; 40 return table; 41 } 42 43 static void dma_free_cpu_table(void *table) 44 { 45 kmem_cache_free(dma_region_table_cache, table); 46 } 47 48 static unsigned long *dma_alloc_page_table(void) 49 { 50 unsigned long *table, *entry; 51 52 table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC); 53 if (!table) 54 return NULL; 55 56 for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++) 57 *entry = ZPCI_PTE_INVALID; 58 return table; 59 } 60 61 static void dma_free_page_table(void *table) 62 { 63 kmem_cache_free(dma_page_table_cache, table); 64 } 65 66 static unsigned long *dma_get_seg_table_origin(unsigned long *entry) 67 { 68 unsigned long *sto; 69 70 if (reg_entry_isvalid(*entry)) 71 sto = get_rt_sto(*entry); 72 else { 73 sto = dma_alloc_cpu_table(); 74 if (!sto) 75 return NULL; 76 77 set_rt_sto(entry, sto); 78 validate_rt_entry(entry); 79 entry_clr_protected(entry); 80 } 81 return sto; 82 } 83 84 static unsigned long *dma_get_page_table_origin(unsigned long *entry) 85 { 86 unsigned long *pto; 87 88 if (reg_entry_isvalid(*entry)) 89 pto = get_st_pto(*entry); 90 else { 91 pto = dma_alloc_page_table(); 92 if (!pto) 93 return NULL; 94 set_st_pto(entry, pto); 95 validate_st_entry(entry); 96 entry_clr_protected(entry); 97 } 98 return pto; 99 } 100 101 unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr) 102 { 103 unsigned long *sto, *pto; 104 unsigned int rtx, sx, px; 105 106 rtx = calc_rtx(dma_addr); 107 sto = dma_get_seg_table_origin(&rto[rtx]); 108 if (!sto) 109 return NULL; 110 111 sx = calc_sx(dma_addr); 112 pto = dma_get_page_table_origin(&sto[sx]); 113 if (!pto) 114 return NULL; 115 116 px = calc_px(dma_addr); 117 return &pto[px]; 118 } 119 120 void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags) 121 { 122 if (flags & ZPCI_PTE_INVALID) { 123 invalidate_pt_entry(entry); 124 } else { 125 set_pt_pfaa(entry, page_addr); 126 validate_pt_entry(entry); 127 } 128 129 if (flags & ZPCI_TABLE_PROTECTED) 130 entry_set_protected(entry); 131 else 132 entry_clr_protected(entry); 133 } 134 135 static int __dma_update_trans(struct zpci_dev *zdev, unsigned long pa, 136 dma_addr_t dma_addr, size_t size, int flags) 137 { 138 unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; 139 u8 *page_addr = (u8 *) (pa & PAGE_MASK); 140 unsigned long irq_flags; 141 unsigned long *entry; 142 int i, rc = 0; 143 144 if (!nr_pages) 145 return -EINVAL; 146 147 spin_lock_irqsave(&zdev->dma_table_lock, irq_flags); 148 if (!zdev->dma_table) { 149 rc = -EINVAL; 150 goto out_unlock; 151 } 152 153 for (i = 0; i < nr_pages; i++) { 154 entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); 155 if (!entry) { 156 rc = -ENOMEM; 157 goto undo_cpu_trans; 158 } 159 dma_update_cpu_trans(entry, page_addr, flags); 160 page_addr += PAGE_SIZE; 161 dma_addr += PAGE_SIZE; 162 } 163 164 undo_cpu_trans: 165 if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) { 166 flags = ZPCI_PTE_INVALID; 167 while (i-- > 0) { 168 page_addr -= PAGE_SIZE; 169 dma_addr -= PAGE_SIZE; 170 entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); 171 if (!entry) 172 break; 173 dma_update_cpu_trans(entry, page_addr, flags); 174 } 175 } 176 out_unlock: 177 spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags); 178 return rc; 179 } 180 181 static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr, 182 size_t size, int flags) 183 { 184 unsigned long irqflags; 185 int ret; 186 187 /* 188 * With zdev->tlb_refresh == 0, rpcit is not required to establish new 189 * translations when previously invalid translation-table entries are 190 * validated. With lazy unmap, rpcit is skipped for previously valid 191 * entries, but a global rpcit is then required before any address can 192 * be re-used, i.e. after each iommu bitmap wrap-around. 193 */ 194 if ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID) { 195 if (!zdev->tlb_refresh) 196 return 0; 197 } else { 198 if (!s390_iommu_strict) 199 return 0; 200 } 201 202 ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr, 203 PAGE_ALIGN(size)); 204 if (ret == -ENOMEM && !s390_iommu_strict) { 205 /* enable the hypervisor to free some resources */ 206 if (zpci_refresh_global(zdev)) 207 goto out; 208 209 spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags); 210 bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap, 211 zdev->lazy_bitmap, zdev->iommu_pages); 212 bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages); 213 spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags); 214 ret = 0; 215 } 216 out: 217 return ret; 218 } 219 220 static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, 221 dma_addr_t dma_addr, size_t size, int flags) 222 { 223 int rc; 224 225 rc = __dma_update_trans(zdev, pa, dma_addr, size, flags); 226 if (rc) 227 return rc; 228 229 rc = __dma_purge_tlb(zdev, dma_addr, size, flags); 230 if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) 231 __dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID); 232 233 return rc; 234 } 235 236 void dma_free_seg_table(unsigned long entry) 237 { 238 unsigned long *sto = get_rt_sto(entry); 239 int sx; 240 241 for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++) 242 if (reg_entry_isvalid(sto[sx])) 243 dma_free_page_table(get_st_pto(sto[sx])); 244 245 dma_free_cpu_table(sto); 246 } 247 248 void dma_cleanup_tables(unsigned long *table) 249 { 250 int rtx; 251 252 if (!table) 253 return; 254 255 for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++) 256 if (reg_entry_isvalid(table[rtx])) 257 dma_free_seg_table(table[rtx]); 258 259 dma_free_cpu_table(table); 260 } 261 262 static unsigned long __dma_alloc_iommu(struct device *dev, 263 unsigned long start, int size) 264 { 265 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 266 unsigned long boundary_size; 267 268 boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, 269 PAGE_SIZE) >> PAGE_SHIFT; 270 return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, 271 start, size, zdev->start_dma >> PAGE_SHIFT, 272 boundary_size, 0); 273 } 274 275 static dma_addr_t dma_alloc_address(struct device *dev, int size) 276 { 277 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 278 unsigned long offset, flags; 279 280 spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); 281 offset = __dma_alloc_iommu(dev, zdev->next_bit, size); 282 if (offset == -1) { 283 if (!s390_iommu_strict) { 284 /* global flush before DMA addresses are reused */ 285 if (zpci_refresh_global(zdev)) 286 goto out_error; 287 288 bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap, 289 zdev->lazy_bitmap, zdev->iommu_pages); 290 bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages); 291 } 292 /* wrap-around */ 293 offset = __dma_alloc_iommu(dev, 0, size); 294 if (offset == -1) 295 goto out_error; 296 } 297 zdev->next_bit = offset + size; 298 spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); 299 300 return zdev->start_dma + offset * PAGE_SIZE; 301 302 out_error: 303 spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); 304 return S390_MAPPING_ERROR; 305 } 306 307 static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size) 308 { 309 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 310 unsigned long flags, offset; 311 312 offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT; 313 314 spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); 315 if (!zdev->iommu_bitmap) 316 goto out; 317 318 if (s390_iommu_strict) 319 bitmap_clear(zdev->iommu_bitmap, offset, size); 320 else 321 bitmap_set(zdev->lazy_bitmap, offset, size); 322 323 out: 324 spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); 325 } 326 327 static inline void zpci_err_dma(unsigned long rc, unsigned long addr) 328 { 329 struct { 330 unsigned long rc; 331 unsigned long addr; 332 } __packed data = {rc, addr}; 333 334 zpci_err_hex(&data, sizeof(data)); 335 } 336 337 static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, 338 unsigned long offset, size_t size, 339 enum dma_data_direction direction, 340 unsigned long attrs) 341 { 342 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 343 unsigned long pa = page_to_phys(page) + offset; 344 int flags = ZPCI_PTE_VALID; 345 unsigned long nr_pages; 346 dma_addr_t dma_addr; 347 int ret; 348 349 /* This rounds up number of pages based on size and offset */ 350 nr_pages = iommu_num_pages(pa, size, PAGE_SIZE); 351 dma_addr = dma_alloc_address(dev, nr_pages); 352 if (dma_addr == S390_MAPPING_ERROR) { 353 ret = -ENOSPC; 354 goto out_err; 355 } 356 357 /* Use rounded up size */ 358 size = nr_pages * PAGE_SIZE; 359 360 if (direction == DMA_NONE || direction == DMA_TO_DEVICE) 361 flags |= ZPCI_TABLE_PROTECTED; 362 363 ret = dma_update_trans(zdev, pa, dma_addr, size, flags); 364 if (ret) 365 goto out_free; 366 367 atomic64_add(nr_pages, &zdev->mapped_pages); 368 return dma_addr + (offset & ~PAGE_MASK); 369 370 out_free: 371 dma_free_address(dev, dma_addr, nr_pages); 372 out_err: 373 zpci_err("map error:\n"); 374 zpci_err_dma(ret, pa); 375 return S390_MAPPING_ERROR; 376 } 377 378 static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, 379 size_t size, enum dma_data_direction direction, 380 unsigned long attrs) 381 { 382 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 383 int npages, ret; 384 385 npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); 386 dma_addr = dma_addr & PAGE_MASK; 387 ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE, 388 ZPCI_PTE_INVALID); 389 if (ret) { 390 zpci_err("unmap error:\n"); 391 zpci_err_dma(ret, dma_addr); 392 return; 393 } 394 395 atomic64_add(npages, &zdev->unmapped_pages); 396 dma_free_address(dev, dma_addr, npages); 397 } 398 399 static void *s390_dma_alloc(struct device *dev, size_t size, 400 dma_addr_t *dma_handle, gfp_t flag, 401 unsigned long attrs) 402 { 403 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 404 struct page *page; 405 unsigned long pa; 406 dma_addr_t map; 407 408 size = PAGE_ALIGN(size); 409 page = alloc_pages(flag, get_order(size)); 410 if (!page) 411 return NULL; 412 413 pa = page_to_phys(page); 414 map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0); 415 if (dma_mapping_error(dev, map)) { 416 free_pages(pa, get_order(size)); 417 return NULL; 418 } 419 420 atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages); 421 if (dma_handle) 422 *dma_handle = map; 423 return (void *) pa; 424 } 425 426 static void s390_dma_free(struct device *dev, size_t size, 427 void *pa, dma_addr_t dma_handle, 428 unsigned long attrs) 429 { 430 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 431 432 size = PAGE_ALIGN(size); 433 atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages); 434 s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0); 435 free_pages((unsigned long) pa, get_order(size)); 436 } 437 438 /* Map a segment into a contiguous dma address area */ 439 static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg, 440 size_t size, dma_addr_t *handle, 441 enum dma_data_direction dir) 442 { 443 unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; 444 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 445 dma_addr_t dma_addr_base, dma_addr; 446 int flags = ZPCI_PTE_VALID; 447 struct scatterlist *s; 448 unsigned long pa = 0; 449 int ret; 450 451 dma_addr_base = dma_alloc_address(dev, nr_pages); 452 if (dma_addr_base == S390_MAPPING_ERROR) 453 return -ENOMEM; 454 455 dma_addr = dma_addr_base; 456 if (dir == DMA_NONE || dir == DMA_TO_DEVICE) 457 flags |= ZPCI_TABLE_PROTECTED; 458 459 for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) { 460 pa = page_to_phys(sg_page(s)); 461 ret = __dma_update_trans(zdev, pa, dma_addr, 462 s->offset + s->length, flags); 463 if (ret) 464 goto unmap; 465 466 dma_addr += s->offset + s->length; 467 } 468 ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags); 469 if (ret) 470 goto unmap; 471 472 *handle = dma_addr_base; 473 atomic64_add(nr_pages, &zdev->mapped_pages); 474 475 return ret; 476 477 unmap: 478 dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base, 479 ZPCI_PTE_INVALID); 480 dma_free_address(dev, dma_addr_base, nr_pages); 481 zpci_err("map error:\n"); 482 zpci_err_dma(ret, pa); 483 return ret; 484 } 485 486 static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, 487 int nr_elements, enum dma_data_direction dir, 488 unsigned long attrs) 489 { 490 struct scatterlist *s = sg, *start = sg, *dma = sg; 491 unsigned int max = dma_get_max_seg_size(dev); 492 unsigned int size = s->offset + s->length; 493 unsigned int offset = s->offset; 494 int count = 0, i; 495 496 for (i = 1; i < nr_elements; i++) { 497 s = sg_next(s); 498 499 s->dma_address = S390_MAPPING_ERROR; 500 s->dma_length = 0; 501 502 if (s->offset || (size & ~PAGE_MASK) || 503 size + s->length > max) { 504 if (__s390_dma_map_sg(dev, start, size, 505 &dma->dma_address, dir)) 506 goto unmap; 507 508 dma->dma_address += offset; 509 dma->dma_length = size - offset; 510 511 size = offset = s->offset; 512 start = s; 513 dma = sg_next(dma); 514 count++; 515 } 516 size += s->length; 517 } 518 if (__s390_dma_map_sg(dev, start, size, &dma->dma_address, dir)) 519 goto unmap; 520 521 dma->dma_address += offset; 522 dma->dma_length = size - offset; 523 524 return count + 1; 525 unmap: 526 for_each_sg(sg, s, count, i) 527 s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s), 528 dir, attrs); 529 530 return 0; 531 } 532 533 static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, 534 int nr_elements, enum dma_data_direction dir, 535 unsigned long attrs) 536 { 537 struct scatterlist *s; 538 int i; 539 540 for_each_sg(sg, s, nr_elements, i) { 541 if (s->dma_length) 542 s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, 543 dir, attrs); 544 s->dma_address = 0; 545 s->dma_length = 0; 546 } 547 } 548 549 static int s390_mapping_error(struct device *dev, dma_addr_t dma_addr) 550 { 551 return dma_addr == S390_MAPPING_ERROR; 552 } 553 554 int zpci_dma_init_device(struct zpci_dev *zdev) 555 { 556 int rc; 557 558 /* 559 * At this point, if the device is part of an IOMMU domain, this would 560 * be a strong hint towards a bug in the IOMMU API (common) code and/or 561 * simultaneous access via IOMMU and DMA API. So let's issue a warning. 562 */ 563 WARN_ON(zdev->s390_domain); 564 565 spin_lock_init(&zdev->iommu_bitmap_lock); 566 spin_lock_init(&zdev->dma_table_lock); 567 568 zdev->dma_table = dma_alloc_cpu_table(); 569 if (!zdev->dma_table) { 570 rc = -ENOMEM; 571 goto out; 572 } 573 574 /* 575 * Restrict the iommu bitmap size to the minimum of the following: 576 * - main memory size 577 * - 3-level pagetable address limit minus start_dma offset 578 * - DMA address range allowed by the hardware (clp query pci fn) 579 * 580 * Also set zdev->end_dma to the actual end address of the usable 581 * range, instead of the theoretical maximum as reported by hardware. 582 */ 583 zdev->start_dma = PAGE_ALIGN(zdev->start_dma); 584 zdev->iommu_size = min3((u64) high_memory, 585 ZPCI_TABLE_SIZE_RT - zdev->start_dma, 586 zdev->end_dma - zdev->start_dma + 1); 587 zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1; 588 zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT; 589 zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8); 590 if (!zdev->iommu_bitmap) { 591 rc = -ENOMEM; 592 goto free_dma_table; 593 } 594 if (!s390_iommu_strict) { 595 zdev->lazy_bitmap = vzalloc(zdev->iommu_pages / 8); 596 if (!zdev->lazy_bitmap) { 597 rc = -ENOMEM; 598 goto free_bitmap; 599 } 600 601 } 602 rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, 603 (u64) zdev->dma_table); 604 if (rc) 605 goto free_bitmap; 606 607 return 0; 608 free_bitmap: 609 vfree(zdev->iommu_bitmap); 610 zdev->iommu_bitmap = NULL; 611 vfree(zdev->lazy_bitmap); 612 zdev->lazy_bitmap = NULL; 613 free_dma_table: 614 dma_free_cpu_table(zdev->dma_table); 615 zdev->dma_table = NULL; 616 out: 617 return rc; 618 } 619 620 void zpci_dma_exit_device(struct zpci_dev *zdev) 621 { 622 /* 623 * At this point, if the device is part of an IOMMU domain, this would 624 * be a strong hint towards a bug in the IOMMU API (common) code and/or 625 * simultaneous access via IOMMU and DMA API. So let's issue a warning. 626 */ 627 WARN_ON(zdev->s390_domain); 628 629 if (zpci_unregister_ioat(zdev, 0)) 630 return; 631 632 dma_cleanup_tables(zdev->dma_table); 633 zdev->dma_table = NULL; 634 vfree(zdev->iommu_bitmap); 635 zdev->iommu_bitmap = NULL; 636 vfree(zdev->lazy_bitmap); 637 zdev->lazy_bitmap = NULL; 638 639 zdev->next_bit = 0; 640 } 641 642 static int __init dma_alloc_cpu_table_caches(void) 643 { 644 dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables", 645 ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN, 646 0, NULL); 647 if (!dma_region_table_cache) 648 return -ENOMEM; 649 650 dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables", 651 ZPCI_PT_SIZE, ZPCI_PT_ALIGN, 652 0, NULL); 653 if (!dma_page_table_cache) { 654 kmem_cache_destroy(dma_region_table_cache); 655 return -ENOMEM; 656 } 657 return 0; 658 } 659 660 int __init zpci_dma_init(void) 661 { 662 return dma_alloc_cpu_table_caches(); 663 } 664 665 void zpci_dma_exit(void) 666 { 667 kmem_cache_destroy(dma_page_table_cache); 668 kmem_cache_destroy(dma_region_table_cache); 669 } 670 671 const struct dma_map_ops s390_pci_dma_ops = { 672 .alloc = s390_dma_alloc, 673 .free = s390_dma_free, 674 .map_sg = s390_dma_map_sg, 675 .unmap_sg = s390_dma_unmap_sg, 676 .map_page = s390_dma_map_pages, 677 .unmap_page = s390_dma_unmap_pages, 678 .mapping_error = s390_mapping_error, 679 /* dma_supported is unconditionally true without a callback */ 680 }; 681 EXPORT_SYMBOL_GPL(s390_pci_dma_ops); 682 683 static int __init s390_iommu_setup(char *str) 684 { 685 if (!strncmp(str, "strict", 6)) 686 s390_iommu_strict = 1; 687 return 0; 688 } 689 690 __setup("s390_iommu=", s390_iommu_setup); 691