1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright IBM Corp. 2012 4 * 5 * Author(s): 6 * Jan Glauber <jang@linux.vnet.ibm.com> 7 */ 8 9 #include <linux/kernel.h> 10 #include <linux/slab.h> 11 #include <linux/export.h> 12 #include <linux/iommu-helper.h> 13 #include <linux/dma-map-ops.h> 14 #include <linux/vmalloc.h> 15 #include <linux/pci.h> 16 #include <asm/pci_dma.h> 17 18 static struct kmem_cache *dma_region_table_cache; 19 static struct kmem_cache *dma_page_table_cache; 20 static int s390_iommu_strict; 21 22 static int zpci_refresh_global(struct zpci_dev *zdev) 23 { 24 return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma, 25 zdev->iommu_pages * PAGE_SIZE); 26 } 27 28 unsigned long *dma_alloc_cpu_table(void) 29 { 30 unsigned long *table, *entry; 31 32 table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC); 33 if (!table) 34 return NULL; 35 36 for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++) 37 *entry = ZPCI_TABLE_INVALID; 38 return table; 39 } 40 41 static void dma_free_cpu_table(void *table) 42 { 43 kmem_cache_free(dma_region_table_cache, table); 44 } 45 46 static unsigned long *dma_alloc_page_table(void) 47 { 48 unsigned long *table, *entry; 49 50 table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC); 51 if (!table) 52 return NULL; 53 54 for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++) 55 *entry = ZPCI_PTE_INVALID; 56 return table; 57 } 58 59 static void dma_free_page_table(void *table) 60 { 61 kmem_cache_free(dma_page_table_cache, table); 62 } 63 64 static unsigned long *dma_get_seg_table_origin(unsigned long *entry) 65 { 66 unsigned long *sto; 67 68 if (reg_entry_isvalid(*entry)) 69 sto = get_rt_sto(*entry); 70 else { 71 sto = dma_alloc_cpu_table(); 72 if (!sto) 73 return NULL; 74 75 set_rt_sto(entry, sto); 76 validate_rt_entry(entry); 77 entry_clr_protected(entry); 78 } 79 return sto; 80 } 81 82 static unsigned long *dma_get_page_table_origin(unsigned long *entry) 83 { 84 unsigned long *pto; 85 86 if (reg_entry_isvalid(*entry)) 87 pto = get_st_pto(*entry); 88 else { 89 pto = dma_alloc_page_table(); 90 if (!pto) 91 return NULL; 92 set_st_pto(entry, pto); 93 validate_st_entry(entry); 94 entry_clr_protected(entry); 95 } 96 return pto; 97 } 98 99 unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr) 100 { 101 unsigned long *sto, *pto; 102 unsigned int rtx, sx, px; 103 104 rtx = calc_rtx(dma_addr); 105 sto = dma_get_seg_table_origin(&rto[rtx]); 106 if (!sto) 107 return NULL; 108 109 sx = calc_sx(dma_addr); 110 pto = dma_get_page_table_origin(&sto[sx]); 111 if (!pto) 112 return NULL; 113 114 px = calc_px(dma_addr); 115 return &pto[px]; 116 } 117 118 void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags) 119 { 120 if (flags & ZPCI_PTE_INVALID) { 121 invalidate_pt_entry(entry); 122 } else { 123 set_pt_pfaa(entry, page_addr); 124 validate_pt_entry(entry); 125 } 126 127 if (flags & ZPCI_TABLE_PROTECTED) 128 entry_set_protected(entry); 129 else 130 entry_clr_protected(entry); 131 } 132 133 static int __dma_update_trans(struct zpci_dev *zdev, unsigned long pa, 134 dma_addr_t dma_addr, size_t size, int flags) 135 { 136 unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; 137 u8 *page_addr = (u8 *) (pa & PAGE_MASK); 138 unsigned long irq_flags; 139 unsigned long *entry; 140 int i, rc = 0; 141 142 if (!nr_pages) 143 return -EINVAL; 144 145 spin_lock_irqsave(&zdev->dma_table_lock, irq_flags); 146 if (!zdev->dma_table) { 147 rc = -EINVAL; 148 goto out_unlock; 149 } 150 151 for (i = 0; i < nr_pages; i++) { 152 entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); 153 if (!entry) { 154 rc = -ENOMEM; 155 goto undo_cpu_trans; 156 } 157 dma_update_cpu_trans(entry, page_addr, flags); 158 page_addr += PAGE_SIZE; 159 dma_addr += PAGE_SIZE; 160 } 161 162 undo_cpu_trans: 163 if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) { 164 flags = ZPCI_PTE_INVALID; 165 while (i-- > 0) { 166 page_addr -= PAGE_SIZE; 167 dma_addr -= PAGE_SIZE; 168 entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); 169 if (!entry) 170 break; 171 dma_update_cpu_trans(entry, page_addr, flags); 172 } 173 } 174 out_unlock: 175 spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags); 176 return rc; 177 } 178 179 static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr, 180 size_t size, int flags) 181 { 182 unsigned long irqflags; 183 int ret; 184 185 /* 186 * With zdev->tlb_refresh == 0, rpcit is not required to establish new 187 * translations when previously invalid translation-table entries are 188 * validated. With lazy unmap, rpcit is skipped for previously valid 189 * entries, but a global rpcit is then required before any address can 190 * be re-used, i.e. after each iommu bitmap wrap-around. 191 */ 192 if ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID) { 193 if (!zdev->tlb_refresh) 194 return 0; 195 } else { 196 if (!s390_iommu_strict) 197 return 0; 198 } 199 200 ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr, 201 PAGE_ALIGN(size)); 202 if (ret == -ENOMEM && !s390_iommu_strict) { 203 /* enable the hypervisor to free some resources */ 204 if (zpci_refresh_global(zdev)) 205 goto out; 206 207 spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags); 208 bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap, 209 zdev->lazy_bitmap, zdev->iommu_pages); 210 bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages); 211 spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags); 212 ret = 0; 213 } 214 out: 215 return ret; 216 } 217 218 static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, 219 dma_addr_t dma_addr, size_t size, int flags) 220 { 221 int rc; 222 223 rc = __dma_update_trans(zdev, pa, dma_addr, size, flags); 224 if (rc) 225 return rc; 226 227 rc = __dma_purge_tlb(zdev, dma_addr, size, flags); 228 if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) 229 __dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID); 230 231 return rc; 232 } 233 234 void dma_free_seg_table(unsigned long entry) 235 { 236 unsigned long *sto = get_rt_sto(entry); 237 int sx; 238 239 for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++) 240 if (reg_entry_isvalid(sto[sx])) 241 dma_free_page_table(get_st_pto(sto[sx])); 242 243 dma_free_cpu_table(sto); 244 } 245 246 void dma_cleanup_tables(unsigned long *table) 247 { 248 int rtx; 249 250 if (!table) 251 return; 252 253 for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++) 254 if (reg_entry_isvalid(table[rtx])) 255 dma_free_seg_table(table[rtx]); 256 257 dma_free_cpu_table(table); 258 } 259 260 static unsigned long __dma_alloc_iommu(struct device *dev, 261 unsigned long start, int size) 262 { 263 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 264 265 return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, 266 start, size, zdev->start_dma >> PAGE_SHIFT, 267 dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT), 268 0); 269 } 270 271 static dma_addr_t dma_alloc_address(struct device *dev, int size) 272 { 273 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 274 unsigned long offset, flags; 275 276 spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); 277 offset = __dma_alloc_iommu(dev, zdev->next_bit, size); 278 if (offset == -1) { 279 if (!s390_iommu_strict) { 280 /* global flush before DMA addresses are reused */ 281 if (zpci_refresh_global(zdev)) 282 goto out_error; 283 284 bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap, 285 zdev->lazy_bitmap, zdev->iommu_pages); 286 bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages); 287 } 288 /* wrap-around */ 289 offset = __dma_alloc_iommu(dev, 0, size); 290 if (offset == -1) 291 goto out_error; 292 } 293 zdev->next_bit = offset + size; 294 spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); 295 296 return zdev->start_dma + offset * PAGE_SIZE; 297 298 out_error: 299 spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); 300 return DMA_MAPPING_ERROR; 301 } 302 303 static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size) 304 { 305 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 306 unsigned long flags, offset; 307 308 offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT; 309 310 spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); 311 if (!zdev->iommu_bitmap) 312 goto out; 313 314 if (s390_iommu_strict) 315 bitmap_clear(zdev->iommu_bitmap, offset, size); 316 else 317 bitmap_set(zdev->lazy_bitmap, offset, size); 318 319 out: 320 spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); 321 } 322 323 static inline void zpci_err_dma(unsigned long rc, unsigned long addr) 324 { 325 struct { 326 unsigned long rc; 327 unsigned long addr; 328 } __packed data = {rc, addr}; 329 330 zpci_err_hex(&data, sizeof(data)); 331 } 332 333 static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, 334 unsigned long offset, size_t size, 335 enum dma_data_direction direction, 336 unsigned long attrs) 337 { 338 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 339 unsigned long pa = page_to_phys(page) + offset; 340 int flags = ZPCI_PTE_VALID; 341 unsigned long nr_pages; 342 dma_addr_t dma_addr; 343 int ret; 344 345 /* This rounds up number of pages based on size and offset */ 346 nr_pages = iommu_num_pages(pa, size, PAGE_SIZE); 347 dma_addr = dma_alloc_address(dev, nr_pages); 348 if (dma_addr == DMA_MAPPING_ERROR) { 349 ret = -ENOSPC; 350 goto out_err; 351 } 352 353 /* Use rounded up size */ 354 size = nr_pages * PAGE_SIZE; 355 356 if (direction == DMA_NONE || direction == DMA_TO_DEVICE) 357 flags |= ZPCI_TABLE_PROTECTED; 358 359 ret = dma_update_trans(zdev, pa, dma_addr, size, flags); 360 if (ret) 361 goto out_free; 362 363 atomic64_add(nr_pages, &zdev->mapped_pages); 364 return dma_addr + (offset & ~PAGE_MASK); 365 366 out_free: 367 dma_free_address(dev, dma_addr, nr_pages); 368 out_err: 369 zpci_err("map error:\n"); 370 zpci_err_dma(ret, pa); 371 return DMA_MAPPING_ERROR; 372 } 373 374 static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, 375 size_t size, enum dma_data_direction direction, 376 unsigned long attrs) 377 { 378 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 379 int npages, ret; 380 381 npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); 382 dma_addr = dma_addr & PAGE_MASK; 383 ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE, 384 ZPCI_PTE_INVALID); 385 if (ret) { 386 zpci_err("unmap error:\n"); 387 zpci_err_dma(ret, dma_addr); 388 return; 389 } 390 391 atomic64_add(npages, &zdev->unmapped_pages); 392 dma_free_address(dev, dma_addr, npages); 393 } 394 395 static void *s390_dma_alloc(struct device *dev, size_t size, 396 dma_addr_t *dma_handle, gfp_t flag, 397 unsigned long attrs) 398 { 399 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 400 struct page *page; 401 unsigned long pa; 402 dma_addr_t map; 403 404 size = PAGE_ALIGN(size); 405 page = alloc_pages(flag | __GFP_ZERO, get_order(size)); 406 if (!page) 407 return NULL; 408 409 pa = page_to_phys(page); 410 map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0); 411 if (dma_mapping_error(dev, map)) { 412 free_pages(pa, get_order(size)); 413 return NULL; 414 } 415 416 atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages); 417 if (dma_handle) 418 *dma_handle = map; 419 return (void *) pa; 420 } 421 422 static void s390_dma_free(struct device *dev, size_t size, 423 void *pa, dma_addr_t dma_handle, 424 unsigned long attrs) 425 { 426 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 427 428 size = PAGE_ALIGN(size); 429 atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages); 430 s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0); 431 free_pages((unsigned long) pa, get_order(size)); 432 } 433 434 /* Map a segment into a contiguous dma address area */ 435 static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg, 436 size_t size, dma_addr_t *handle, 437 enum dma_data_direction dir) 438 { 439 unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; 440 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 441 dma_addr_t dma_addr_base, dma_addr; 442 int flags = ZPCI_PTE_VALID; 443 struct scatterlist *s; 444 unsigned long pa = 0; 445 int ret; 446 447 dma_addr_base = dma_alloc_address(dev, nr_pages); 448 if (dma_addr_base == DMA_MAPPING_ERROR) 449 return -ENOMEM; 450 451 dma_addr = dma_addr_base; 452 if (dir == DMA_NONE || dir == DMA_TO_DEVICE) 453 flags |= ZPCI_TABLE_PROTECTED; 454 455 for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) { 456 pa = page_to_phys(sg_page(s)); 457 ret = __dma_update_trans(zdev, pa, dma_addr, 458 s->offset + s->length, flags); 459 if (ret) 460 goto unmap; 461 462 dma_addr += s->offset + s->length; 463 } 464 ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags); 465 if (ret) 466 goto unmap; 467 468 *handle = dma_addr_base; 469 atomic64_add(nr_pages, &zdev->mapped_pages); 470 471 return ret; 472 473 unmap: 474 dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base, 475 ZPCI_PTE_INVALID); 476 dma_free_address(dev, dma_addr_base, nr_pages); 477 zpci_err("map error:\n"); 478 zpci_err_dma(ret, pa); 479 return ret; 480 } 481 482 static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, 483 int nr_elements, enum dma_data_direction dir, 484 unsigned long attrs) 485 { 486 struct scatterlist *s = sg, *start = sg, *dma = sg; 487 unsigned int max = dma_get_max_seg_size(dev); 488 unsigned int size = s->offset + s->length; 489 unsigned int offset = s->offset; 490 int count = 0, i; 491 492 for (i = 1; i < nr_elements; i++) { 493 s = sg_next(s); 494 495 s->dma_address = DMA_MAPPING_ERROR; 496 s->dma_length = 0; 497 498 if (s->offset || (size & ~PAGE_MASK) || 499 size + s->length > max) { 500 if (__s390_dma_map_sg(dev, start, size, 501 &dma->dma_address, dir)) 502 goto unmap; 503 504 dma->dma_address += offset; 505 dma->dma_length = size - offset; 506 507 size = offset = s->offset; 508 start = s; 509 dma = sg_next(dma); 510 count++; 511 } 512 size += s->length; 513 } 514 if (__s390_dma_map_sg(dev, start, size, &dma->dma_address, dir)) 515 goto unmap; 516 517 dma->dma_address += offset; 518 dma->dma_length = size - offset; 519 520 return count + 1; 521 unmap: 522 for_each_sg(sg, s, count, i) 523 s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s), 524 dir, attrs); 525 526 return 0; 527 } 528 529 static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, 530 int nr_elements, enum dma_data_direction dir, 531 unsigned long attrs) 532 { 533 struct scatterlist *s; 534 int i; 535 536 for_each_sg(sg, s, nr_elements, i) { 537 if (s->dma_length) 538 s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, 539 dir, attrs); 540 s->dma_address = 0; 541 s->dma_length = 0; 542 } 543 } 544 545 int zpci_dma_init_device(struct zpci_dev *zdev) 546 { 547 int rc; 548 549 /* 550 * At this point, if the device is part of an IOMMU domain, this would 551 * be a strong hint towards a bug in the IOMMU API (common) code and/or 552 * simultaneous access via IOMMU and DMA API. So let's issue a warning. 553 */ 554 WARN_ON(zdev->s390_domain); 555 556 spin_lock_init(&zdev->iommu_bitmap_lock); 557 spin_lock_init(&zdev->dma_table_lock); 558 559 zdev->dma_table = dma_alloc_cpu_table(); 560 if (!zdev->dma_table) { 561 rc = -ENOMEM; 562 goto out; 563 } 564 565 /* 566 * Restrict the iommu bitmap size to the minimum of the following: 567 * - main memory size 568 * - 3-level pagetable address limit minus start_dma offset 569 * - DMA address range allowed by the hardware (clp query pci fn) 570 * 571 * Also set zdev->end_dma to the actual end address of the usable 572 * range, instead of the theoretical maximum as reported by hardware. 573 */ 574 zdev->start_dma = PAGE_ALIGN(zdev->start_dma); 575 zdev->iommu_size = min3((u64) high_memory, 576 ZPCI_TABLE_SIZE_RT - zdev->start_dma, 577 zdev->end_dma - zdev->start_dma + 1); 578 zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1; 579 zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT; 580 zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8); 581 if (!zdev->iommu_bitmap) { 582 rc = -ENOMEM; 583 goto free_dma_table; 584 } 585 if (!s390_iommu_strict) { 586 zdev->lazy_bitmap = vzalloc(zdev->iommu_pages / 8); 587 if (!zdev->lazy_bitmap) { 588 rc = -ENOMEM; 589 goto free_bitmap; 590 } 591 592 } 593 rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, 594 (u64) zdev->dma_table); 595 if (rc) 596 goto free_bitmap; 597 598 return 0; 599 free_bitmap: 600 vfree(zdev->iommu_bitmap); 601 zdev->iommu_bitmap = NULL; 602 vfree(zdev->lazy_bitmap); 603 zdev->lazy_bitmap = NULL; 604 free_dma_table: 605 dma_free_cpu_table(zdev->dma_table); 606 zdev->dma_table = NULL; 607 out: 608 return rc; 609 } 610 611 void zpci_dma_exit_device(struct zpci_dev *zdev) 612 { 613 /* 614 * At this point, if the device is part of an IOMMU domain, this would 615 * be a strong hint towards a bug in the IOMMU API (common) code and/or 616 * simultaneous access via IOMMU and DMA API. So let's issue a warning. 617 */ 618 WARN_ON(zdev->s390_domain); 619 620 if (zpci_unregister_ioat(zdev, 0)) 621 return; 622 623 dma_cleanup_tables(zdev->dma_table); 624 zdev->dma_table = NULL; 625 vfree(zdev->iommu_bitmap); 626 zdev->iommu_bitmap = NULL; 627 vfree(zdev->lazy_bitmap); 628 zdev->lazy_bitmap = NULL; 629 630 zdev->next_bit = 0; 631 } 632 633 static int __init dma_alloc_cpu_table_caches(void) 634 { 635 dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables", 636 ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN, 637 0, NULL); 638 if (!dma_region_table_cache) 639 return -ENOMEM; 640 641 dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables", 642 ZPCI_PT_SIZE, ZPCI_PT_ALIGN, 643 0, NULL); 644 if (!dma_page_table_cache) { 645 kmem_cache_destroy(dma_region_table_cache); 646 return -ENOMEM; 647 } 648 return 0; 649 } 650 651 int __init zpci_dma_init(void) 652 { 653 return dma_alloc_cpu_table_caches(); 654 } 655 656 void zpci_dma_exit(void) 657 { 658 kmem_cache_destroy(dma_page_table_cache); 659 kmem_cache_destroy(dma_region_table_cache); 660 } 661 662 const struct dma_map_ops s390_pci_dma_ops = { 663 .alloc = s390_dma_alloc, 664 .free = s390_dma_free, 665 .map_sg = s390_dma_map_sg, 666 .unmap_sg = s390_dma_unmap_sg, 667 .map_page = s390_dma_map_pages, 668 .unmap_page = s390_dma_unmap_pages, 669 .mmap = dma_common_mmap, 670 .get_sgtable = dma_common_get_sgtable, 671 .alloc_pages = dma_common_alloc_pages, 672 .free_pages = dma_common_free_pages, 673 /* dma_supported is unconditionally true without a callback */ 674 }; 675 EXPORT_SYMBOL_GPL(s390_pci_dma_ops); 676 677 static int __init s390_iommu_setup(char *str) 678 { 679 if (!strcmp(str, "strict")) 680 s390_iommu_strict = 1; 681 return 1; 682 } 683 684 __setup("s390_iommu=", s390_iommu_setup); 685