1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright IBM Corp. 2012 4 * 5 * Author(s): 6 * Jan Glauber <jang@linux.vnet.ibm.com> 7 */ 8 9 #include <linux/kernel.h> 10 #include <linux/slab.h> 11 #include <linux/export.h> 12 #include <linux/iommu-helper.h> 13 #include <linux/dma-mapping.h> 14 #include <linux/vmalloc.h> 15 #include <linux/pci.h> 16 #include <asm/pci_dma.h> 17 18 #define S390_MAPPING_ERROR (~(dma_addr_t) 0x0) 19 20 static struct kmem_cache *dma_region_table_cache; 21 static struct kmem_cache *dma_page_table_cache; 22 static int s390_iommu_strict; 23 24 static int zpci_refresh_global(struct zpci_dev *zdev) 25 { 26 return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma, 27 zdev->iommu_pages * PAGE_SIZE); 28 } 29 30 unsigned long *dma_alloc_cpu_table(void) 31 { 32 unsigned long *table, *entry; 33 34 table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC); 35 if (!table) 36 return NULL; 37 38 for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++) 39 *entry = ZPCI_TABLE_INVALID; 40 return table; 41 } 42 43 static void dma_free_cpu_table(void *table) 44 { 45 kmem_cache_free(dma_region_table_cache, table); 46 } 47 48 static unsigned long *dma_alloc_page_table(void) 49 { 50 unsigned long *table, *entry; 51 52 table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC); 53 if (!table) 54 return NULL; 55 56 for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++) 57 *entry = ZPCI_PTE_INVALID; 58 return table; 59 } 60 61 static void dma_free_page_table(void *table) 62 { 63 kmem_cache_free(dma_page_table_cache, table); 64 } 65 66 static unsigned long *dma_get_seg_table_origin(unsigned long *entry) 67 { 68 unsigned long *sto; 69 70 if (reg_entry_isvalid(*entry)) 71 sto = get_rt_sto(*entry); 72 else { 73 sto = dma_alloc_cpu_table(); 74 if (!sto) 75 return NULL; 76 77 set_rt_sto(entry, sto); 78 validate_rt_entry(entry); 79 entry_clr_protected(entry); 80 } 81 return sto; 82 } 83 84 static unsigned long *dma_get_page_table_origin(unsigned long *entry) 85 { 86 unsigned long *pto; 87 88 if (reg_entry_isvalid(*entry)) 89 pto = get_st_pto(*entry); 90 else { 91 pto = dma_alloc_page_table(); 92 if (!pto) 93 return NULL; 94 set_st_pto(entry, pto); 95 validate_st_entry(entry); 96 entry_clr_protected(entry); 97 } 98 return pto; 99 } 100 101 unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr) 102 { 103 unsigned long *sto, *pto; 104 unsigned int rtx, sx, px; 105 106 rtx = calc_rtx(dma_addr); 107 sto = dma_get_seg_table_origin(&rto[rtx]); 108 if (!sto) 109 return NULL; 110 111 sx = calc_sx(dma_addr); 112 pto = dma_get_page_table_origin(&sto[sx]); 113 if (!pto) 114 return NULL; 115 116 px = calc_px(dma_addr); 117 return &pto[px]; 118 } 119 120 void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags) 121 { 122 if (flags & ZPCI_PTE_INVALID) { 123 invalidate_pt_entry(entry); 124 } else { 125 set_pt_pfaa(entry, page_addr); 126 validate_pt_entry(entry); 127 } 128 129 if (flags & ZPCI_TABLE_PROTECTED) 130 entry_set_protected(entry); 131 else 132 entry_clr_protected(entry); 133 } 134 135 static int __dma_update_trans(struct zpci_dev *zdev, unsigned long pa, 136 dma_addr_t dma_addr, size_t size, int flags) 137 { 138 unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; 139 u8 *page_addr = (u8 *) (pa & PAGE_MASK); 140 unsigned long irq_flags; 141 unsigned long *entry; 142 int i, rc = 0; 143 144 if (!nr_pages) 145 return -EINVAL; 146 147 spin_lock_irqsave(&zdev->dma_table_lock, irq_flags); 148 if (!zdev->dma_table) { 149 rc = -EINVAL; 150 goto out_unlock; 151 } 152 153 for (i = 0; i < nr_pages; i++) { 154 entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); 155 if (!entry) { 156 rc = -ENOMEM; 157 goto undo_cpu_trans; 158 } 159 dma_update_cpu_trans(entry, page_addr, flags); 160 page_addr += PAGE_SIZE; 161 dma_addr += PAGE_SIZE; 162 } 163 164 undo_cpu_trans: 165 if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) { 166 flags = ZPCI_PTE_INVALID; 167 while (i-- > 0) { 168 page_addr -= PAGE_SIZE; 169 dma_addr -= PAGE_SIZE; 170 entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); 171 if (!entry) 172 break; 173 dma_update_cpu_trans(entry, page_addr, flags); 174 } 175 } 176 out_unlock: 177 spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags); 178 return rc; 179 } 180 181 static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr, 182 size_t size, int flags) 183 { 184 /* 185 * With zdev->tlb_refresh == 0, rpcit is not required to establish new 186 * translations when previously invalid translation-table entries are 187 * validated. With lazy unmap, rpcit is skipped for previously valid 188 * entries, but a global rpcit is then required before any address can 189 * be re-used, i.e. after each iommu bitmap wrap-around. 190 */ 191 if ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID) { 192 if (!zdev->tlb_refresh) 193 return 0; 194 } else { 195 if (!s390_iommu_strict) 196 return 0; 197 } 198 199 return zpci_refresh_trans((u64) zdev->fh << 32, dma_addr, 200 PAGE_ALIGN(size)); 201 } 202 203 static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, 204 dma_addr_t dma_addr, size_t size, int flags) 205 { 206 int rc; 207 208 rc = __dma_update_trans(zdev, pa, dma_addr, size, flags); 209 if (rc) 210 return rc; 211 212 rc = __dma_purge_tlb(zdev, dma_addr, size, flags); 213 if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) 214 __dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID); 215 216 return rc; 217 } 218 219 void dma_free_seg_table(unsigned long entry) 220 { 221 unsigned long *sto = get_rt_sto(entry); 222 int sx; 223 224 for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++) 225 if (reg_entry_isvalid(sto[sx])) 226 dma_free_page_table(get_st_pto(sto[sx])); 227 228 dma_free_cpu_table(sto); 229 } 230 231 void dma_cleanup_tables(unsigned long *table) 232 { 233 int rtx; 234 235 if (!table) 236 return; 237 238 for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++) 239 if (reg_entry_isvalid(table[rtx])) 240 dma_free_seg_table(table[rtx]); 241 242 dma_free_cpu_table(table); 243 } 244 245 static unsigned long __dma_alloc_iommu(struct device *dev, 246 unsigned long start, int size) 247 { 248 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 249 unsigned long boundary_size; 250 251 boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, 252 PAGE_SIZE) >> PAGE_SHIFT; 253 return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, 254 start, size, zdev->start_dma >> PAGE_SHIFT, 255 boundary_size, 0); 256 } 257 258 static dma_addr_t dma_alloc_address(struct device *dev, int size) 259 { 260 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 261 unsigned long offset, flags; 262 263 spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); 264 offset = __dma_alloc_iommu(dev, zdev->next_bit, size); 265 if (offset == -1) { 266 if (!s390_iommu_strict) { 267 /* global flush before DMA addresses are reused */ 268 if (zpci_refresh_global(zdev)) 269 goto out_error; 270 271 bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap, 272 zdev->lazy_bitmap, zdev->iommu_pages); 273 bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages); 274 } 275 /* wrap-around */ 276 offset = __dma_alloc_iommu(dev, 0, size); 277 if (offset == -1) 278 goto out_error; 279 } 280 zdev->next_bit = offset + size; 281 spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); 282 283 return zdev->start_dma + offset * PAGE_SIZE; 284 285 out_error: 286 spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); 287 return S390_MAPPING_ERROR; 288 } 289 290 static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size) 291 { 292 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 293 unsigned long flags, offset; 294 295 offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT; 296 297 spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); 298 if (!zdev->iommu_bitmap) 299 goto out; 300 301 if (s390_iommu_strict) 302 bitmap_clear(zdev->iommu_bitmap, offset, size); 303 else 304 bitmap_set(zdev->lazy_bitmap, offset, size); 305 306 out: 307 spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); 308 } 309 310 static inline void zpci_err_dma(unsigned long rc, unsigned long addr) 311 { 312 struct { 313 unsigned long rc; 314 unsigned long addr; 315 } __packed data = {rc, addr}; 316 317 zpci_err_hex(&data, sizeof(data)); 318 } 319 320 static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, 321 unsigned long offset, size_t size, 322 enum dma_data_direction direction, 323 unsigned long attrs) 324 { 325 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 326 unsigned long pa = page_to_phys(page) + offset; 327 int flags = ZPCI_PTE_VALID; 328 unsigned long nr_pages; 329 dma_addr_t dma_addr; 330 int ret; 331 332 /* This rounds up number of pages based on size and offset */ 333 nr_pages = iommu_num_pages(pa, size, PAGE_SIZE); 334 dma_addr = dma_alloc_address(dev, nr_pages); 335 if (dma_addr == S390_MAPPING_ERROR) { 336 ret = -ENOSPC; 337 goto out_err; 338 } 339 340 /* Use rounded up size */ 341 size = nr_pages * PAGE_SIZE; 342 343 if (direction == DMA_NONE || direction == DMA_TO_DEVICE) 344 flags |= ZPCI_TABLE_PROTECTED; 345 346 ret = dma_update_trans(zdev, pa, dma_addr, size, flags); 347 if (ret) 348 goto out_free; 349 350 atomic64_add(nr_pages, &zdev->mapped_pages); 351 return dma_addr + (offset & ~PAGE_MASK); 352 353 out_free: 354 dma_free_address(dev, dma_addr, nr_pages); 355 out_err: 356 zpci_err("map error:\n"); 357 zpci_err_dma(ret, pa); 358 return S390_MAPPING_ERROR; 359 } 360 361 static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, 362 size_t size, enum dma_data_direction direction, 363 unsigned long attrs) 364 { 365 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 366 int npages, ret; 367 368 npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); 369 dma_addr = dma_addr & PAGE_MASK; 370 ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE, 371 ZPCI_PTE_INVALID); 372 if (ret) { 373 zpci_err("unmap error:\n"); 374 zpci_err_dma(ret, dma_addr); 375 return; 376 } 377 378 atomic64_add(npages, &zdev->unmapped_pages); 379 dma_free_address(dev, dma_addr, npages); 380 } 381 382 static void *s390_dma_alloc(struct device *dev, size_t size, 383 dma_addr_t *dma_handle, gfp_t flag, 384 unsigned long attrs) 385 { 386 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 387 struct page *page; 388 unsigned long pa; 389 dma_addr_t map; 390 391 size = PAGE_ALIGN(size); 392 page = alloc_pages(flag, get_order(size)); 393 if (!page) 394 return NULL; 395 396 pa = page_to_phys(page); 397 map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0); 398 if (dma_mapping_error(dev, map)) { 399 free_pages(pa, get_order(size)); 400 return NULL; 401 } 402 403 atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages); 404 if (dma_handle) 405 *dma_handle = map; 406 return (void *) pa; 407 } 408 409 static void s390_dma_free(struct device *dev, size_t size, 410 void *pa, dma_addr_t dma_handle, 411 unsigned long attrs) 412 { 413 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 414 415 size = PAGE_ALIGN(size); 416 atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages); 417 s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0); 418 free_pages((unsigned long) pa, get_order(size)); 419 } 420 421 /* Map a segment into a contiguous dma address area */ 422 static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg, 423 size_t size, dma_addr_t *handle, 424 enum dma_data_direction dir) 425 { 426 unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; 427 struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 428 dma_addr_t dma_addr_base, dma_addr; 429 int flags = ZPCI_PTE_VALID; 430 struct scatterlist *s; 431 unsigned long pa = 0; 432 int ret; 433 434 dma_addr_base = dma_alloc_address(dev, nr_pages); 435 if (dma_addr_base == S390_MAPPING_ERROR) 436 return -ENOMEM; 437 438 dma_addr = dma_addr_base; 439 if (dir == DMA_NONE || dir == DMA_TO_DEVICE) 440 flags |= ZPCI_TABLE_PROTECTED; 441 442 for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) { 443 pa = page_to_phys(sg_page(s)); 444 ret = __dma_update_trans(zdev, pa, dma_addr, 445 s->offset + s->length, flags); 446 if (ret) 447 goto unmap; 448 449 dma_addr += s->offset + s->length; 450 } 451 ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags); 452 if (ret) 453 goto unmap; 454 455 *handle = dma_addr_base; 456 atomic64_add(nr_pages, &zdev->mapped_pages); 457 458 return ret; 459 460 unmap: 461 dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base, 462 ZPCI_PTE_INVALID); 463 dma_free_address(dev, dma_addr_base, nr_pages); 464 zpci_err("map error:\n"); 465 zpci_err_dma(ret, pa); 466 return ret; 467 } 468 469 static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, 470 int nr_elements, enum dma_data_direction dir, 471 unsigned long attrs) 472 { 473 struct scatterlist *s = sg, *start = sg, *dma = sg; 474 unsigned int max = dma_get_max_seg_size(dev); 475 unsigned int size = s->offset + s->length; 476 unsigned int offset = s->offset; 477 int count = 0, i; 478 479 for (i = 1; i < nr_elements; i++) { 480 s = sg_next(s); 481 482 s->dma_address = S390_MAPPING_ERROR; 483 s->dma_length = 0; 484 485 if (s->offset || (size & ~PAGE_MASK) || 486 size + s->length > max) { 487 if (__s390_dma_map_sg(dev, start, size, 488 &dma->dma_address, dir)) 489 goto unmap; 490 491 dma->dma_address += offset; 492 dma->dma_length = size - offset; 493 494 size = offset = s->offset; 495 start = s; 496 dma = sg_next(dma); 497 count++; 498 } 499 size += s->length; 500 } 501 if (__s390_dma_map_sg(dev, start, size, &dma->dma_address, dir)) 502 goto unmap; 503 504 dma->dma_address += offset; 505 dma->dma_length = size - offset; 506 507 return count + 1; 508 unmap: 509 for_each_sg(sg, s, count, i) 510 s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s), 511 dir, attrs); 512 513 return 0; 514 } 515 516 static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, 517 int nr_elements, enum dma_data_direction dir, 518 unsigned long attrs) 519 { 520 struct scatterlist *s; 521 int i; 522 523 for_each_sg(sg, s, nr_elements, i) { 524 if (s->dma_length) 525 s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, 526 dir, attrs); 527 s->dma_address = 0; 528 s->dma_length = 0; 529 } 530 } 531 532 static int s390_mapping_error(struct device *dev, dma_addr_t dma_addr) 533 { 534 return dma_addr == S390_MAPPING_ERROR; 535 } 536 537 int zpci_dma_init_device(struct zpci_dev *zdev) 538 { 539 int rc; 540 541 /* 542 * At this point, if the device is part of an IOMMU domain, this would 543 * be a strong hint towards a bug in the IOMMU API (common) code and/or 544 * simultaneous access via IOMMU and DMA API. So let's issue a warning. 545 */ 546 WARN_ON(zdev->s390_domain); 547 548 spin_lock_init(&zdev->iommu_bitmap_lock); 549 spin_lock_init(&zdev->dma_table_lock); 550 551 zdev->dma_table = dma_alloc_cpu_table(); 552 if (!zdev->dma_table) { 553 rc = -ENOMEM; 554 goto out; 555 } 556 557 /* 558 * Restrict the iommu bitmap size to the minimum of the following: 559 * - main memory size 560 * - 3-level pagetable address limit minus start_dma offset 561 * - DMA address range allowed by the hardware (clp query pci fn) 562 * 563 * Also set zdev->end_dma to the actual end address of the usable 564 * range, instead of the theoretical maximum as reported by hardware. 565 */ 566 zdev->start_dma = PAGE_ALIGN(zdev->start_dma); 567 zdev->iommu_size = min3((u64) high_memory, 568 ZPCI_TABLE_SIZE_RT - zdev->start_dma, 569 zdev->end_dma - zdev->start_dma + 1); 570 zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1; 571 zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT; 572 zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8); 573 if (!zdev->iommu_bitmap) { 574 rc = -ENOMEM; 575 goto free_dma_table; 576 } 577 if (!s390_iommu_strict) { 578 zdev->lazy_bitmap = vzalloc(zdev->iommu_pages / 8); 579 if (!zdev->lazy_bitmap) { 580 rc = -ENOMEM; 581 goto free_bitmap; 582 } 583 584 } 585 rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, 586 (u64) zdev->dma_table); 587 if (rc) 588 goto free_bitmap; 589 590 return 0; 591 free_bitmap: 592 vfree(zdev->iommu_bitmap); 593 zdev->iommu_bitmap = NULL; 594 vfree(zdev->lazy_bitmap); 595 zdev->lazy_bitmap = NULL; 596 free_dma_table: 597 dma_free_cpu_table(zdev->dma_table); 598 zdev->dma_table = NULL; 599 out: 600 return rc; 601 } 602 603 void zpci_dma_exit_device(struct zpci_dev *zdev) 604 { 605 /* 606 * At this point, if the device is part of an IOMMU domain, this would 607 * be a strong hint towards a bug in the IOMMU API (common) code and/or 608 * simultaneous access via IOMMU and DMA API. So let's issue a warning. 609 */ 610 WARN_ON(zdev->s390_domain); 611 612 if (zpci_unregister_ioat(zdev, 0)) 613 return; 614 615 dma_cleanup_tables(zdev->dma_table); 616 zdev->dma_table = NULL; 617 vfree(zdev->iommu_bitmap); 618 zdev->iommu_bitmap = NULL; 619 vfree(zdev->lazy_bitmap); 620 zdev->lazy_bitmap = NULL; 621 622 zdev->next_bit = 0; 623 } 624 625 static int __init dma_alloc_cpu_table_caches(void) 626 { 627 dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables", 628 ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN, 629 0, NULL); 630 if (!dma_region_table_cache) 631 return -ENOMEM; 632 633 dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables", 634 ZPCI_PT_SIZE, ZPCI_PT_ALIGN, 635 0, NULL); 636 if (!dma_page_table_cache) { 637 kmem_cache_destroy(dma_region_table_cache); 638 return -ENOMEM; 639 } 640 return 0; 641 } 642 643 int __init zpci_dma_init(void) 644 { 645 return dma_alloc_cpu_table_caches(); 646 } 647 648 void zpci_dma_exit(void) 649 { 650 kmem_cache_destroy(dma_page_table_cache); 651 kmem_cache_destroy(dma_region_table_cache); 652 } 653 654 #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) 655 656 static int __init dma_debug_do_init(void) 657 { 658 dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); 659 return 0; 660 } 661 fs_initcall(dma_debug_do_init); 662 663 const struct dma_map_ops s390_pci_dma_ops = { 664 .alloc = s390_dma_alloc, 665 .free = s390_dma_free, 666 .map_sg = s390_dma_map_sg, 667 .unmap_sg = s390_dma_unmap_sg, 668 .map_page = s390_dma_map_pages, 669 .unmap_page = s390_dma_unmap_pages, 670 .mapping_error = s390_mapping_error, 671 /* if we support direct DMA this must be conditional */ 672 .is_phys = 0, 673 /* dma_supported is unconditionally true without a callback */ 674 }; 675 EXPORT_SYMBOL_GPL(s390_pci_dma_ops); 676 677 static int __init s390_iommu_setup(char *str) 678 { 679 if (!strncmp(str, "strict", 6)) 680 s390_iommu_strict = 1; 681 return 0; 682 } 683 684 __setup("s390_iommu=", s390_iommu_setup); 685