// SPDX-License-Identifier: GPL-2.0-only
/*
 * MMU-based software IOTLB.
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <xieyongji@bytedance.com>
 *
 */

#include <linux/slab.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <linux/vdpa.h>

#include "iova_domain.h"

static int vduse_iotlb_add_range(struct vduse_iova_domain *domain,
				 u64 start, u64 last,
				 u64 addr, unsigned int perm,
				 struct file *file, u64 offset)
{
	struct vdpa_map_file *map_file;
	int ret;

	map_file = kmalloc(sizeof(*map_file), GFP_ATOMIC);
	if (!map_file)
		return -ENOMEM;

	map_file->file = get_file(file);
	map_file->offset = offset;

	ret = vhost_iotlb_add_range_ctx(domain->iotlb, start, last,
					addr, perm, map_file);
	if (ret) {
		fput(map_file->file);
		kfree(map_file);
		return ret;
	}
	return 0;
}

static void vduse_iotlb_del_range(struct vduse_iova_domain *domain,
				  u64 start, u64 last)
{
	struct vdpa_map_file *map_file;
	struct vhost_iotlb_map *map;

	while ((map = vhost_iotlb_itree_first(domain->iotlb, start, last))) {
		map_file = (struct vdpa_map_file *)map->opaque;
		fput(map_file->file);
		kfree(map_file);
		vhost_iotlb_map_free(domain->iotlb, map);
	}
}

int vduse_domain_set_map(struct vduse_iova_domain *domain,
			 struct vhost_iotlb *iotlb)
{
	struct vdpa_map_file *map_file;
	struct vhost_iotlb_map *map;
	u64 start = 0ULL, last = ULLONG_MAX;
	int ret;

	spin_lock(&domain->iotlb_lock);
	vduse_iotlb_del_range(domain, start, last);

	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		map_file = (struct vdpa_map_file *)map->opaque;
		ret = vduse_iotlb_add_range(domain, map->start, map->last,
					    map->addr, map->perm,
					    map_file->file,
					    map_file->offset);
		if (ret)
			goto err;
	}
	spin_unlock(&domain->iotlb_lock);

	return 0;
err:
	vduse_iotlb_del_range(domain, start, last);
	spin_unlock(&domain->iotlb_lock);
	return ret;
}

void vduse_domain_clear_map(struct vduse_iova_domain *domain,
			    struct vhost_iotlb *iotlb)
{
	struct vhost_iotlb_map *map;
	u64 start = 0ULL, last = ULLONG_MAX;

	spin_lock(&domain->iotlb_lock);
	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		vduse_iotlb_del_range(domain, map->start, map->last);
	}
	spin_unlock(&domain->iotlb_lock);
}

static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain,
					u64 iova, u64 size, u64 paddr)
{
	struct vduse_bounce_map *map;
	u64 last = iova + size - 1;

	while (iova <= last) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		if (!map->bounce_page) {
			map->bounce_page = alloc_page(GFP_ATOMIC);
			if (!map->bounce_page)
				return -ENOMEM;
		}
		map->orig_phys = paddr;
		paddr += PAGE_SIZE;
		iova += PAGE_SIZE;
	}
	return 0;
}

static void vduse_domain_unmap_bounce_page(struct vduse_iova_domain *domain,
					   u64 iova, u64 size)
{
	struct vduse_bounce_map *map;
	u64 last = iova + size - 1;

	while (iova <= last) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		map->orig_phys = INVALID_PHYS_ADDR;
		iova += PAGE_SIZE;
	}
}
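
/*
 * Copy a run of pages between the caller's original buffer (identified
 * by its physical address, recorded in map->orig_phys at map time) and
 * the bounce pages backing the streaming IOVA range.  For
 * DMA_TO_DEVICE the original pages are copied into the bounce pages;
 * for DMA_FROM_DEVICE the bounce pages are copied back out.  The
 * original pages may be highmem, so they are accessed via
 * kmap_atomic().
 */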
static void do_bounce(phys_addr_t orig, void *addr, size_t size,
		      enum dma_data_direction dir)
{
	unsigned long pfn = PFN_DOWN(orig);
	unsigned int offset = offset_in_page(orig);
	char *buffer;
	unsigned int sz = 0;

	while (size) {
		sz = min_t(size_t, PAGE_SIZE - offset, size);

		buffer = kmap_atomic(pfn_to_page(pfn));
		if (dir == DMA_TO_DEVICE)
			memcpy(addr, buffer + offset, sz);
		else
			memcpy(buffer + offset, addr, sz);
		kunmap_atomic(buffer);

		size -= sz;
		pfn++;
		addr += sz;
		offset = 0;
	}
}

static void vduse_domain_bounce(struct vduse_iova_domain *domain,
				dma_addr_t iova, size_t size,
				enum dma_data_direction dir)
{
	struct vduse_bounce_map *map;
	unsigned int offset;
	void *addr;
	size_t sz;

	if (iova >= domain->bounce_size)
		return;

	while (size) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		offset = offset_in_page(iova);
		sz = min_t(size_t, PAGE_SIZE - offset, size);

		if (WARN_ON(!map->bounce_page ||
			    map->orig_phys == INVALID_PHYS_ADDR))
			return;

		addr = page_address(map->bounce_page) + offset;
		do_bounce(map->orig_phys + offset, addr, sz, dir);
		size -= sz;
		iova += sz;
	}
}

static struct page *
vduse_domain_get_coherent_page(struct vduse_iova_domain *domain, u64 iova)
{
	u64 start = iova & PAGE_MASK;
	u64 last = start + PAGE_SIZE - 1;
	struct vhost_iotlb_map *map;
	struct page *page = NULL;

	spin_lock(&domain->iotlb_lock);
	map = vhost_iotlb_itree_first(domain->iotlb, start, last);
	if (!map)
		goto out;

	page = pfn_to_page((map->addr + iova - map->start) >> PAGE_SHIFT);
	get_page(page);
out:
	spin_unlock(&domain->iotlb_lock);

	return page;
}

static struct page *
vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
{
	struct vduse_bounce_map *map;
	struct page *page = NULL;

	spin_lock(&domain->iotlb_lock);
	map = &domain->bounce_maps[iova >> PAGE_SHIFT];
	if (!map->bounce_page)
		goto out;

	page = map->bounce_page;
	get_page(page);
out:
	spin_unlock(&domain->iotlb_lock);

	return page;
}

static void
vduse_domain_free_bounce_pages(struct vduse_iova_domain *domain)
{
	struct vduse_bounce_map *map;
	unsigned long pfn, bounce_pfns;

	bounce_pfns = domain->bounce_size >> PAGE_SHIFT;

	for (pfn = 0; pfn < bounce_pfns; pfn++) {
		map = &domain->bounce_maps[pfn];
		if (WARN_ON(map->orig_phys != INVALID_PHYS_ADDR))
			continue;

		if (!map->bounce_page)
			continue;

		__free_page(map->bounce_page);
		map->bounce_page = NULL;
	}
}

void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain)
{
	if (!domain->bounce_map)
		return;

	spin_lock(&domain->iotlb_lock);
	if (!domain->bounce_map)
		goto unlock;

	vduse_iotlb_del_range(domain, 0, domain->bounce_size - 1);
	domain->bounce_map = 0;
unlock:
	spin_unlock(&domain->iotlb_lock);
}

static int vduse_domain_init_bounce_map(struct vduse_iova_domain *domain)
{
	int ret = 0;

	if (domain->bounce_map)
		return 0;

	spin_lock(&domain->iotlb_lock);
	if (domain->bounce_map)
		goto unlock;

	ret = vduse_iotlb_add_range(domain, 0, domain->bounce_size - 1,
				    0, VHOST_MAP_RW, domain->file, 0);
	if (ret)
		goto unlock;

	domain->bounce_map = 1;
unlock:
	spin_unlock(&domain->iotlb_lock);
	return ret;
}
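
/*
 * The domain carries two IOVA allocators: stream_iovad hands out IOVAs
 * below bounce_size for streaming (bounced) mappings, while
 * consistent_iovad hands out IOVAs at and above bounce_size, up to
 * iova_limit, for coherent allocations.  Keeping the two ranges
 * disjoint lets the mmap fault handler tell the cases apart purely
 * from the faulting IOVA.
 */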
static dma_addr_t
vduse_domain_alloc_iova(struct iova_domain *iovad,
			unsigned long size, unsigned long limit)
{
	unsigned long shift = iova_shift(iovad);
	unsigned long iova_len = iova_align(iovad, size) >> shift;
	unsigned long iova_pfn;

	/*
	 * Freeing non-power-of-two-sized allocations back into the IOVA caches
	 * will come back to bite us badly, so we have to waste a bit of space
	 * rounding up anything cacheable to make sure that can't happen. The
	 * order of the unadjusted size will still match upon freeing.
	 */
	if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
		iova_len = roundup_pow_of_two(iova_len);
	iova_pfn = alloc_iova_fast(iovad, iova_len, limit >> shift, true);

	/* Cast before shifting: dma_addr_t may be wider than unsigned long */
	return (dma_addr_t)iova_pfn << shift;
}

static void vduse_domain_free_iova(struct iova_domain *iovad,
				   dma_addr_t iova, size_t size)
{
	unsigned long shift = iova_shift(iovad);
	unsigned long iova_len = iova_align(iovad, size) >> shift;

	free_iova_fast(iovad, iova >> shift, iova_len);
}

dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain,
				 struct page *page, unsigned long offset,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	struct iova_domain *iovad = &domain->stream_iovad;
	unsigned long limit = domain->bounce_size - 1;
	phys_addr_t pa = page_to_phys(page) + offset;
	dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);

	if (!iova)
		return DMA_MAPPING_ERROR;

	if (vduse_domain_init_bounce_map(domain))
		goto err;

	if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa))
		goto err;

	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
		vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE);

	return iova;
err:
	vduse_domain_free_iova(iovad, iova, size);
	return DMA_MAPPING_ERROR;
}

void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
			     dma_addr_t dma_addr, size_t size,
			     enum dma_data_direction dir, unsigned long attrs)
{
	struct iova_domain *iovad = &domain->stream_iovad;

	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);

	vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size);
	vduse_domain_free_iova(iovad, dma_addr, size);
}

void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain,
				  size_t size, dma_addr_t *dma_addr,
				  gfp_t flag, unsigned long attrs)
{
	struct iova_domain *iovad = &domain->consistent_iovad;
	unsigned long limit = domain->iova_limit;
	dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);
	void *orig = alloc_pages_exact(size, flag);

	if (!iova || !orig)
		goto err;

	spin_lock(&domain->iotlb_lock);
	if (vduse_iotlb_add_range(domain, (u64)iova, (u64)iova + size - 1,
				  virt_to_phys(orig), VHOST_MAP_RW,
				  domain->file, (u64)iova)) {
		spin_unlock(&domain->iotlb_lock);
		goto err;
	}
	spin_unlock(&domain->iotlb_lock);

	*dma_addr = iova;

	return orig;
err:
	*dma_addr = DMA_MAPPING_ERROR;
	if (orig)
		free_pages_exact(orig, size);
	if (iova)
		vduse_domain_free_iova(iovad, iova, size);

	return NULL;
}
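
/*
 * Coherent buffers are plain kernel pages published through the IOTLB
 * with the domain's own file as backing and offset == IOVA, so a
 * userspace mmap() of the domain file at that offset faults in exactly
 * these pages (see vduse_domain_mmap_fault() below).
 */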
void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
				void *vaddr, dma_addr_t dma_addr,
				unsigned long attrs)
{
	struct iova_domain *iovad = &domain->consistent_iovad;
	struct vhost_iotlb_map *map;
	struct vdpa_map_file *map_file;
	phys_addr_t pa;

	spin_lock(&domain->iotlb_lock);
	map = vhost_iotlb_itree_first(domain->iotlb, (u64)dma_addr,
				      (u64)dma_addr + size - 1);
	if (WARN_ON(!map)) {
		spin_unlock(&domain->iotlb_lock);
		return;
	}
	map_file = (struct vdpa_map_file *)map->opaque;
	fput(map_file->file);
	kfree(map_file);
	pa = map->addr;
	vhost_iotlb_map_free(domain->iotlb, map);
	spin_unlock(&domain->iotlb_lock);

	vduse_domain_free_iova(iovad, dma_addr, size);
	free_pages_exact(phys_to_virt(pa), size);
}

static vm_fault_t vduse_domain_mmap_fault(struct vm_fault *vmf)
{
	struct vduse_iova_domain *domain = vmf->vma->vm_private_data;
	unsigned long iova = vmf->pgoff << PAGE_SHIFT;
	struct page *page;

	if (!domain)
		return VM_FAULT_SIGBUS;

	if (iova < domain->bounce_size)
		page = vduse_domain_get_bounce_page(domain, iova);
	else
		page = vduse_domain_get_coherent_page(domain, iova);

	if (!page)
		return VM_FAULT_SIGBUS;

	vmf->page = page;

	return 0;
}

static const struct vm_operations_struct vduse_domain_mmap_ops = {
	.fault = vduse_domain_mmap_fault,
};

static int vduse_domain_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct vduse_iova_domain *domain = file->private_data;

	vma->vm_flags |= VM_DONTDUMP | VM_DONTEXPAND;
	vma->vm_private_data = domain;
	vma->vm_ops = &vduse_domain_mmap_ops;

	return 0;
}

static int vduse_domain_release(struct inode *inode, struct file *file)
{
	struct vduse_iova_domain *domain = file->private_data;

	spin_lock(&domain->iotlb_lock);
	vduse_iotlb_del_range(domain, 0, ULLONG_MAX);
	vduse_domain_free_bounce_pages(domain);
	spin_unlock(&domain->iotlb_lock);
	put_iova_domain(&domain->stream_iovad);
	put_iova_domain(&domain->consistent_iovad);
	vhost_iotlb_free(domain->iotlb);
	vfree(domain->bounce_maps);
	kfree(domain);

	return 0;
}

static const struct file_operations vduse_domain_fops = {
	.owner = THIS_MODULE,
	.mmap = vduse_domain_mmap,
	.release = vduse_domain_release,
};

void vduse_domain_destroy(struct vduse_iova_domain *domain)
{
	fput(domain->file);
}
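
/*
 * The domain's lifetime is tied to its anonymous file: userspace may
 * still hold an mmap() of it when vduse_domain_destroy() runs, so
 * vduse_domain_destroy() only drops a reference and the actual
 * teardown happens in the ->release() callback once the last file
 * reference goes away.
 */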
struct vduse_iova_domain *
vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
{
	struct vduse_iova_domain *domain;
	struct file *file;
	struct vduse_bounce_map *map;
	unsigned long pfn, bounce_pfns;

	bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT;
	if (iova_limit <= bounce_size)
		return NULL;

	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
	if (!domain)
		return NULL;

	domain->iotlb = vhost_iotlb_alloc(0, 0);
	if (!domain->iotlb)
		goto err_iotlb;

	domain->iova_limit = iova_limit;
	domain->bounce_size = PAGE_ALIGN(bounce_size);
	domain->bounce_maps = vzalloc(bounce_pfns *
				sizeof(struct vduse_bounce_map));
	if (!domain->bounce_maps)
		goto err_map;

	for (pfn = 0; pfn < bounce_pfns; pfn++) {
		map = &domain->bounce_maps[pfn];
		map->orig_phys = INVALID_PHYS_ADDR;
	}
	file = anon_inode_getfile("[vduse-domain]", &vduse_domain_fops,
				  domain, O_RDWR);
	if (IS_ERR(file))
		goto err_file;

	domain->file = file;
	spin_lock_init(&domain->iotlb_lock);
	init_iova_domain(&domain->stream_iovad,
			 PAGE_SIZE, IOVA_START_PFN);
	init_iova_domain(&domain->consistent_iovad,
			 PAGE_SIZE, bounce_pfns);

	return domain;
err_file:
	vfree(domain->bounce_maps);
err_map:
	vhost_iotlb_free(domain->iotlb);
err_iotlb:
	kfree(domain);
	return NULL;
}

int vduse_domain_init(void)
{
	return iova_cache_get();
}

void vduse_domain_exit(void)
{
	iova_cache_put();
}
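
/*
 * Usage sketch (illustrative only, not part of this file): a VDUSE
 * device implementation would typically create one domain per device
 * and point its DMA ops at it, e.g.:
 *
 *	domain = vduse_domain_create(dev->iova_limit, dev->bounce_size);
 *	if (!domain)
 *		return -ENOMEM;
 *	...
 *	vduse_domain_destroy(domain);
 *
 * "dev->iova_limit" and "dev->bounce_size" are hypothetical stand-ins
 * for whatever limits the caller chooses; vduse_domain_create() only
 * requires iova_limit > bounce_size.
 */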