1 /****************************************************************************** 2 * gntdev.c 3 * 4 * Device for accessing (in user-space) pages that have been granted by other 5 * domains. 6 * 7 * Copyright (c) 2006-2007, D G Murray. 8 * (c) 2009 Gerd Hoffmann <kraxel@redhat.com> 9 * (c) 2018 Oleksandr Andrushchenko, EPAM Systems Inc. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 19 */ 20 21 #undef DEBUG 22 23 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt 24 25 #include <linux/dma-mapping.h> 26 #include <linux/module.h> 27 #include <linux/kernel.h> 28 #include <linux/init.h> 29 #include <linux/miscdevice.h> 30 #include <linux/fs.h> 31 #include <linux/uaccess.h> 32 #include <linux/sched.h> 33 #include <linux/sched/mm.h> 34 #include <linux/spinlock.h> 35 #include <linux/slab.h> 36 #include <linux/highmem.h> 37 #include <linux/refcount.h> 38 39 #include <xen/xen.h> 40 #include <xen/grant_table.h> 41 #include <xen/balloon.h> 42 #include <xen/gntdev.h> 43 #include <xen/events.h> 44 #include <xen/page.h> 45 #include <asm/xen/hypervisor.h> 46 #include <asm/xen/hypercall.h> 47 48 #include "gntdev-common.h" 49 #ifdef CONFIG_XEN_GNTDEV_DMABUF 50 #include "gntdev-dmabuf.h" 51 #endif 52 53 MODULE_LICENSE("GPL"); 54 MODULE_AUTHOR("Derek G. Murray <Derek.Murray@cl.cam.ac.uk>, " 55 "Gerd Hoffmann <kraxel@redhat.com>"); 56 MODULE_DESCRIPTION("User-space granted page access driver"); 57 58 static int limit = 1024*1024; 59 module_param(limit, int, 0644); 60 MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by " 61 "the gntdev device"); 62 63 static atomic_t pages_mapped = ATOMIC_INIT(0); 64 65 static int use_ptemod; 66 #define populate_freeable_maps use_ptemod 67 68 static int unmap_grant_pages(struct gntdev_grant_map *map, 69 int offset, int pages); 70 71 static struct miscdevice gntdev_miscdev; 72 73 /* ------------------------------------------------------------------ */ 74 75 bool gntdev_account_mapped_pages(int count) 76 { 77 return atomic_add_return(count, &pages_mapped) > limit; 78 } 79 80 static void gntdev_print_maps(struct gntdev_priv *priv, 81 char *text, int text_index) 82 { 83 #ifdef DEBUG 84 struct gntdev_grant_map *map; 85 86 pr_debug("%s: maps list (priv %p)\n", __func__, priv); 87 list_for_each_entry(map, &priv->maps, next) 88 pr_debug(" index %2d, count %2d %s\n", 89 map->index, map->count, 90 map->index == text_index && text ? text : ""); 91 #endif 92 } 93 94 static void gntdev_free_map(struct gntdev_grant_map *map) 95 { 96 if (map == NULL) 97 return; 98 99 #ifdef CONFIG_XEN_GRANT_DMA_ALLOC 100 if (map->dma_vaddr) { 101 struct gnttab_dma_alloc_args args; 102 103 args.dev = map->dma_dev; 104 args.coherent = !!(map->dma_flags & GNTDEV_DMA_FLAG_COHERENT); 105 args.nr_pages = map->count; 106 args.pages = map->pages; 107 args.frames = map->frames; 108 args.vaddr = map->dma_vaddr; 109 args.dev_bus_addr = map->dma_bus_addr; 110 111 gnttab_dma_free_pages(&args); 112 } else 113 #endif 114 if (map->pages) 115 gnttab_free_pages(map->count, map->pages); 116 117 #ifdef CONFIG_XEN_GRANT_DMA_ALLOC 118 kfree(map->frames); 119 #endif 120 kfree(map->pages); 121 kfree(map->grants); 122 kfree(map->map_ops); 123 kfree(map->unmap_ops); 124 kfree(map->kmap_ops); 125 kfree(map->kunmap_ops); 126 kfree(map); 127 } 128 129 struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count, 130 int dma_flags) 131 { 132 struct gntdev_grant_map *add; 133 int i; 134 135 add = kzalloc(sizeof(*add), GFP_KERNEL); 136 if (NULL == add) 137 return NULL; 138 139 add->grants = kcalloc(count, sizeof(add->grants[0]), GFP_KERNEL); 140 add->map_ops = kcalloc(count, sizeof(add->map_ops[0]), GFP_KERNEL); 141 add->unmap_ops = kcalloc(count, sizeof(add->unmap_ops[0]), GFP_KERNEL); 142 add->kmap_ops = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL); 143 add->kunmap_ops = kcalloc(count, sizeof(add->kunmap_ops[0]), GFP_KERNEL); 144 add->pages = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL); 145 if (NULL == add->grants || 146 NULL == add->map_ops || 147 NULL == add->unmap_ops || 148 NULL == add->kmap_ops || 149 NULL == add->kunmap_ops || 150 NULL == add->pages) 151 goto err; 152 153 #ifdef CONFIG_XEN_GRANT_DMA_ALLOC 154 add->dma_flags = dma_flags; 155 156 /* 157 * Check if this mapping is requested to be backed 158 * by a DMA buffer. 159 */ 160 if (dma_flags & (GNTDEV_DMA_FLAG_WC | GNTDEV_DMA_FLAG_COHERENT)) { 161 struct gnttab_dma_alloc_args args; 162 163 add->frames = kcalloc(count, sizeof(add->frames[0]), 164 GFP_KERNEL); 165 if (!add->frames) 166 goto err; 167 168 /* Remember the device, so we can free DMA memory. */ 169 add->dma_dev = priv->dma_dev; 170 171 args.dev = priv->dma_dev; 172 args.coherent = !!(dma_flags & GNTDEV_DMA_FLAG_COHERENT); 173 args.nr_pages = count; 174 args.pages = add->pages; 175 args.frames = add->frames; 176 177 if (gnttab_dma_alloc_pages(&args)) 178 goto err; 179 180 add->dma_vaddr = args.vaddr; 181 add->dma_bus_addr = args.dev_bus_addr; 182 } else 183 #endif 184 if (gnttab_alloc_pages(count, add->pages)) 185 goto err; 186 187 for (i = 0; i < count; i++) { 188 add->map_ops[i].handle = -1; 189 add->unmap_ops[i].handle = -1; 190 add->kmap_ops[i].handle = -1; 191 add->kunmap_ops[i].handle = -1; 192 } 193 194 add->index = 0; 195 add->count = count; 196 refcount_set(&add->users, 1); 197 198 return add; 199 200 err: 201 gntdev_free_map(add); 202 return NULL; 203 } 204 205 void gntdev_add_map(struct gntdev_priv *priv, struct gntdev_grant_map *add) 206 { 207 struct gntdev_grant_map *map; 208 209 list_for_each_entry(map, &priv->maps, next) { 210 if (add->index + add->count < map->index) { 211 list_add_tail(&add->next, &map->next); 212 goto done; 213 } 214 add->index = map->index + map->count; 215 } 216 list_add_tail(&add->next, &priv->maps); 217 218 done: 219 gntdev_print_maps(priv, "[new]", add->index); 220 } 221 222 static struct gntdev_grant_map *gntdev_find_map_index(struct gntdev_priv *priv, 223 int index, int count) 224 { 225 struct gntdev_grant_map *map; 226 227 list_for_each_entry(map, &priv->maps, next) { 228 if (map->index != index) 229 continue; 230 if (count && map->count != count) 231 continue; 232 return map; 233 } 234 return NULL; 235 } 236 237 void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map) 238 { 239 if (!map) 240 return; 241 242 if (!refcount_dec_and_test(&map->users)) 243 return; 244 245 atomic_sub(map->count, &pages_mapped); 246 247 if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { 248 notify_remote_via_evtchn(map->notify.event); 249 evtchn_put(map->notify.event); 250 } 251 252 if (populate_freeable_maps && priv) { 253 mutex_lock(&priv->lock); 254 list_del(&map->next); 255 mutex_unlock(&priv->lock); 256 } 257 258 if (map->pages && !use_ptemod) 259 unmap_grant_pages(map, 0, map->count); 260 gntdev_free_map(map); 261 } 262 263 /* ------------------------------------------------------------------ */ 264 265 static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data) 266 { 267 struct gntdev_grant_map *map = data; 268 unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT; 269 int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte; 270 u64 pte_maddr; 271 272 BUG_ON(pgnr >= map->count); 273 pte_maddr = arbitrary_virt_to_machine(pte).maddr; 274 275 /* 276 * Set the PTE as special to force get_user_pages_fast() fall 277 * back to the slow path. If this is not supported as part of 278 * the grant map, it will be done afterwards. 279 */ 280 if (xen_feature(XENFEAT_gnttab_map_avail_bits)) 281 flags |= (1 << _GNTMAP_guest_avail0); 282 283 gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags, 284 map->grants[pgnr].ref, 285 map->grants[pgnr].domid); 286 gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags, 287 -1 /* handle */); 288 return 0; 289 } 290 291 #ifdef CONFIG_X86 292 static int set_grant_ptes_as_special(pte_t *pte, unsigned long addr, void *data) 293 { 294 set_pte_at(current->mm, addr, pte, pte_mkspecial(*pte)); 295 return 0; 296 } 297 #endif 298 299 int gntdev_map_grant_pages(struct gntdev_grant_map *map) 300 { 301 int i, err = 0; 302 303 if (!use_ptemod) { 304 /* Note: it could already be mapped */ 305 if (map->map_ops[0].handle != -1) 306 return 0; 307 for (i = 0; i < map->count; i++) { 308 unsigned long addr = (unsigned long) 309 pfn_to_kaddr(page_to_pfn(map->pages[i])); 310 gnttab_set_map_op(&map->map_ops[i], addr, map->flags, 311 map->grants[i].ref, 312 map->grants[i].domid); 313 gnttab_set_unmap_op(&map->unmap_ops[i], addr, 314 map->flags, -1 /* handle */); 315 } 316 } else { 317 /* 318 * Setup the map_ops corresponding to the pte entries pointing 319 * to the kernel linear addresses of the struct pages. 320 * These ptes are completely different from the user ptes dealt 321 * with find_grant_ptes. 322 */ 323 for (i = 0; i < map->count; i++) { 324 unsigned long address = (unsigned long) 325 pfn_to_kaddr(page_to_pfn(map->pages[i])); 326 BUG_ON(PageHighMem(map->pages[i])); 327 328 gnttab_set_map_op(&map->kmap_ops[i], address, 329 map->flags | GNTMAP_host_map, 330 map->grants[i].ref, 331 map->grants[i].domid); 332 gnttab_set_unmap_op(&map->kunmap_ops[i], address, 333 map->flags | GNTMAP_host_map, -1); 334 } 335 } 336 337 pr_debug("map %d+%d\n", map->index, map->count); 338 err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL, 339 map->pages, map->count); 340 if (err) 341 return err; 342 343 for (i = 0; i < map->count; i++) { 344 if (map->map_ops[i].status) { 345 err = -EINVAL; 346 continue; 347 } 348 349 map->unmap_ops[i].handle = map->map_ops[i].handle; 350 if (use_ptemod) 351 map->kunmap_ops[i].handle = map->kmap_ops[i].handle; 352 #ifdef CONFIG_XEN_GRANT_DMA_ALLOC 353 else if (map->dma_vaddr) { 354 unsigned long bfn; 355 356 bfn = pfn_to_bfn(page_to_pfn(map->pages[i])); 357 map->unmap_ops[i].dev_bus_addr = __pfn_to_phys(bfn); 358 } 359 #endif 360 } 361 return err; 362 } 363 364 static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset, 365 int pages) 366 { 367 int i, err = 0; 368 struct gntab_unmap_queue_data unmap_data; 369 370 if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { 371 int pgno = (map->notify.addr >> PAGE_SHIFT); 372 if (pgno >= offset && pgno < offset + pages) { 373 /* No need for kmap, pages are in lowmem */ 374 uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno])); 375 tmp[map->notify.addr & (PAGE_SIZE-1)] = 0; 376 map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; 377 } 378 } 379 380 unmap_data.unmap_ops = map->unmap_ops + offset; 381 unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL; 382 unmap_data.pages = map->pages + offset; 383 unmap_data.count = pages; 384 385 err = gnttab_unmap_refs_sync(&unmap_data); 386 if (err) 387 return err; 388 389 for (i = 0; i < pages; i++) { 390 if (map->unmap_ops[offset+i].status) 391 err = -EINVAL; 392 pr_debug("unmap handle=%d st=%d\n", 393 map->unmap_ops[offset+i].handle, 394 map->unmap_ops[offset+i].status); 395 map->unmap_ops[offset+i].handle = -1; 396 } 397 return err; 398 } 399 400 static int unmap_grant_pages(struct gntdev_grant_map *map, int offset, 401 int pages) 402 { 403 int range, err = 0; 404 405 pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages); 406 407 /* It is possible the requested range will have a "hole" where we 408 * already unmapped some of the grants. Only unmap valid ranges. 409 */ 410 while (pages && !err) { 411 while (pages && map->unmap_ops[offset].handle == -1) { 412 offset++; 413 pages--; 414 } 415 range = 0; 416 while (range < pages) { 417 if (map->unmap_ops[offset+range].handle == -1) 418 break; 419 range++; 420 } 421 err = __unmap_grant_pages(map, offset, range); 422 offset += range; 423 pages -= range; 424 } 425 426 return err; 427 } 428 429 /* ------------------------------------------------------------------ */ 430 431 static void gntdev_vma_open(struct vm_area_struct *vma) 432 { 433 struct gntdev_grant_map *map = vma->vm_private_data; 434 435 pr_debug("gntdev_vma_open %p\n", vma); 436 refcount_inc(&map->users); 437 } 438 439 static void gntdev_vma_close(struct vm_area_struct *vma) 440 { 441 struct gntdev_grant_map *map = vma->vm_private_data; 442 struct file *file = vma->vm_file; 443 struct gntdev_priv *priv = file->private_data; 444 445 pr_debug("gntdev_vma_close %p\n", vma); 446 if (use_ptemod) { 447 /* It is possible that an mmu notifier could be running 448 * concurrently, so take priv->lock to ensure that the vma won't 449 * vanishing during the unmap_grant_pages call, since we will 450 * spin here until that completes. Such a concurrent call will 451 * not do any unmapping, since that has been done prior to 452 * closing the vma, but it may still iterate the unmap_ops list. 453 */ 454 mutex_lock(&priv->lock); 455 map->vma = NULL; 456 mutex_unlock(&priv->lock); 457 } 458 vma->vm_private_data = NULL; 459 gntdev_put_map(priv, map); 460 } 461 462 static struct page *gntdev_vma_find_special_page(struct vm_area_struct *vma, 463 unsigned long addr) 464 { 465 struct gntdev_grant_map *map = vma->vm_private_data; 466 467 return map->pages[(addr - map->pages_vm_start) >> PAGE_SHIFT]; 468 } 469 470 static const struct vm_operations_struct gntdev_vmops = { 471 .open = gntdev_vma_open, 472 .close = gntdev_vma_close, 473 .find_special_page = gntdev_vma_find_special_page, 474 }; 475 476 /* ------------------------------------------------------------------ */ 477 478 static bool in_range(struct gntdev_grant_map *map, 479 unsigned long start, unsigned long end) 480 { 481 if (!map->vma) 482 return false; 483 if (map->vma->vm_start >= end) 484 return false; 485 if (map->vma->vm_end <= start) 486 return false; 487 488 return true; 489 } 490 491 static int unmap_if_in_range(struct gntdev_grant_map *map, 492 unsigned long start, unsigned long end, 493 bool blockable) 494 { 495 unsigned long mstart, mend; 496 int err; 497 498 if (!in_range(map, start, end)) 499 return 0; 500 501 if (!blockable) 502 return -EAGAIN; 503 504 mstart = max(start, map->vma->vm_start); 505 mend = min(end, map->vma->vm_end); 506 pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n", 507 map->index, map->count, 508 map->vma->vm_start, map->vma->vm_end, 509 start, end, mstart, mend); 510 err = unmap_grant_pages(map, 511 (mstart - map->vma->vm_start) >> PAGE_SHIFT, 512 (mend - mstart) >> PAGE_SHIFT); 513 WARN_ON(err); 514 515 return 0; 516 } 517 518 static int mn_invl_range_start(struct mmu_notifier *mn, 519 const struct mmu_notifier_range *range) 520 { 521 struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn); 522 struct gntdev_grant_map *map; 523 int ret = 0; 524 525 if (mmu_notifier_range_blockable(range)) 526 mutex_lock(&priv->lock); 527 else if (!mutex_trylock(&priv->lock)) 528 return -EAGAIN; 529 530 list_for_each_entry(map, &priv->maps, next) { 531 ret = unmap_if_in_range(map, range->start, range->end, 532 mmu_notifier_range_blockable(range)); 533 if (ret) 534 goto out_unlock; 535 } 536 list_for_each_entry(map, &priv->freeable_maps, next) { 537 ret = unmap_if_in_range(map, range->start, range->end, 538 mmu_notifier_range_blockable(range)); 539 if (ret) 540 goto out_unlock; 541 } 542 543 out_unlock: 544 mutex_unlock(&priv->lock); 545 546 return ret; 547 } 548 549 static void mn_release(struct mmu_notifier *mn, 550 struct mm_struct *mm) 551 { 552 struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn); 553 struct gntdev_grant_map *map; 554 int err; 555 556 mutex_lock(&priv->lock); 557 list_for_each_entry(map, &priv->maps, next) { 558 if (!map->vma) 559 continue; 560 pr_debug("map %d+%d (%lx %lx)\n", 561 map->index, map->count, 562 map->vma->vm_start, map->vma->vm_end); 563 err = unmap_grant_pages(map, /* offset */ 0, map->count); 564 WARN_ON(err); 565 } 566 list_for_each_entry(map, &priv->freeable_maps, next) { 567 if (!map->vma) 568 continue; 569 pr_debug("map %d+%d (%lx %lx)\n", 570 map->index, map->count, 571 map->vma->vm_start, map->vma->vm_end); 572 err = unmap_grant_pages(map, /* offset */ 0, map->count); 573 WARN_ON(err); 574 } 575 mutex_unlock(&priv->lock); 576 } 577 578 static const struct mmu_notifier_ops gntdev_mmu_ops = { 579 .release = mn_release, 580 .invalidate_range_start = mn_invl_range_start, 581 }; 582 583 /* ------------------------------------------------------------------ */ 584 585 static int gntdev_open(struct inode *inode, struct file *flip) 586 { 587 struct gntdev_priv *priv; 588 int ret = 0; 589 590 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 591 if (!priv) 592 return -ENOMEM; 593 594 INIT_LIST_HEAD(&priv->maps); 595 INIT_LIST_HEAD(&priv->freeable_maps); 596 mutex_init(&priv->lock); 597 598 #ifdef CONFIG_XEN_GNTDEV_DMABUF 599 priv->dmabuf_priv = gntdev_dmabuf_init(flip); 600 if (IS_ERR(priv->dmabuf_priv)) { 601 ret = PTR_ERR(priv->dmabuf_priv); 602 kfree(priv); 603 return ret; 604 } 605 #endif 606 607 if (use_ptemod) { 608 priv->mm = get_task_mm(current); 609 if (!priv->mm) { 610 kfree(priv); 611 return -ENOMEM; 612 } 613 priv->mn.ops = &gntdev_mmu_ops; 614 ret = mmu_notifier_register(&priv->mn, priv->mm); 615 mmput(priv->mm); 616 } 617 618 if (ret) { 619 kfree(priv); 620 return ret; 621 } 622 623 flip->private_data = priv; 624 #ifdef CONFIG_XEN_GRANT_DMA_ALLOC 625 priv->dma_dev = gntdev_miscdev.this_device; 626 dma_coerce_mask_and_coherent(priv->dma_dev, DMA_BIT_MASK(64)); 627 #endif 628 pr_debug("priv %p\n", priv); 629 630 return 0; 631 } 632 633 static int gntdev_release(struct inode *inode, struct file *flip) 634 { 635 struct gntdev_priv *priv = flip->private_data; 636 struct gntdev_grant_map *map; 637 638 pr_debug("priv %p\n", priv); 639 640 mutex_lock(&priv->lock); 641 while (!list_empty(&priv->maps)) { 642 map = list_entry(priv->maps.next, 643 struct gntdev_grant_map, next); 644 list_del(&map->next); 645 gntdev_put_map(NULL /* already removed */, map); 646 } 647 WARN_ON(!list_empty(&priv->freeable_maps)); 648 mutex_unlock(&priv->lock); 649 650 #ifdef CONFIG_XEN_GNTDEV_DMABUF 651 gntdev_dmabuf_fini(priv->dmabuf_priv); 652 #endif 653 654 if (use_ptemod) 655 mmu_notifier_unregister(&priv->mn, priv->mm); 656 657 kfree(priv); 658 return 0; 659 } 660 661 static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv, 662 struct ioctl_gntdev_map_grant_ref __user *u) 663 { 664 struct ioctl_gntdev_map_grant_ref op; 665 struct gntdev_grant_map *map; 666 int err; 667 668 if (copy_from_user(&op, u, sizeof(op)) != 0) 669 return -EFAULT; 670 pr_debug("priv %p, add %d\n", priv, op.count); 671 if (unlikely(op.count <= 0)) 672 return -EINVAL; 673 674 err = -ENOMEM; 675 map = gntdev_alloc_map(priv, op.count, 0 /* This is not a dma-buf. */); 676 if (!map) 677 return err; 678 679 if (unlikely(gntdev_account_mapped_pages(op.count))) { 680 pr_debug("can't map: over limit\n"); 681 gntdev_put_map(NULL, map); 682 return err; 683 } 684 685 if (copy_from_user(map->grants, &u->refs, 686 sizeof(map->grants[0]) * op.count) != 0) { 687 gntdev_put_map(NULL, map); 688 return -EFAULT; 689 } 690 691 mutex_lock(&priv->lock); 692 gntdev_add_map(priv, map); 693 op.index = map->index << PAGE_SHIFT; 694 mutex_unlock(&priv->lock); 695 696 if (copy_to_user(u, &op, sizeof(op)) != 0) 697 return -EFAULT; 698 699 return 0; 700 } 701 702 static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv, 703 struct ioctl_gntdev_unmap_grant_ref __user *u) 704 { 705 struct ioctl_gntdev_unmap_grant_ref op; 706 struct gntdev_grant_map *map; 707 int err = -ENOENT; 708 709 if (copy_from_user(&op, u, sizeof(op)) != 0) 710 return -EFAULT; 711 pr_debug("priv %p, del %d+%d\n", priv, (int)op.index, (int)op.count); 712 713 mutex_lock(&priv->lock); 714 map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count); 715 if (map) { 716 list_del(&map->next); 717 if (populate_freeable_maps) 718 list_add_tail(&map->next, &priv->freeable_maps); 719 err = 0; 720 } 721 mutex_unlock(&priv->lock); 722 if (map) 723 gntdev_put_map(priv, map); 724 return err; 725 } 726 727 static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv, 728 struct ioctl_gntdev_get_offset_for_vaddr __user *u) 729 { 730 struct ioctl_gntdev_get_offset_for_vaddr op; 731 struct vm_area_struct *vma; 732 struct gntdev_grant_map *map; 733 int rv = -EINVAL; 734 735 if (copy_from_user(&op, u, sizeof(op)) != 0) 736 return -EFAULT; 737 pr_debug("priv %p, offset for vaddr %lx\n", priv, (unsigned long)op.vaddr); 738 739 down_read(¤t->mm->mmap_sem); 740 vma = find_vma(current->mm, op.vaddr); 741 if (!vma || vma->vm_ops != &gntdev_vmops) 742 goto out_unlock; 743 744 map = vma->vm_private_data; 745 if (!map) 746 goto out_unlock; 747 748 op.offset = map->index << PAGE_SHIFT; 749 op.count = map->count; 750 rv = 0; 751 752 out_unlock: 753 up_read(¤t->mm->mmap_sem); 754 755 if (rv == 0 && copy_to_user(u, &op, sizeof(op)) != 0) 756 return -EFAULT; 757 return rv; 758 } 759 760 static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) 761 { 762 struct ioctl_gntdev_unmap_notify op; 763 struct gntdev_grant_map *map; 764 int rc; 765 int out_flags; 766 unsigned int out_event; 767 768 if (copy_from_user(&op, u, sizeof(op))) 769 return -EFAULT; 770 771 if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) 772 return -EINVAL; 773 774 /* We need to grab a reference to the event channel we are going to use 775 * to send the notify before releasing the reference we may already have 776 * (if someone has called this ioctl twice). This is required so that 777 * it is possible to change the clear_byte part of the notification 778 * without disturbing the event channel part, which may now be the last 779 * reference to that event channel. 780 */ 781 if (op.action & UNMAP_NOTIFY_SEND_EVENT) { 782 if (evtchn_get(op.event_channel_port)) 783 return -EINVAL; 784 } 785 786 out_flags = op.action; 787 out_event = op.event_channel_port; 788 789 mutex_lock(&priv->lock); 790 791 list_for_each_entry(map, &priv->maps, next) { 792 uint64_t begin = map->index << PAGE_SHIFT; 793 uint64_t end = (map->index + map->count) << PAGE_SHIFT; 794 if (op.index >= begin && op.index < end) 795 goto found; 796 } 797 rc = -ENOENT; 798 goto unlock_out; 799 800 found: 801 if ((op.action & UNMAP_NOTIFY_CLEAR_BYTE) && 802 (map->flags & GNTMAP_readonly)) { 803 rc = -EINVAL; 804 goto unlock_out; 805 } 806 807 out_flags = map->notify.flags; 808 out_event = map->notify.event; 809 810 map->notify.flags = op.action; 811 map->notify.addr = op.index - (map->index << PAGE_SHIFT); 812 map->notify.event = op.event_channel_port; 813 814 rc = 0; 815 816 unlock_out: 817 mutex_unlock(&priv->lock); 818 819 /* Drop the reference to the event channel we did not save in the map */ 820 if (out_flags & UNMAP_NOTIFY_SEND_EVENT) 821 evtchn_put(out_event); 822 823 return rc; 824 } 825 826 #define GNTDEV_COPY_BATCH 16 827 828 struct gntdev_copy_batch { 829 struct gnttab_copy ops[GNTDEV_COPY_BATCH]; 830 struct page *pages[GNTDEV_COPY_BATCH]; 831 s16 __user *status[GNTDEV_COPY_BATCH]; 832 unsigned int nr_ops; 833 unsigned int nr_pages; 834 }; 835 836 static int gntdev_get_page(struct gntdev_copy_batch *batch, void __user *virt, 837 bool writeable, unsigned long *gfn) 838 { 839 unsigned long addr = (unsigned long)virt; 840 struct page *page; 841 unsigned long xen_pfn; 842 int ret; 843 844 ret = get_user_pages_fast(addr, 1, writeable ? FOLL_WRITE : 0, &page); 845 if (ret < 0) 846 return ret; 847 848 batch->pages[batch->nr_pages++] = page; 849 850 xen_pfn = page_to_xen_pfn(page) + XEN_PFN_DOWN(addr & ~PAGE_MASK); 851 *gfn = pfn_to_gfn(xen_pfn); 852 853 return 0; 854 } 855 856 static void gntdev_put_pages(struct gntdev_copy_batch *batch) 857 { 858 unsigned int i; 859 860 for (i = 0; i < batch->nr_pages; i++) 861 put_page(batch->pages[i]); 862 batch->nr_pages = 0; 863 } 864 865 static int gntdev_copy(struct gntdev_copy_batch *batch) 866 { 867 unsigned int i; 868 869 gnttab_batch_copy(batch->ops, batch->nr_ops); 870 gntdev_put_pages(batch); 871 872 /* 873 * For each completed op, update the status if the op failed 874 * and all previous ops for the segment were successful. 875 */ 876 for (i = 0; i < batch->nr_ops; i++) { 877 s16 status = batch->ops[i].status; 878 s16 old_status; 879 880 if (status == GNTST_okay) 881 continue; 882 883 if (__get_user(old_status, batch->status[i])) 884 return -EFAULT; 885 886 if (old_status != GNTST_okay) 887 continue; 888 889 if (__put_user(status, batch->status[i])) 890 return -EFAULT; 891 } 892 893 batch->nr_ops = 0; 894 return 0; 895 } 896 897 static int gntdev_grant_copy_seg(struct gntdev_copy_batch *batch, 898 struct gntdev_grant_copy_segment *seg, 899 s16 __user *status) 900 { 901 uint16_t copied = 0; 902 903 /* 904 * Disallow local -> local copies since there is only space in 905 * batch->pages for one page per-op and this would be a very 906 * expensive memcpy(). 907 */ 908 if (!(seg->flags & (GNTCOPY_source_gref | GNTCOPY_dest_gref))) 909 return -EINVAL; 910 911 /* Can't cross page if source/dest is a grant ref. */ 912 if (seg->flags & GNTCOPY_source_gref) { 913 if (seg->source.foreign.offset + seg->len > XEN_PAGE_SIZE) 914 return -EINVAL; 915 } 916 if (seg->flags & GNTCOPY_dest_gref) { 917 if (seg->dest.foreign.offset + seg->len > XEN_PAGE_SIZE) 918 return -EINVAL; 919 } 920 921 if (put_user(GNTST_okay, status)) 922 return -EFAULT; 923 924 while (copied < seg->len) { 925 struct gnttab_copy *op; 926 void __user *virt; 927 size_t len, off; 928 unsigned long gfn; 929 int ret; 930 931 if (batch->nr_ops >= GNTDEV_COPY_BATCH) { 932 ret = gntdev_copy(batch); 933 if (ret < 0) 934 return ret; 935 } 936 937 len = seg->len - copied; 938 939 op = &batch->ops[batch->nr_ops]; 940 op->flags = 0; 941 942 if (seg->flags & GNTCOPY_source_gref) { 943 op->source.u.ref = seg->source.foreign.ref; 944 op->source.domid = seg->source.foreign.domid; 945 op->source.offset = seg->source.foreign.offset + copied; 946 op->flags |= GNTCOPY_source_gref; 947 } else { 948 virt = seg->source.virt + copied; 949 off = (unsigned long)virt & ~XEN_PAGE_MASK; 950 len = min(len, (size_t)XEN_PAGE_SIZE - off); 951 952 ret = gntdev_get_page(batch, virt, false, &gfn); 953 if (ret < 0) 954 return ret; 955 956 op->source.u.gmfn = gfn; 957 op->source.domid = DOMID_SELF; 958 op->source.offset = off; 959 } 960 961 if (seg->flags & GNTCOPY_dest_gref) { 962 op->dest.u.ref = seg->dest.foreign.ref; 963 op->dest.domid = seg->dest.foreign.domid; 964 op->dest.offset = seg->dest.foreign.offset + copied; 965 op->flags |= GNTCOPY_dest_gref; 966 } else { 967 virt = seg->dest.virt + copied; 968 off = (unsigned long)virt & ~XEN_PAGE_MASK; 969 len = min(len, (size_t)XEN_PAGE_SIZE - off); 970 971 ret = gntdev_get_page(batch, virt, true, &gfn); 972 if (ret < 0) 973 return ret; 974 975 op->dest.u.gmfn = gfn; 976 op->dest.domid = DOMID_SELF; 977 op->dest.offset = off; 978 } 979 980 op->len = len; 981 copied += len; 982 983 batch->status[batch->nr_ops] = status; 984 batch->nr_ops++; 985 } 986 987 return 0; 988 } 989 990 static long gntdev_ioctl_grant_copy(struct gntdev_priv *priv, void __user *u) 991 { 992 struct ioctl_gntdev_grant_copy copy; 993 struct gntdev_copy_batch batch; 994 unsigned int i; 995 int ret = 0; 996 997 if (copy_from_user(©, u, sizeof(copy))) 998 return -EFAULT; 999 1000 batch.nr_ops = 0; 1001 batch.nr_pages = 0; 1002 1003 for (i = 0; i < copy.count; i++) { 1004 struct gntdev_grant_copy_segment seg; 1005 1006 if (copy_from_user(&seg, ©.segments[i], sizeof(seg))) { 1007 ret = -EFAULT; 1008 goto out; 1009 } 1010 1011 ret = gntdev_grant_copy_seg(&batch, &seg, ©.segments[i].status); 1012 if (ret < 0) 1013 goto out; 1014 1015 cond_resched(); 1016 } 1017 if (batch.nr_ops) 1018 ret = gntdev_copy(&batch); 1019 return ret; 1020 1021 out: 1022 gntdev_put_pages(&batch); 1023 return ret; 1024 } 1025 1026 static long gntdev_ioctl(struct file *flip, 1027 unsigned int cmd, unsigned long arg) 1028 { 1029 struct gntdev_priv *priv = flip->private_data; 1030 void __user *ptr = (void __user *)arg; 1031 1032 switch (cmd) { 1033 case IOCTL_GNTDEV_MAP_GRANT_REF: 1034 return gntdev_ioctl_map_grant_ref(priv, ptr); 1035 1036 case IOCTL_GNTDEV_UNMAP_GRANT_REF: 1037 return gntdev_ioctl_unmap_grant_ref(priv, ptr); 1038 1039 case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR: 1040 return gntdev_ioctl_get_offset_for_vaddr(priv, ptr); 1041 1042 case IOCTL_GNTDEV_SET_UNMAP_NOTIFY: 1043 return gntdev_ioctl_notify(priv, ptr); 1044 1045 case IOCTL_GNTDEV_GRANT_COPY: 1046 return gntdev_ioctl_grant_copy(priv, ptr); 1047 1048 #ifdef CONFIG_XEN_GNTDEV_DMABUF 1049 case IOCTL_GNTDEV_DMABUF_EXP_FROM_REFS: 1050 return gntdev_ioctl_dmabuf_exp_from_refs(priv, use_ptemod, ptr); 1051 1052 case IOCTL_GNTDEV_DMABUF_EXP_WAIT_RELEASED: 1053 return gntdev_ioctl_dmabuf_exp_wait_released(priv, ptr); 1054 1055 case IOCTL_GNTDEV_DMABUF_IMP_TO_REFS: 1056 return gntdev_ioctl_dmabuf_imp_to_refs(priv, ptr); 1057 1058 case IOCTL_GNTDEV_DMABUF_IMP_RELEASE: 1059 return gntdev_ioctl_dmabuf_imp_release(priv, ptr); 1060 #endif 1061 1062 default: 1063 pr_debug("priv %p, unknown cmd %x\n", priv, cmd); 1064 return -ENOIOCTLCMD; 1065 } 1066 1067 return 0; 1068 } 1069 1070 static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) 1071 { 1072 struct gntdev_priv *priv = flip->private_data; 1073 int index = vma->vm_pgoff; 1074 int count = vma_pages(vma); 1075 struct gntdev_grant_map *map; 1076 int err = -EINVAL; 1077 1078 if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) 1079 return -EINVAL; 1080 1081 pr_debug("map %d+%d at %lx (pgoff %lx)\n", 1082 index, count, vma->vm_start, vma->vm_pgoff); 1083 1084 mutex_lock(&priv->lock); 1085 map = gntdev_find_map_index(priv, index, count); 1086 if (!map) 1087 goto unlock_out; 1088 if (use_ptemod && map->vma) 1089 goto unlock_out; 1090 if (use_ptemod && priv->mm != vma->vm_mm) { 1091 pr_warn("Huh? Other mm?\n"); 1092 goto unlock_out; 1093 } 1094 1095 refcount_inc(&map->users); 1096 1097 vma->vm_ops = &gntdev_vmops; 1098 1099 vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_MIXEDMAP; 1100 1101 if (use_ptemod) 1102 vma->vm_flags |= VM_DONTCOPY; 1103 1104 vma->vm_private_data = map; 1105 1106 if (use_ptemod) 1107 map->vma = vma; 1108 1109 if (map->flags) { 1110 if ((vma->vm_flags & VM_WRITE) && 1111 (map->flags & GNTMAP_readonly)) 1112 goto out_unlock_put; 1113 } else { 1114 map->flags = GNTMAP_host_map; 1115 if (!(vma->vm_flags & VM_WRITE)) 1116 map->flags |= GNTMAP_readonly; 1117 } 1118 1119 mutex_unlock(&priv->lock); 1120 1121 if (use_ptemod) { 1122 map->pages_vm_start = vma->vm_start; 1123 err = apply_to_page_range(vma->vm_mm, vma->vm_start, 1124 vma->vm_end - vma->vm_start, 1125 find_grant_ptes, map); 1126 if (err) { 1127 pr_warn("find_grant_ptes() failure.\n"); 1128 goto out_put_map; 1129 } 1130 } 1131 1132 err = gntdev_map_grant_pages(map); 1133 if (err) 1134 goto out_put_map; 1135 1136 if (!use_ptemod) { 1137 err = vm_map_pages_zero(vma, map->pages, map->count); 1138 if (err) 1139 goto out_put_map; 1140 } else { 1141 #ifdef CONFIG_X86 1142 /* 1143 * If the PTEs were not made special by the grant map 1144 * hypercall, do so here. 1145 * 1146 * This is racy since the mapping is already visible 1147 * to userspace but userspace should be well-behaved 1148 * enough to not touch it until the mmap() call 1149 * returns. 1150 */ 1151 if (!xen_feature(XENFEAT_gnttab_map_avail_bits)) { 1152 apply_to_page_range(vma->vm_mm, vma->vm_start, 1153 vma->vm_end - vma->vm_start, 1154 set_grant_ptes_as_special, NULL); 1155 } 1156 #endif 1157 } 1158 1159 return 0; 1160 1161 unlock_out: 1162 mutex_unlock(&priv->lock); 1163 return err; 1164 1165 out_unlock_put: 1166 mutex_unlock(&priv->lock); 1167 out_put_map: 1168 if (use_ptemod) { 1169 map->vma = NULL; 1170 unmap_grant_pages(map, 0, map->count); 1171 } 1172 gntdev_put_map(priv, map); 1173 return err; 1174 } 1175 1176 static const struct file_operations gntdev_fops = { 1177 .owner = THIS_MODULE, 1178 .open = gntdev_open, 1179 .release = gntdev_release, 1180 .mmap = gntdev_mmap, 1181 .unlocked_ioctl = gntdev_ioctl 1182 }; 1183 1184 static struct miscdevice gntdev_miscdev = { 1185 .minor = MISC_DYNAMIC_MINOR, 1186 .name = "xen/gntdev", 1187 .fops = &gntdev_fops, 1188 }; 1189 1190 /* ------------------------------------------------------------------ */ 1191 1192 static int __init gntdev_init(void) 1193 { 1194 int err; 1195 1196 if (!xen_domain()) 1197 return -ENODEV; 1198 1199 use_ptemod = !xen_feature(XENFEAT_auto_translated_physmap); 1200 1201 err = misc_register(&gntdev_miscdev); 1202 if (err != 0) { 1203 pr_err("Could not register gntdev device\n"); 1204 return err; 1205 } 1206 return 0; 1207 } 1208 1209 static void __exit gntdev_exit(void) 1210 { 1211 misc_deregister(&gntdev_miscdev); 1212 } 1213 1214 module_init(gntdev_init); 1215 module_exit(gntdev_exit); 1216 1217 /* ------------------------------------------------------------------ */ 1218