1 /****************************************************************************** 2 * gntalloc.c 3 * 4 * Device for creating grant references (in user-space) that may be shared 5 * with other domains. 6 * 7 * This program is distributed in the hope that it will be useful, 8 * but WITHOUT ANY WARRANTY; without even the implied warranty of 9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 * GNU General Public License for more details. 11 * 12 * You should have received a copy of the GNU General Public License 13 * along with this program; if not, write to the Free Software 14 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 15 */ 16 17 /* 18 * This driver exists to allow userspace programs in Linux to allocate kernel 19 * memory that will later be shared with another domain. Without this device, 20 * Linux userspace programs cannot create grant references. 21 * 22 * How this stuff works: 23 * X -> granting a page to Y 24 * Y -> mapping the grant from X 25 * 26 * 1. X uses the gntalloc device to allocate a page of kernel memory, P. 27 * 2. X creates an entry in the grant table that says domid(Y) can access P. 28 * This is done without a hypercall unless the grant table needs expansion. 29 * 3. X gives the grant reference identifier, GREF, to Y. 30 * 4. Y maps the page, either directly into kernel memory for use in a backend 31 * driver, or via a the gntdev device to map into the address space of an 32 * application running in Y. This is the first point at which Xen does any 33 * tracking of the page. 34 * 5. A program in X mmap()s a segment of the gntalloc device that corresponds 35 * to the shared page, and can now communicate with Y over the shared page. 36 * 37 * 38 * NOTE TO USERSPACE LIBRARIES: 39 * The grant allocation and mmap()ing are, naturally, two separate operations. 40 * You set up the sharing by calling the create ioctl() and then the mmap(). 41 * Teardown requires munmap() and either close() or ioctl(). 42 * 43 * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant 44 * reference, this device can be used to consume kernel memory by leaving grant 45 * references mapped by another domain when an application exits. Therefore, 46 * there is a global limit on the number of pages that can be allocated. When 47 * all references to the page are unmapped, it will be freed during the next 48 * grant operation. 49 */ 50 51 #include <linux/atomic.h> 52 #include <linux/module.h> 53 #include <linux/miscdevice.h> 54 #include <linux/kernel.h> 55 #include <linux/init.h> 56 #include <linux/slab.h> 57 #include <linux/fs.h> 58 #include <linux/device.h> 59 #include <linux/mm.h> 60 #include <linux/uaccess.h> 61 #include <linux/types.h> 62 #include <linux/list.h> 63 #include <linux/highmem.h> 64 65 #include <xen/xen.h> 66 #include <xen/page.h> 67 #include <xen/grant_table.h> 68 #include <xen/gntalloc.h> 69 #include <xen/events.h> 70 71 static int limit = 1024; 72 module_param(limit, int, 0644); 73 MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by " 74 "the gntalloc device"); 75 76 static LIST_HEAD(gref_list); 77 static DEFINE_SPINLOCK(gref_lock); 78 static int gref_size; 79 80 struct notify_info { 81 uint16_t pgoff:12; /* Bits 0-11: Offset of the byte to clear */ 82 uint16_t flags:2; /* Bits 12-13: Unmap notification flags */ 83 int event; /* Port (event channel) to notify */ 84 }; 85 86 /* Metadata on a grant reference. */ 87 struct gntalloc_gref { 88 struct list_head next_gref; /* list entry gref_list */ 89 struct list_head next_file; /* list entry file->list, if open */ 90 struct page *page; /* The shared page */ 91 uint64_t file_index; /* File offset for mmap() */ 92 unsigned int users; /* Use count - when zero, waiting on Xen */ 93 grant_ref_t gref_id; /* The grant reference number */ 94 struct notify_info notify; /* Unmap notification */ 95 }; 96 97 struct gntalloc_file_private_data { 98 struct list_head list; 99 uint64_t index; 100 }; 101 102 static void __del_gref(struct gntalloc_gref *gref); 103 104 static void do_cleanup(void) 105 { 106 struct gntalloc_gref *gref, *n; 107 list_for_each_entry_safe(gref, n, &gref_list, next_gref) { 108 if (!gref->users) 109 __del_gref(gref); 110 } 111 } 112 113 static int add_grefs(struct ioctl_gntalloc_alloc_gref *op, 114 uint32_t *gref_ids, struct gntalloc_file_private_data *priv) 115 { 116 int i, rc, readonly; 117 LIST_HEAD(queue_gref); 118 LIST_HEAD(queue_file); 119 struct gntalloc_gref *gref; 120 121 readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE); 122 rc = -ENOMEM; 123 for (i = 0; i < op->count; i++) { 124 gref = kzalloc(sizeof(*gref), GFP_KERNEL); 125 if (!gref) 126 goto undo; 127 list_add_tail(&gref->next_gref, &queue_gref); 128 list_add_tail(&gref->next_file, &queue_file); 129 gref->users = 1; 130 gref->file_index = op->index + i * PAGE_SIZE; 131 gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO); 132 if (!gref->page) 133 goto undo; 134 135 /* Grant foreign access to the page. */ 136 gref->gref_id = gnttab_grant_foreign_access(op->domid, 137 pfn_to_mfn(page_to_pfn(gref->page)), readonly); 138 if (gref->gref_id < 0) { 139 rc = gref->gref_id; 140 goto undo; 141 } 142 gref_ids[i] = gref->gref_id; 143 } 144 145 /* Add to gref lists. */ 146 spin_lock(&gref_lock); 147 list_splice_tail(&queue_gref, &gref_list); 148 list_splice_tail(&queue_file, &priv->list); 149 spin_unlock(&gref_lock); 150 151 return 0; 152 153 undo: 154 spin_lock(&gref_lock); 155 gref_size -= (op->count - i); 156 157 list_for_each_entry(gref, &queue_file, next_file) { 158 /* __del_gref does not remove from queue_file */ 159 __del_gref(gref); 160 } 161 162 /* It's possible for the target domain to map the just-allocated grant 163 * references by blindly guessing their IDs; if this is done, then 164 * __del_gref will leave them in the queue_gref list. They need to be 165 * added to the global list so that we can free them when they are no 166 * longer referenced. 167 */ 168 if (unlikely(!list_empty(&queue_gref))) 169 list_splice_tail(&queue_gref, &gref_list); 170 spin_unlock(&gref_lock); 171 return rc; 172 } 173 174 static void __del_gref(struct gntalloc_gref *gref) 175 { 176 if (gref->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { 177 uint8_t *tmp = kmap(gref->page); 178 tmp[gref->notify.pgoff] = 0; 179 kunmap(gref->page); 180 } 181 if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT) 182 notify_remote_via_evtchn(gref->notify.event); 183 184 gref->notify.flags = 0; 185 186 if (gref->gref_id > 0) { 187 if (gnttab_query_foreign_access(gref->gref_id)) 188 return; 189 190 if (!gnttab_end_foreign_access_ref(gref->gref_id, 0)) 191 return; 192 } 193 194 gref_size--; 195 list_del(&gref->next_gref); 196 197 if (gref->page) 198 __free_page(gref->page); 199 200 kfree(gref); 201 } 202 203 /* finds contiguous grant references in a file, returns the first */ 204 static struct gntalloc_gref *find_grefs(struct gntalloc_file_private_data *priv, 205 uint64_t index, uint32_t count) 206 { 207 struct gntalloc_gref *rv = NULL, *gref; 208 list_for_each_entry(gref, &priv->list, next_file) { 209 if (gref->file_index == index && !rv) 210 rv = gref; 211 if (rv) { 212 if (gref->file_index != index) 213 return NULL; 214 index += PAGE_SIZE; 215 count--; 216 if (count == 0) 217 return rv; 218 } 219 } 220 return NULL; 221 } 222 223 /* 224 * ------------------------------------- 225 * File operations. 226 * ------------------------------------- 227 */ 228 static int gntalloc_open(struct inode *inode, struct file *filp) 229 { 230 struct gntalloc_file_private_data *priv; 231 232 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 233 if (!priv) 234 goto out_nomem; 235 INIT_LIST_HEAD(&priv->list); 236 237 filp->private_data = priv; 238 239 pr_debug("%s: priv %p\n", __func__, priv); 240 241 return 0; 242 243 out_nomem: 244 return -ENOMEM; 245 } 246 247 static int gntalloc_release(struct inode *inode, struct file *filp) 248 { 249 struct gntalloc_file_private_data *priv = filp->private_data; 250 struct gntalloc_gref *gref; 251 252 pr_debug("%s: priv %p\n", __func__, priv); 253 254 spin_lock(&gref_lock); 255 while (!list_empty(&priv->list)) { 256 gref = list_entry(priv->list.next, 257 struct gntalloc_gref, next_file); 258 list_del(&gref->next_file); 259 gref->users--; 260 if (gref->users == 0) 261 __del_gref(gref); 262 } 263 kfree(priv); 264 spin_unlock(&gref_lock); 265 266 return 0; 267 } 268 269 static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv, 270 struct ioctl_gntalloc_alloc_gref __user *arg) 271 { 272 int rc = 0; 273 struct ioctl_gntalloc_alloc_gref op; 274 uint32_t *gref_ids; 275 276 pr_debug("%s: priv %p\n", __func__, priv); 277 278 if (copy_from_user(&op, arg, sizeof(op))) { 279 rc = -EFAULT; 280 goto out; 281 } 282 283 gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY); 284 if (!gref_ids) { 285 rc = -ENOMEM; 286 goto out; 287 } 288 289 spin_lock(&gref_lock); 290 /* Clean up pages that were at zero (local) users but were still mapped 291 * by remote domains. Since those pages count towards the limit that we 292 * are about to enforce, removing them here is a good idea. 293 */ 294 do_cleanup(); 295 if (gref_size + op.count > limit) { 296 spin_unlock(&gref_lock); 297 rc = -ENOSPC; 298 goto out_free; 299 } 300 gref_size += op.count; 301 op.index = priv->index; 302 priv->index += op.count * PAGE_SIZE; 303 spin_unlock(&gref_lock); 304 305 rc = add_grefs(&op, gref_ids, priv); 306 if (rc < 0) 307 goto out_free; 308 309 /* Once we finish add_grefs, it is unsafe to touch the new reference, 310 * since it is possible for a concurrent ioctl to remove it (by guessing 311 * its index). If the userspace application doesn't provide valid memory 312 * to write the IDs to, then it will need to close the file in order to 313 * release - which it will do by segfaulting when it tries to access the 314 * IDs to close them. 315 */ 316 if (copy_to_user(arg, &op, sizeof(op))) { 317 rc = -EFAULT; 318 goto out_free; 319 } 320 if (copy_to_user(arg->gref_ids, gref_ids, 321 sizeof(gref_ids[0]) * op.count)) { 322 rc = -EFAULT; 323 goto out_free; 324 } 325 326 out_free: 327 kfree(gref_ids); 328 out: 329 return rc; 330 } 331 332 static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv, 333 void __user *arg) 334 { 335 int i, rc = 0; 336 struct ioctl_gntalloc_dealloc_gref op; 337 struct gntalloc_gref *gref, *n; 338 339 pr_debug("%s: priv %p\n", __func__, priv); 340 341 if (copy_from_user(&op, arg, sizeof(op))) { 342 rc = -EFAULT; 343 goto dealloc_grant_out; 344 } 345 346 spin_lock(&gref_lock); 347 gref = find_grefs(priv, op.index, op.count); 348 if (gref) { 349 /* Remove from the file list only, and decrease reference count. 350 * The later call to do_cleanup() will remove from gref_list and 351 * free the memory if the pages aren't mapped anywhere. 352 */ 353 for (i = 0; i < op.count; i++) { 354 n = list_entry(gref->next_file.next, 355 struct gntalloc_gref, next_file); 356 list_del(&gref->next_file); 357 gref->users--; 358 gref = n; 359 } 360 } else { 361 rc = -EINVAL; 362 } 363 364 do_cleanup(); 365 366 spin_unlock(&gref_lock); 367 dealloc_grant_out: 368 return rc; 369 } 370 371 static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv, 372 void __user *arg) 373 { 374 struct ioctl_gntalloc_unmap_notify op; 375 struct gntalloc_gref *gref; 376 uint64_t index; 377 int pgoff; 378 int rc; 379 380 if (copy_from_user(&op, arg, sizeof(op))) 381 return -EFAULT; 382 383 index = op.index & ~(PAGE_SIZE - 1); 384 pgoff = op.index & (PAGE_SIZE - 1); 385 386 spin_lock(&gref_lock); 387 388 gref = find_grefs(priv, index, 1); 389 if (!gref) { 390 rc = -ENOENT; 391 goto unlock_out; 392 } 393 394 if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) { 395 rc = -EINVAL; 396 goto unlock_out; 397 } 398 399 gref->notify.flags = op.action; 400 gref->notify.pgoff = pgoff; 401 gref->notify.event = op.event_channel_port; 402 rc = 0; 403 unlock_out: 404 spin_unlock(&gref_lock); 405 return rc; 406 } 407 408 static long gntalloc_ioctl(struct file *filp, unsigned int cmd, 409 unsigned long arg) 410 { 411 struct gntalloc_file_private_data *priv = filp->private_data; 412 413 switch (cmd) { 414 case IOCTL_GNTALLOC_ALLOC_GREF: 415 return gntalloc_ioctl_alloc(priv, (void __user *)arg); 416 417 case IOCTL_GNTALLOC_DEALLOC_GREF: 418 return gntalloc_ioctl_dealloc(priv, (void __user *)arg); 419 420 case IOCTL_GNTALLOC_SET_UNMAP_NOTIFY: 421 return gntalloc_ioctl_unmap_notify(priv, (void __user *)arg); 422 423 default: 424 return -ENOIOCTLCMD; 425 } 426 427 return 0; 428 } 429 430 static void gntalloc_vma_open(struct vm_area_struct *vma) 431 { 432 struct gntalloc_gref *gref = vma->vm_private_data; 433 if (!gref) 434 return; 435 436 spin_lock(&gref_lock); 437 gref->users++; 438 spin_unlock(&gref_lock); 439 } 440 441 static void gntalloc_vma_close(struct vm_area_struct *vma) 442 { 443 struct gntalloc_gref *gref = vma->vm_private_data; 444 if (!gref) 445 return; 446 447 spin_lock(&gref_lock); 448 gref->users--; 449 if (gref->users == 0) 450 __del_gref(gref); 451 spin_unlock(&gref_lock); 452 } 453 454 static struct vm_operations_struct gntalloc_vmops = { 455 .open = gntalloc_vma_open, 456 .close = gntalloc_vma_close, 457 }; 458 459 static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) 460 { 461 struct gntalloc_file_private_data *priv = filp->private_data; 462 struct gntalloc_gref *gref; 463 int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; 464 int rv, i; 465 466 pr_debug("%s: priv %p, page %lu+%d\n", __func__, 467 priv, vma->vm_pgoff, count); 468 469 if (!(vma->vm_flags & VM_SHARED)) { 470 printk(KERN_ERR "%s: Mapping must be shared.\n", __func__); 471 return -EINVAL; 472 } 473 474 spin_lock(&gref_lock); 475 gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count); 476 if (gref == NULL) { 477 rv = -ENOENT; 478 pr_debug("%s: Could not find grant reference", 479 __func__); 480 goto out_unlock; 481 } 482 483 vma->vm_private_data = gref; 484 485 vma->vm_flags |= VM_RESERVED; 486 487 vma->vm_ops = &gntalloc_vmops; 488 489 for (i = 0; i < count; i++) { 490 gref->users++; 491 rv = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE, 492 gref->page); 493 if (rv) 494 goto out_unlock; 495 496 gref = list_entry(gref->next_file.next, 497 struct gntalloc_gref, next_file); 498 } 499 rv = 0; 500 501 out_unlock: 502 spin_unlock(&gref_lock); 503 return rv; 504 } 505 506 static const struct file_operations gntalloc_fops = { 507 .owner = THIS_MODULE, 508 .open = gntalloc_open, 509 .release = gntalloc_release, 510 .unlocked_ioctl = gntalloc_ioctl, 511 .mmap = gntalloc_mmap 512 }; 513 514 /* 515 * ------------------------------------- 516 * Module creation/destruction. 517 * ------------------------------------- 518 */ 519 static struct miscdevice gntalloc_miscdev = { 520 .minor = MISC_DYNAMIC_MINOR, 521 .name = "xen/gntalloc", 522 .fops = &gntalloc_fops, 523 }; 524 525 static int __init gntalloc_init(void) 526 { 527 int err; 528 529 if (!xen_domain()) 530 return -ENODEV; 531 532 err = misc_register(&gntalloc_miscdev); 533 if (err != 0) { 534 printk(KERN_ERR "Could not register misc gntalloc device\n"); 535 return err; 536 } 537 538 pr_debug("Created grant allocation device at %d,%d\n", 539 MISC_MAJOR, gntalloc_miscdev.minor); 540 541 return 0; 542 } 543 544 static void __exit gntalloc_exit(void) 545 { 546 misc_deregister(&gntalloc_miscdev); 547 } 548 549 module_init(gntalloc_init); 550 module_exit(gntalloc_exit); 551 552 MODULE_LICENSE("GPL"); 553 MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>, " 554 "Daniel De Graaf <dgdegra@tycho.nsa.gov>"); 555 MODULE_DESCRIPTION("User-space grant reference allocator driver"); 556