/******************************************************************************
 * privcmd.c
 *
 * Interface to privileged domain-0 commands.
 *
 * Copyright (c) 2002-2004, K A Fraser, B Dragovic
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/uaccess.h>
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/seq_file.h>
#include <linux/miscdevice.h>

#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/privcmd.h>
#include <xen/interface/xen.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
#include <xen/balloon.h>

#include "privcmd.h"

MODULE_LICENSE("GPL");

#define PRIV_VMA_LOCKED ((void *)1)

#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
#endif

static long privcmd_ioctl_hypercall(void __user *udata)
{
	struct privcmd_hypercall hypercall;
	long ret;

	if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
		return -EFAULT;

	ret = privcmd_call(hypercall.op,
			   hypercall.arg[0], hypercall.arg[1],
			   hypercall.arg[2], hypercall.arg[3],
			   hypercall.arg[4]);

	return ret;
}

static void free_page_list(struct list_head *pages)
{
	struct page *p, *n;

	list_for_each_entry_safe(p, n, pages, lru)
		__free_page(p);

	INIT_LIST_HEAD(pages);
}

/*
 * Given an array of items in userspace, return a list of pages
 * containing the data.  If copying fails, either because of memory
 * allocation failure or a problem reading user memory, return an
 * error code; it's up to the caller to dispose of any partial list.
 */
static int gather_array(struct list_head *pagelist,
			unsigned nelem, size_t size,
			const void __user *data)
{
	unsigned pageidx;
	void *pagedata;
	int ret;

	if (size > PAGE_SIZE)
		return 0;

	pageidx = PAGE_SIZE;
	pagedata = NULL;	/* quiet, gcc */
	while (nelem--) {
		if (pageidx > PAGE_SIZE-size) {
			struct page *page = alloc_page(GFP_KERNEL);

			ret = -ENOMEM;
			if (page == NULL)
				goto fail;

			pagedata = page_address(page);

			list_add_tail(&page->lru, pagelist);
			pageidx = 0;
		}

		ret = -EFAULT;
		if (copy_from_user(pagedata + pageidx, data, size))
			goto fail;

		data += size;
		pageidx += size;
	}

	ret = 0;

fail:
	return ret;
}
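
/*
 * Usage note (illustrative sketch only, not a verbatim excerpt of this file):
 * gather_array() above and traverse_pages() below are used as a pair.  An
 * ioctl handler first copies a user array into a page list, then walks it
 * element by element:
 *
 *	LIST_HEAD(pagelist);
 *	int rc;
 *
 *	rc = gather_array(&pagelist, nelem, sizeof(struct privcmd_mmap_entry),
 *			  user_ptr);
 *	if (!rc && !list_empty(&pagelist))
 *		rc = traverse_pages(nelem, sizeof(struct privcmd_mmap_entry),
 *				    &pagelist, mmap_mfn_range, &state);
 *	free_page_list(&pagelist);
 *
 * "user_ptr", "nelem" and "state" stand in for the caller's values; see
 * privcmd_ioctl_mmap() below for the real call sequence.
 */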

/*
 * Call function "fn" on each element of the array fragmented
 * over a list of pages.
 */
static int traverse_pages(unsigned nelem, size_t size,
			  struct list_head *pos,
			  int (*fn)(void *data, void *state),
			  void *state)
{
	void *pagedata;
	unsigned pageidx;
	int ret = 0;

	BUG_ON(size > PAGE_SIZE);

	pageidx = PAGE_SIZE;
	pagedata = NULL;	/* hush, gcc */

	while (nelem--) {
		if (pageidx > PAGE_SIZE-size) {
			struct page *page;
			pos = pos->next;
			page = list_entry(pos, struct page, lru);
			pagedata = page_address(page);
			pageidx = 0;
		}

		ret = (*fn)(pagedata + pageidx, state);
		if (ret)
			break;
		pageidx += size;
	}

	return ret;
}

struct mmap_mfn_state {
	unsigned long va;
	struct vm_area_struct *vma;
	domid_t domain;
};

static int mmap_mfn_range(void *data, void *state)
{
	struct privcmd_mmap_entry *msg = data;
	struct mmap_mfn_state *st = state;
	struct vm_area_struct *vma = st->vma;
	int rc;

	/* Do not allow range to wrap the address space. */
	if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
	    ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
		return -EINVAL;

	/* Range chunks must be contiguous in va space. */
	if ((msg->va != st->va) ||
	    ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
		return -EINVAL;

	rc = xen_remap_domain_mfn_range(vma,
					msg->va & PAGE_MASK,
					msg->mfn, msg->npages,
					vma->vm_page_prot,
					st->domain, NULL);
	if (rc < 0)
		return rc;

	st->va += msg->npages << PAGE_SHIFT;

	return 0;
}

static long privcmd_ioctl_mmap(void __user *udata)
{
	struct privcmd_mmap mmapcmd;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int rc;
	LIST_HEAD(pagelist);
	struct mmap_mfn_state state;

	/* We only support privcmd_ioctl_mmap_batch for auto translated. */
	if (xen_feature(XENFEAT_auto_translated_physmap))
		return -ENOSYS;

	if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
		return -EFAULT;

	rc = gather_array(&pagelist,
			  mmapcmd.num, sizeof(struct privcmd_mmap_entry),
			  mmapcmd.entry);

	if (rc || list_empty(&pagelist))
		goto out;

	down_write(&mm->mmap_sem);

	{
		struct page *page = list_first_entry(&pagelist,
						     struct page, lru);
		struct privcmd_mmap_entry *msg = page_address(page);

		vma = find_vma(mm, msg->va);
		rc = -EINVAL;

		if (!vma || (msg->va != vma->vm_start) ||
		    !privcmd_enforce_singleshot_mapping(vma))
			goto out_up;
	}

	state.va = vma->vm_start;
	state.vma = vma;
	state.domain = mmapcmd.dom;

	rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
			    &pagelist,
			    mmap_mfn_range, &state);

out_up:
	up_write(&mm->mmap_sem);

out:
	free_page_list(&pagelist);

	return rc;
}
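
/*
 * Userspace sketch (an assumption-laden illustration, not a verbatim excerpt
 * of any tool; it relies on the uapi layout of struct privcmd_mmap and
 * struct privcmd_mmap_entry from <xen/privcmd.h>).  A dom0 process would
 * typically mmap() a region of this device and then ask for foreign frames
 * to be mapped into it:
 *
 *	int fd = open("/dev/xen/privcmd", O_RDWR);
 *	void *addr = mmap(NULL, (size_t)npages * getpagesize(),
 *			  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *	struct privcmd_mmap_entry entry = {
 *		.va = (unsigned long)addr,
 *		.mfn = first_mfn,
 *		.npages = npages,
 *	};
 *	struct privcmd_mmap cmd = {
 *		.num = 1,
 *		.dom = domid,
 *		.entry = &entry,
 *	};
 *	ioctl(fd, IOCTL_PRIVCMD_MMAP, &cmd);
 *
 * "first_mfn", "domid" and "npages" are placeholders for caller-supplied
 * values; the mapping must start at the VMA's vm_start, as enforced above.
 */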

struct mmap_batch_state {
	domid_t domain;
	unsigned long va;
	struct vm_area_struct *vma;
	int index;
	/* A tristate:
	 *	0 for no errors
	 *	1 if at least one error has happened (and no
	 *	    -ENOENT errors have happened)
	 *	-ENOENT if at least 1 -ENOENT has happened.
	 */
	int global_error;
	int version;

	/* User-space mfn array to store errors in the second pass for V1. */
	xen_pfn_t __user *user_mfn;
	/* User-space int array to store errors in the second pass for V2. */
	int __user *user_err;
};

/* auto translated dom0 note: if domU being created is PV, then mfn is
 * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP).
 */
static int mmap_batch_fn(void *data, void *state)
{
	xen_pfn_t *mfnp = data;
	struct mmap_batch_state *st = state;
	struct vm_area_struct *vma = st->vma;
	struct page **pages = vma->vm_private_data;
	struct page *cur_page = NULL;
	int ret;

	if (xen_feature(XENFEAT_auto_translated_physmap))
		cur_page = pages[st->index++];

	ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
					 st->vma->vm_page_prot, st->domain,
					 &cur_page);

	/* Store error code for second pass. */
	if (st->version == 1) {
		if (ret < 0) {
			/*
			 * V1 encodes the error codes in the 32bit top nibble
			 * of the mfn (with its known limitations vis-a-vis
			 * 64 bit callers).
			 */
			*mfnp |= (ret == -ENOENT) ?
						PRIVCMD_MMAPBATCH_PAGED_ERROR :
						PRIVCMD_MMAPBATCH_MFN_ERROR;
		}
	} else { /* st->version == 2 */
		*((int *) mfnp) = ret;
	}

	/* And see if it affects the global_error. */
	if (ret < 0) {
		if (ret == -ENOENT)
			st->global_error = -ENOENT;
		else {
			/* Record that at least one error has happened. */
			if (st->global_error == 0)
				st->global_error = 1;
		}
	}
	st->va += PAGE_SIZE;

	return 0;
}

static int mmap_return_errors(void *data, void *state)
{
	struct mmap_batch_state *st = state;

	if (st->version == 1) {
		xen_pfn_t mfnp = *((xen_pfn_t *) data);
		if (mfnp & PRIVCMD_MMAPBATCH_MFN_ERROR)
			return __put_user(mfnp, st->user_mfn++);
		else
			st->user_mfn++;
	} else { /* st->version == 2 */
		int err = *((int *) data);
		if (err)
			return __put_user(err, st->user_err++);
		else
			st->user_err++;
	}

	return 0;
}

/* Allocate pfns that are then mapped with gmfns from foreign domid. Update
 * the vma with the page info to use later.
 * Returns: 0 if success, otherwise -errno
 */
static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
{
	int rc;
	struct page **pages;

	pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL);
	if (pages == NULL)
		return -ENOMEM;

	rc = alloc_xenballooned_pages(numpgs, pages, 0);
	if (rc != 0) {
		pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__,
			numpgs, rc);
		kfree(pages);
		return -ENOMEM;
	}
	BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED);
	vma->vm_private_data = pages;

	return 0;
}

static struct vm_operations_struct privcmd_vm_ops;
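
/*
 * Note on error reporting for the MMAPBATCH ioctls (summarising the two-pass
 * scheme implemented by mmap_batch_fn() and mmap_return_errors() above and
 * privcmd_ioctl_mmap_batch() below):
 *
 *  - Pass 1 (mmap_batch_fn) maps one frame at a time and records a per-frame
 *    status in the gathered copy of the array: V1 ORs an error flag into the
 *    mfn itself, V2 stores the raw -errno.
 *  - Pass 2 (mmap_return_errors) copies those statuses back to userspace,
 *    into m.arr for V1 or m.err for V2, and only runs if pass 1 recorded at
 *    least one failure.
 *  - The ioctl returns 0 on full success, -ENOENT if any frame was paged
 *    out, or a negative errno from the copy-back itself.
 */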

static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
{
	int ret;
	struct privcmd_mmapbatch_v2 m;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long nr_pages;
	LIST_HEAD(pagelist);
	struct mmap_batch_state state;

	switch (version) {
	case 1:
		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
			return -EFAULT;
		/* Returns per-frame error in m.arr. */
		m.err = NULL;
		if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
			return -EFAULT;
		break;
	case 2:
		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
			return -EFAULT;
		/* Returns per-frame error code in m.err. */
		if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
			return -EFAULT;
		break;
	default:
		return -EINVAL;
	}

	nr_pages = m.num;
	if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
		return -EINVAL;

	ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);

	if (ret)
		goto out;
	if (list_empty(&pagelist)) {
		ret = -EINVAL;
		goto out;
	}

	if (version == 2) {
		/* Zero error array now to only copy back actual errors. */
		if (clear_user(m.err, sizeof(int) * m.num)) {
			ret = -EFAULT;
			goto out;
		}
	}

	down_write(&mm->mmap_sem);

	vma = find_vma(mm, m.addr);
	if (!vma ||
	    vma->vm_ops != &privcmd_vm_ops ||
	    (m.addr != vma->vm_start) ||
	    ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
	    !privcmd_enforce_singleshot_mapping(vma)) {
		up_write(&mm->mmap_sem);
		ret = -EINVAL;
		goto out;
	}
	if (xen_feature(XENFEAT_auto_translated_physmap)) {
		ret = alloc_empty_pages(vma, m.num);
		if (ret < 0) {
			up_write(&mm->mmap_sem);
			goto out;
		}
	}

	state.domain = m.dom;
	state.vma = vma;
	state.va = m.addr;
	state.index = 0;
	state.global_error = 0;
	state.version = version;

	/* mmap_batch_fn guarantees ret == 0 */
	BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
			      &pagelist, mmap_batch_fn, &state));

	up_write(&mm->mmap_sem);

	if (state.global_error) {
		/* Write back errors in second pass. */
		state.user_mfn = (xen_pfn_t *)m.arr;
		state.user_err = m.err;
		ret = traverse_pages(m.num, sizeof(xen_pfn_t),
				     &pagelist, mmap_return_errors, &state);
	} else
		ret = 0;

	/* If we have not had any EFAULT-like global errors then set the global
	 * error to -ENOENT if necessary.
	 */
	if ((ret == 0) && (state.global_error == -ENOENT))
		ret = -ENOENT;

out:
	free_page_list(&pagelist);

	return ret;
}
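
/*
 * Userspace sketch for the V2 batch interface (an assumption-laden
 * illustration relying on the uapi struct privcmd_mmapbatch_v2 from
 * <xen/privcmd.h>; not a verbatim excerpt of any tool):
 *
 *	xen_pfn_t gfns[N];
 *	int errs[N];
 *	void *addr = mmap(NULL, (size_t)N * getpagesize(),
 *			  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *	struct privcmd_mmapbatch_v2 batch = {
 *		.num  = N,
 *		.dom  = domid,
 *		.addr = (__u64)(unsigned long)addr,
 *		.arr  = gfns,
 *		.err  = errs,
 *	};
 *	int rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &batch);
 *
 * "fd" is an open descriptor on /dev/xen/privcmd; "N", "domid" and the
 * gfns[] contents are caller-supplied.  If rc fails with errno ENOENT,
 * the caller inspects errs[] for the frames that were paged out.
 */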

static long privcmd_ioctl(struct file *file,
			  unsigned int cmd, unsigned long data)
{
	int ret = -ENOSYS;
	void __user *udata = (void __user *) data;

	switch (cmd) {
	case IOCTL_PRIVCMD_HYPERCALL:
		ret = privcmd_ioctl_hypercall(udata);
		break;

	case IOCTL_PRIVCMD_MMAP:
		ret = privcmd_ioctl_mmap(udata);
		break;

	case IOCTL_PRIVCMD_MMAPBATCH:
		ret = privcmd_ioctl_mmap_batch(udata, 1);
		break;

	case IOCTL_PRIVCMD_MMAPBATCH_V2:
		ret = privcmd_ioctl_mmap_batch(udata, 2);
		break;

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

static void privcmd_close(struct vm_area_struct *vma)
{
	struct page **pages = vma->vm_private_data;
	int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;

	if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages)
		return;

	xen_unmap_domain_mfn_range(vma, numpgs, pages);
	free_xenballooned_pages(numpgs, pages);
	kfree(pages);
}

static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
	       vma, vma->vm_start, vma->vm_end,
	       vmf->pgoff, vmf->virtual_address);

	return VM_FAULT_SIGBUS;
}

static struct vm_operations_struct privcmd_vm_ops = {
	.close = privcmd_close,
	.fault = privcmd_fault
};

static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* DONTCOPY is essential for Xen because copy_page_range doesn't know
	 * how to recreate these mappings */
	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY |
			 VM_DONTEXPAND | VM_DONTDUMP;
	vma->vm_ops = &privcmd_vm_ops;
	vma->vm_private_data = NULL;

	return 0;
}

static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
{
	return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED);
}

const struct file_operations xen_privcmd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = privcmd_ioctl,
	.mmap = privcmd_mmap,
};
EXPORT_SYMBOL_GPL(xen_privcmd_fops);

static struct miscdevice privcmd_dev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "xen/privcmd",
	.fops = &xen_privcmd_fops,
};

static int __init privcmd_init(void)
{
	int err;

	if (!xen_domain())
		return -ENODEV;

	err = misc_register(&privcmd_dev);
	if (err != 0) {
		printk(KERN_ERR "Could not register Xen privcmd device\n");
		return err;
	}
	return 0;
}

static void __exit privcmd_exit(void)
{
	misc_deregister(&privcmd_dev);
}

module_init(privcmd_init);
module_exit(privcmd_exit);
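
/*
 * Userspace sketch for the hypercall ioctl (an assumption-laden illustration
 * relying on the uapi struct privcmd_hypercall from <xen/privcmd.h>; the
 * misc device registered above appears as /dev/xen/privcmd):
 *
 *	struct privcmd_hypercall call = {
 *		.op  = __HYPERVISOR_xen_version,
 *		.arg = { XENVER_version, 0, 0, 0, 0 },
 *	};
 *	int fd  = open("/dev/xen/privcmd", O_RDWR);
 *	long rc = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &call);
 *
 * __HYPERVISOR_xen_version / XENVER_version are just one convenient example
 * of a hypercall number and argument from the Xen version interface; a
 * privileged caller supplies its own op and arguments.
 */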