/******************************************************************************
 * privcmd.c
 *
 * Interface to privileged domain-0 commands.
 *
 * Copyright (c) 2002-2004, K A Fraser, B Dragovic
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/uaccess.h>
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/seq_file.h>
#include <linux/miscdevice.h>

#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/privcmd.h>
#include <xen/interface/xen.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/xen-ops.h>

#include "privcmd.h"

MODULE_LICENSE("GPL");

#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
#endif

static long privcmd_ioctl_hypercall(void __user *udata)
{
	struct privcmd_hypercall hypercall;
	long ret;

	if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
		return -EFAULT;

	ret = privcmd_call(hypercall.op,
			   hypercall.arg[0], hypercall.arg[1],
			   hypercall.arg[2], hypercall.arg[3],
			   hypercall.arg[4]);

	return ret;
}
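
/*
 * Illustrative sketch only, not part of this driver: one way userspace
 * might reach privcmd_ioctl_hypercall() above.  The device path and the
 * particular hypercall are assumptions for illustration; real tools
 * normally go through libxc rather than raw ioctl()s.
 *
 *	struct privcmd_hypercall call = {
 *		.op  = __HYPERVISOR_xen_version,
 *		.arg = { XENVER_version },
 *	};
 *	int fd = open("/dev/xen/privcmd", O_RDWR);
 *	long ver = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &call);
 *
 * On success ver carries the hypercall's own return value (here the Xen
 * version number); a negative value means the ioctl or hypercall failed.
 */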

static void free_page_list(struct list_head *pages)
{
	struct page *p, *n;

	list_for_each_entry_safe(p, n, pages, lru)
		__free_page(p);

	INIT_LIST_HEAD(pages);
}

/*
 * Given an array of items in userspace, return a list of pages
 * containing the data.  If copying fails, either because of memory
 * allocation failure or a problem reading user memory, return an
 * error code; it's up to the caller to dispose of any partial list.
 */
static int gather_array(struct list_head *pagelist,
			unsigned nelem, size_t size,
			const void __user *data)
{
	unsigned pageidx;
	void *pagedata;
	int ret;

	if (size > PAGE_SIZE)
		return 0;

	pageidx = PAGE_SIZE;
	pagedata = NULL;	/* quiet, gcc */
	while (nelem--) {
		if (pageidx > PAGE_SIZE-size) {
			struct page *page = alloc_page(GFP_KERNEL);

			ret = -ENOMEM;
			if (page == NULL)
				goto fail;

			pagedata = page_address(page);

			list_add_tail(&page->lru, pagelist);
			pageidx = 0;
		}

		ret = -EFAULT;
		if (copy_from_user(pagedata + pageidx, data, size))
			goto fail;

		data += size;
		pageidx += size;
	}

	ret = 0;

fail:
	return ret;
}

/*
 * Call function "fn" on each element of the array fragmented
 * over a list of pages.
 */
static int traverse_pages(unsigned nelem, size_t size,
			  struct list_head *pos,
			  int (*fn)(void *data, void *state),
			  void *state)
{
	void *pagedata;
	unsigned pageidx;
	int ret = 0;

	BUG_ON(size > PAGE_SIZE);

	pageidx = PAGE_SIZE;
	pagedata = NULL;	/* hush, gcc */

	while (nelem--) {
		if (pageidx > PAGE_SIZE-size) {
			struct page *page;
			pos = pos->next;
			page = list_entry(pos, struct page, lru);
			pagedata = page_address(page);
			pageidx = 0;
		}

		ret = (*fn)(pagedata + pageidx, state);
		if (ret)
			break;
		pageidx += size;
	}

	return ret;
}

struct mmap_mfn_state {
	unsigned long va;
	struct vm_area_struct *vma;
	domid_t domain;
};

static int mmap_mfn_range(void *data, void *state)
{
	struct privcmd_mmap_entry *msg = data;
	struct mmap_mfn_state *st = state;
	struct vm_area_struct *vma = st->vma;
	int rc;

	/* Do not allow range to wrap the address space. */
	if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
	    ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
		return -EINVAL;

	/* Range chunks must be contiguous in va space. */
	if ((msg->va != st->va) ||
	    ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
		return -EINVAL;

	rc = xen_remap_domain_mfn_range(vma,
					msg->va & PAGE_MASK,
					msg->mfn, msg->npages,
					vma->vm_page_prot,
					st->domain);
	if (rc < 0)
		return rc;

	st->va += msg->npages << PAGE_SHIFT;

	return 0;
}

static long privcmd_ioctl_mmap(void __user *udata)
{
	struct privcmd_mmap mmapcmd;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int rc;
	LIST_HEAD(pagelist);
	struct mmap_mfn_state state;

	if (!xen_initial_domain())
		return -EPERM;

	if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
		return -EFAULT;

	rc = gather_array(&pagelist,
			  mmapcmd.num, sizeof(struct privcmd_mmap_entry),
			  mmapcmd.entry);

	if (rc || list_empty(&pagelist))
		goto out;

	down_write(&mm->mmap_sem);

	{
		struct page *page = list_first_entry(&pagelist,
						     struct page, lru);
		struct privcmd_mmap_entry *msg = page_address(page);

		vma = find_vma(mm, msg->va);
		rc = -EINVAL;

		if (!vma || (msg->va != vma->vm_start) ||
		    !privcmd_enforce_singleshot_mapping(vma))
			goto out_up;
	}

	state.va = vma->vm_start;
	state.vma = vma;
	state.domain = mmapcmd.dom;

	rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
			    &pagelist,
			    mmap_mfn_range, &state);

out_up:
	up_write(&mm->mmap_sem);

out:
	free_page_list(&pagelist);

	return rc;
}
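
/*
 * Illustrative sketch only, not part of this driver: a hypothetical
 * userspace sequence for IOCTL_PRIVCMD_MMAP above.  The structures come
 * from <xen/privcmd.h>; the device path, domid and foreign_mfn values
 * are assumptions for illustration.
 *
 *	int fd = open("/dev/xen/privcmd", O_RDWR);
 *	void *addr = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 *	struct privcmd_mmap_entry ent = {
 *		.va     = (unsigned long)addr,
 *		.mfn    = foreign_mfn,
 *		.npages = 1,
 *	};
 *	struct privcmd_mmap cmd = { .num = 1, .dom = domid, .entry = &ent };
 *	int rc = ioctl(fd, IOCTL_PRIVCMD_MMAP, &cmd);
 *
 * ent.va must equal the vm_start of the freshly mmap()ed VMA, and
 * foreign_mfn is a caller-supplied machine frame of the target domain.
 * Because of privcmd_enforce_singleshot_mapping(), each such VMA can be
 * populated by exactly one ioctl; a new mapping needs a fresh mmap().
 */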

struct mmap_batch_state {
	domid_t domain;
	unsigned long va;
	struct vm_area_struct *vma;
	/* A tristate:
	 *	0 for no errors
	 *	1 if at least one error has happened (and no
	 *	    -ENOENT errors have happened)
	 *	-ENOENT if at least 1 -ENOENT has happened.
	 */
	int global_error;
	/* An array for individual errors */
	int *err;

	/* User-space mfn array to store errors in the second pass for V1. */
	xen_pfn_t __user *user_mfn;
};

static int mmap_batch_fn(void *data, void *state)
{
	xen_pfn_t *mfnp = data;
	struct mmap_batch_state *st = state;
	int ret;

	ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
					 st->vma->vm_page_prot, st->domain);

	/* Store error code for second pass. */
	*(st->err++) = ret;

	/* And see if it affects the global_error. */
	if (ret < 0) {
		if (ret == -ENOENT)
			st->global_error = -ENOENT;
		else {
			/* Record that at least one error has happened. */
			if (st->global_error == 0)
				st->global_error = 1;
		}
	}
	st->va += PAGE_SIZE;

	return 0;
}

static int mmap_return_errors_v1(void *data, void *state)
{
	xen_pfn_t *mfnp = data;
	struct mmap_batch_state *st = state;
	int err = *(st->err++);

	/*
	 * V1 encodes the error codes in the 32bit top nibble of the
	 * mfn (with its known limitations vis-a-vis 64 bit callers).
	 */
	*mfnp |= (err == -ENOENT) ?
				PRIVCMD_MMAPBATCH_PAGED_ERROR :
				PRIVCMD_MMAPBATCH_MFN_ERROR;
	return __put_user(*mfnp, st->user_mfn++);
}

static struct vm_operations_struct privcmd_vm_ops;
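
/*
 * Illustrative sketch only, not part of this driver: a hypothetical V2
 * batch mapping from userspace, matching the checks made below (the
 * target region must start at vma->vm_start and span the whole VMA).
 * The device path, N and domid are assumptions for illustration.
 *
 *	xen_pfn_t gfns[N];
 *	int errs[N];
 *	int fd = open("/dev/xen/privcmd", O_RDWR);
 *	void *addr = mmap(NULL, N * PAGE_SIZE, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 *	struct privcmd_mmapbatch_v2 m = {
 *		.num  = N,
 *		.dom  = domid,
 *		.addr = (__u64)(unsigned long)addr,
 *		.arr  = gfns,
 *		.err  = errs,
 *	};
 *	int rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &m);
 *
 * rc == -ENOENT signals that at least one frame was paged out, with the
 * per-frame result in errs[i].  V1 callers get the error OR'd into the
 * top nibble of arr[i] instead (see mmap_return_errors_v1() above).
 */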

static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
{
	int ret;
	struct privcmd_mmapbatch_v2 m;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long nr_pages;
	LIST_HEAD(pagelist);
	int *err_array = NULL;
	struct mmap_batch_state state;

	if (!xen_initial_domain())
		return -EPERM;

	switch (version) {
	case 1:
		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
			return -EFAULT;
		/* Returns per-frame error in m.arr. */
		m.err = NULL;
		if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
			return -EFAULT;
		break;
	case 2:
		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
			return -EFAULT;
		/* Returns per-frame error code in m.err. */
		if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
			return -EFAULT;
		break;
	default:
		return -EINVAL;
	}

	nr_pages = m.num;
	if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
		return -EINVAL;

	ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);

	if (ret)
		goto out;
	if (list_empty(&pagelist)) {
		ret = -EINVAL;
		goto out;
	}

	err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL);
	if (err_array == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	down_write(&mm->mmap_sem);

	vma = find_vma(mm, m.addr);
	if (!vma ||
	    vma->vm_ops != &privcmd_vm_ops ||
	    (m.addr != vma->vm_start) ||
	    ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
	    !privcmd_enforce_singleshot_mapping(vma)) {
		up_write(&mm->mmap_sem);
		ret = -EINVAL;
		goto out;
	}

	state.domain = m.dom;
	state.vma = vma;
	state.va = m.addr;
	state.global_error = 0;
	state.err = err_array;

	/* mmap_batch_fn guarantees ret == 0 */
	BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
			      &pagelist, mmap_batch_fn, &state));

	up_write(&mm->mmap_sem);

	if (version == 1) {
		if (state.global_error) {
			/* Write back errors in second pass. */
			state.user_mfn = (xen_pfn_t *)m.arr;
			state.err      = err_array;
			ret = traverse_pages(m.num, sizeof(xen_pfn_t),
					     &pagelist, mmap_return_errors_v1, &state);
		} else
			ret = 0;

	} else if (version == 2) {
		ret = __copy_to_user(m.err, err_array, m.num * sizeof(int));
		if (ret)
			ret = -EFAULT;
	}

	/* If we have not had any EFAULT-like global errors then set the global
	 * error to -ENOENT if necessary. */
	if ((ret == 0) && (state.global_error == -ENOENT))
		ret = -ENOENT;

out:
	kfree(err_array);
	free_page_list(&pagelist);

	return ret;
}

static long privcmd_ioctl(struct file *file,
			  unsigned int cmd, unsigned long data)
{
	int ret = -ENOSYS;
	void __user *udata = (void __user *) data;

	switch (cmd) {
	case IOCTL_PRIVCMD_HYPERCALL:
		ret = privcmd_ioctl_hypercall(udata);
		break;

	case IOCTL_PRIVCMD_MMAP:
		ret = privcmd_ioctl_mmap(udata);
		break;

	case IOCTL_PRIVCMD_MMAPBATCH:
		ret = privcmd_ioctl_mmap_batch(udata, 1);
		break;

	case IOCTL_PRIVCMD_MMAPBATCH_V2:
		ret = privcmd_ioctl_mmap_batch(udata, 2);
		break;

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
	       vma, vma->vm_start, vma->vm_end,
	       vmf->pgoff, vmf->virtual_address);

	return VM_FAULT_SIGBUS;
}

static struct vm_operations_struct privcmd_vm_ops = {
	.fault = privcmd_fault
};

static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* DONTCOPY is essential for Xen because copy_page_range doesn't know
	 * how to recreate these mappings */
	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY |
			 VM_DONTEXPAND | VM_DONTDUMP;
	vma->vm_ops = &privcmd_vm_ops;
	vma->vm_private_data = NULL;

	return 0;
}

static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
{
	return (xchg(&vma->vm_private_data, (void *)1) == NULL);
}

const struct file_operations xen_privcmd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = privcmd_ioctl,
	.mmap = privcmd_mmap,
};
EXPORT_SYMBOL_GPL(xen_privcmd_fops);

static struct miscdevice privcmd_dev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "xen/privcmd",
	.fops = &xen_privcmd_fops,
};

static int __init privcmd_init(void)
{
	int err;

	if (!xen_domain())
		return -ENODEV;

	err = misc_register(&privcmd_dev);
	if (err != 0) {
		printk(KERN_ERR "Could not register Xen privcmd device\n");
		return err;
	}
	return 0;
}

static void __exit privcmd_exit(void)
{
	misc_deregister(&privcmd_dev);
}

module_init(privcmd_init);
module_exit(privcmd_exit);
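
/*
 * Usage note (illustrative, not part of the code above): privcmd_mmap()
 * only sets up an empty VM_IO | VM_PFNMAP area; the foreign frames are
 * inserted afterwards by IOCTL_PRIVCMD_MMAP or IOCTL_PRIVCMD_MMAPBATCH*,
 * and privcmd_enforce_singleshot_mapping() ensures each VMA is populated
 * at most once.  Any page that is never populated and later touched ends
 * up in privcmd_fault() and delivers SIGBUS to the caller.
 */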