/******************************************************************************
 * privcmd.c
 *
 * Interface to privileged domain-0 commands.
 *
 * Copyright (c) 2002-2004, K A Fraser, B Dragovic
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/uaccess.h>
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/seq_file.h>
#include <linux/miscdevice.h>

#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/privcmd.h>
#include <xen/interface/xen.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/xen-ops.h>

#include "privcmd.h"

MODULE_LICENSE("GPL");

#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
#endif

static long privcmd_ioctl_hypercall(void __user *udata)
{
	struct privcmd_hypercall hypercall;
	long ret;

	if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
		return -EFAULT;

	ret = privcmd_call(hypercall.op,
			   hypercall.arg[0], hypercall.arg[1],
			   hypercall.arg[2], hypercall.arg[3],
			   hypercall.arg[4]);

	return ret;
}

static void free_page_list(struct list_head *pages)
{
	struct page *p, *n;

	list_for_each_entry_safe(p, n, pages, lru)
		__free_page(p);

	INIT_LIST_HEAD(pages);
}

/*
 * Given an array of items in userspace, return a list of pages
 * containing the data.  If copying fails, either because of memory
 * allocation failure or a problem reading user memory, return an
 * error code; it's up to the caller to dispose of any partial list.
 */
static int gather_array(struct list_head *pagelist,
			unsigned nelem, size_t size,
			void __user *data)
{
	unsigned pageidx;
	void *pagedata;
	int ret;

	if (size > PAGE_SIZE)
		return 0;

	pageidx = PAGE_SIZE;
	pagedata = NULL;	/* quiet, gcc */
	while (nelem--) {
		if (pageidx > PAGE_SIZE-size) {
			struct page *page = alloc_page(GFP_KERNEL);

			ret = -ENOMEM;
			if (page == NULL)
				goto fail;

			pagedata = page_address(page);

			list_add_tail(&page->lru, pagelist);
			pageidx = 0;
		}

		ret = -EFAULT;
		if (copy_from_user(pagedata + pageidx, data, size))
			goto fail;

		data += size;
		pageidx += size;
	}

	ret = 0;

fail:
	return ret;
}
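/*
 * Illustrative layout note (a worked example, not taken from this file):
 * because the copy loop above starts a fresh page whenever the next record
 * would cross a page boundary, records are never split across pages.
 * Assuming a 4096-byte PAGE_SIZE and a 24-byte struct privcmd_mmap_entry,
 * each page holds 4096 / 24 = 170 entries, so a request for 200 entries
 * allocates two pages (170 + 30 entries).  traverse_pages() below must be
 * called with the same 'size' so that it walks the identical layout.
 */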
/*
 * Call function "fn" on each element of the array fragmented
 * over a list of pages.
 */
static int traverse_pages(unsigned nelem, size_t size,
			  struct list_head *pos,
			  int (*fn)(void *data, void *state),
			  void *state)
{
	void *pagedata;
	unsigned pageidx;
	int ret = 0;

	BUG_ON(size > PAGE_SIZE);

	pageidx = PAGE_SIZE;
	pagedata = NULL;	/* hush, gcc */

	while (nelem--) {
		if (pageidx > PAGE_SIZE-size) {
			struct page *page;
			pos = pos->next;
			page = list_entry(pos, struct page, lru);
			pagedata = page_address(page);
			pageidx = 0;
		}

		ret = (*fn)(pagedata + pageidx, state);
		if (ret)
			break;
		pageidx += size;
	}

	return ret;
}

struct mmap_mfn_state {
	unsigned long va;
	struct vm_area_struct *vma;
	domid_t domain;
};

static int mmap_mfn_range(void *data, void *state)
{
	struct privcmd_mmap_entry *msg = data;
	struct mmap_mfn_state *st = state;
	struct vm_area_struct *vma = st->vma;
	int rc;

	/* Do not allow range to wrap the address space. */
	if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
	    ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
		return -EINVAL;

	/* Range chunks must be contiguous in va space. */
	if ((msg->va != st->va) ||
	    ((msg->va + (msg->npages << PAGE_SHIFT)) > vma->vm_end))
		return -EINVAL;

	rc = xen_remap_domain_mfn_range(vma,
					msg->va & PAGE_MASK,
					msg->mfn, msg->npages,
					vma->vm_page_prot,
					st->domain);
	if (rc < 0)
		return rc;

	st->va += msg->npages << PAGE_SHIFT;

	return 0;
}

static long privcmd_ioctl_mmap(void __user *udata)
{
	struct privcmd_mmap mmapcmd;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int rc;
	LIST_HEAD(pagelist);
	struct mmap_mfn_state state;

	if (!xen_initial_domain())
		return -EPERM;

	if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
		return -EFAULT;

	rc = gather_array(&pagelist,
			  mmapcmd.num, sizeof(struct privcmd_mmap_entry),
			  mmapcmd.entry);

	if (rc || list_empty(&pagelist))
		goto out;

	down_write(&mm->mmap_sem);

	{
		struct page *page = list_first_entry(&pagelist,
						     struct page, lru);
		struct privcmd_mmap_entry *msg = page_address(page);

		vma = find_vma(mm, msg->va);
		rc = -EINVAL;

		if (!vma || (msg->va != vma->vm_start) ||
		    !privcmd_enforce_singleshot_mapping(vma))
			goto out_up;
	}

	state.va = vma->vm_start;
	state.vma = vma;
	state.domain = mmapcmd.dom;

	rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
			    &pagelist,
			    mmap_mfn_range, &state);

out_up:
	up_write(&mm->mmap_sem);

out:
	free_page_list(&pagelist);

	return rc;
}

struct mmap_batch_state {
	domid_t domain;
	unsigned long va;
	struct vm_area_struct *vma;
	int err;

	xen_pfn_t __user *user;
};

static int mmap_batch_fn(void *data, void *state)
{
	xen_pfn_t *mfnp = data;
	struct mmap_batch_state *st = state;

	if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
				       st->vma->vm_page_prot, st->domain) < 0) {
		*mfnp |= 0xf0000000U;
		st->err++;
	}
	st->va += PAGE_SIZE;

	return 0;
}

static int mmap_return_errors(void *data, void *state)
{
	xen_pfn_t *mfnp = data;
	struct mmap_batch_state *st = state;

	return put_user(*mfnp, st->user++);
}

static struct vm_operations_struct privcmd_vm_ops;
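/*
 * Illustrative userspace sketch of the MMAPBATCH path (assumptions: the
 * device node path, the num/dom/addr/arr field names declared for
 * struct privcmd_mmapbatch in xen/privcmd.h, and error handling omitted):
 *
 *	int fd = open("/dev/xen/privcmd", O_RDWR);
 *	void *addr = mmap(NULL, nr << PAGE_SHIFT, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 *	struct privcmd_mmapbatch m = {
 *		.num  = nr,
 *		.dom  = domid,
 *		.addr = (unsigned long)addr,
 *		.arr  = mfn_array,
 *	};
 *	ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &m);
 *
 * On return, any entry of mfn_array that failed to map has been tagged with
 * 0xf0000000 by mmap_return_errors() above; callers are expected to check
 * for that marker.
 */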
static long privcmd_ioctl_mmap_batch(void __user *udata)
{
	int ret;
	struct privcmd_mmapbatch m;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long nr_pages;
	LIST_HEAD(pagelist);
	struct mmap_batch_state state;

	if (!xen_initial_domain())
		return -EPERM;

	if (copy_from_user(&m, udata, sizeof(m)))
		return -EFAULT;

	nr_pages = m.num;
	if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
		return -EINVAL;

	ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
			   m.arr);

	if (ret || list_empty(&pagelist))
		goto out;

	down_write(&mm->mmap_sem);

	vma = find_vma(mm, m.addr);
	ret = -EINVAL;
	if (!vma ||
	    vma->vm_ops != &privcmd_vm_ops ||
	    (m.addr != vma->vm_start) ||
	    ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
	    !privcmd_enforce_singleshot_mapping(vma)) {
		up_write(&mm->mmap_sem);
		goto out;
	}

	state.domain = m.dom;
	state.vma = vma;
	state.va = m.addr;
	state.err = 0;

	ret = traverse_pages(m.num, sizeof(xen_pfn_t),
			     &pagelist, mmap_batch_fn, &state);

	up_write(&mm->mmap_sem);

	if (state.err > 0) {
		state.user = m.arr;
		ret = traverse_pages(m.num, sizeof(xen_pfn_t),
				     &pagelist,
				     mmap_return_errors, &state);
	}

out:
	free_page_list(&pagelist);

	return ret;
}

static long privcmd_ioctl(struct file *file,
			  unsigned int cmd, unsigned long data)
{
	int ret = -ENOSYS;
	void __user *udata = (void __user *) data;

	switch (cmd) {
	case IOCTL_PRIVCMD_HYPERCALL:
		ret = privcmd_ioctl_hypercall(udata);
		break;

	case IOCTL_PRIVCMD_MMAP:
		ret = privcmd_ioctl_mmap(udata);
		break;

	case IOCTL_PRIVCMD_MMAPBATCH:
		ret = privcmd_ioctl_mmap_batch(udata);
		break;

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
	       vma, vma->vm_start, vma->vm_end,
	       vmf->pgoff, vmf->virtual_address);

	return VM_FAULT_SIGBUS;
}

static struct vm_operations_struct privcmd_vm_ops = {
	.fault = privcmd_fault
};
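/*
 * Note on the single-shot protocol (illustrative): privcmd_mmap() below
 * leaves vm_private_data NULL, and privcmd_enforce_singleshot_mapping()
 * atomically swaps in (void *)1, so only the first MMAP or MMAPBATCH ioctl
 * issued against a given VMA can populate it.  For example:
 *
 *	ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &m);	(first call: VMA populated)
 *	ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &m);	(second call: -EINVAL)
 *
 * The second call fails because the xchg() sees a non-NULL value.  A fault
 * on a page that was never populated reaches privcmd_fault() above and
 * raises SIGBUS.
 */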
static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* Unsupported for auto-translate guests. */
	if (xen_feature(XENFEAT_auto_translated_physmap))
		return -ENOSYS;

	/* DONTCOPY is essential for Xen because copy_page_range doesn't know
	 * how to recreate these mappings */
	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
	vma->vm_ops = &privcmd_vm_ops;
	vma->vm_private_data = NULL;

	return 0;
}

static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
{
	return (xchg(&vma->vm_private_data, (void *)1) == NULL);
}

const struct file_operations xen_privcmd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = privcmd_ioctl,
	.mmap = privcmd_mmap,
};
EXPORT_SYMBOL_GPL(xen_privcmd_fops);

static struct miscdevice privcmd_dev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "xen/privcmd",
	.fops = &xen_privcmd_fops,
};

static int __init privcmd_init(void)
{
	int err;

	if (!xen_domain())
		return -ENODEV;

	err = misc_register(&privcmd_dev);
	if (err != 0) {
		printk(KERN_ERR "Could not register Xen privcmd device\n");
		return err;
	}
	return 0;
}

static void __exit privcmd_exit(void)
{
	misc_deregister(&privcmd_dev);
}

module_init(privcmd_init);
module_exit(privcmd_exit);
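/*
 * Hypercall ioctl usage sketch (illustrative only; __HYPERVISOR_xen_version
 * and XENVER_version come from the Xen public headers, and the return value
 * is whatever privcmd_call() handed back for that hypercall):
 *
 *	struct privcmd_hypercall call = {
 *		.op  = __HYPERVISOR_xen_version,
 *		.arg = { XENVER_version, 0 },
 *	};
 *	long ver = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &call);
 *	(on success, ver encodes major << 16 | minor)
 */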