1d8414d3cSBastian Blank /****************************************************************************** 2d8414d3cSBastian Blank * privcmd.c 3d8414d3cSBastian Blank * 4d8414d3cSBastian Blank * Interface to privileged domain-0 commands. 5d8414d3cSBastian Blank * 6d8414d3cSBastian Blank * Copyright (c) 2002-2004, K A Fraser, B Dragovic 7d8414d3cSBastian Blank */ 8d8414d3cSBastian Blank 9283c0972SJoe Perches #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt 10283c0972SJoe Perches 11d8414d3cSBastian Blank #include <linux/kernel.h> 12d8414d3cSBastian Blank #include <linux/module.h> 13d8414d3cSBastian Blank #include <linux/sched.h> 14d8414d3cSBastian Blank #include <linux/slab.h> 15d8414d3cSBastian Blank #include <linux/string.h> 16d8414d3cSBastian Blank #include <linux/errno.h> 17d8414d3cSBastian Blank #include <linux/mm.h> 18d8414d3cSBastian Blank #include <linux/mman.h> 19d8414d3cSBastian Blank #include <linux/uaccess.h> 20d8414d3cSBastian Blank #include <linux/swap.h> 21d8414d3cSBastian Blank #include <linux/highmem.h> 22d8414d3cSBastian Blank #include <linux/pagemap.h> 23d8414d3cSBastian Blank #include <linux/seq_file.h> 24d8414d3cSBastian Blank #include <linux/miscdevice.h> 25d8414d3cSBastian Blank 26d8414d3cSBastian Blank #include <asm/pgalloc.h> 27d8414d3cSBastian Blank #include <asm/pgtable.h> 28d8414d3cSBastian Blank #include <asm/tlb.h> 29d8414d3cSBastian Blank #include <asm/xen/hypervisor.h> 30d8414d3cSBastian Blank #include <asm/xen/hypercall.h> 31d8414d3cSBastian Blank 32d8414d3cSBastian Blank #include <xen/xen.h> 33d8414d3cSBastian Blank #include <xen/privcmd.h> 34d8414d3cSBastian Blank #include <xen/interface/xen.h> 35d8414d3cSBastian Blank #include <xen/features.h> 36d8414d3cSBastian Blank #include <xen/page.h> 37d8414d3cSBastian Blank #include <xen/xen-ops.h> 38d71f5139SMukesh Rathor #include <xen/balloon.h> 39d8414d3cSBastian Blank 40d8414d3cSBastian Blank #include "privcmd.h" 41d8414d3cSBastian Blank 42d8414d3cSBastian Blank MODULE_LICENSE("GPL"); 

/*
 * Sentinel stored in vma->vm_private_data to mark a VMA as claimed by a
 * privcmd mapping before (or without) a real pages array being attached.
 */
#define PRIV_VMA_LOCKED ((void *)1)

static int privcmd_vma_range_is_mapped(
	       struct vm_area_struct *vma,
	       unsigned long addr,
	       unsigned long nr_pages);

/*
 * IOCTL_PRIVCMD_HYPERCALL: copy the hypercall descriptor from userspace
 * and issue the hypercall on the caller's behalf.
 *
 * The call is bracketed with the preemptible-hcall markers so that
 * long-running hypercalls do not block the CPU unconditionally.
 * Returns the hypercall's own return value, or -EFAULT if the
 * descriptor could not be read from userspace.
 */
static long privcmd_ioctl_hypercall(void __user *udata)
{
	struct privcmd_hypercall hypercall;
	long ret;

	if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
		return -EFAULT;

	xen_preemptible_hcall_begin();
	ret = privcmd_call(hypercall.op,
			   hypercall.arg[0], hypercall.arg[1],
			   hypercall.arg[2], hypercall.arg[3],
			   hypercall.arg[4]);
	xen_preemptible_hcall_end();

	return ret;
}

/*
 * Free every page on @pages (as built by gather_array()) and reset the
 * list head so it can be reused.
 */
static void free_page_list(struct list_head *pages)
{
	struct page *p, *n;

	list_for_each_entry_safe(p, n, pages, lru)
		__free_page(p);

	INIT_LIST_HEAD(pages);
}

/*
 * Given an array of items in userspace, return a list of pages
 * containing the data.  If copying fails, either because of memory
 * allocation failure or a problem reading user memory, return an
 * error code; it's up to the caller to dispose of any partial list.
84d8414d3cSBastian Blank */ 85d8414d3cSBastian Blank static int gather_array(struct list_head *pagelist, 86d8414d3cSBastian Blank unsigned nelem, size_t size, 87ceb90fa0SAndres Lagar-Cavilla const void __user *data) 88d8414d3cSBastian Blank { 89d8414d3cSBastian Blank unsigned pageidx; 90d8414d3cSBastian Blank void *pagedata; 91d8414d3cSBastian Blank int ret; 92d8414d3cSBastian Blank 93d8414d3cSBastian Blank if (size > PAGE_SIZE) 94d8414d3cSBastian Blank return 0; 95d8414d3cSBastian Blank 96d8414d3cSBastian Blank pageidx = PAGE_SIZE; 97d8414d3cSBastian Blank pagedata = NULL; /* quiet, gcc */ 98d8414d3cSBastian Blank while (nelem--) { 99d8414d3cSBastian Blank if (pageidx > PAGE_SIZE-size) { 100d8414d3cSBastian Blank struct page *page = alloc_page(GFP_KERNEL); 101d8414d3cSBastian Blank 102d8414d3cSBastian Blank ret = -ENOMEM; 103d8414d3cSBastian Blank if (page == NULL) 104d8414d3cSBastian Blank goto fail; 105d8414d3cSBastian Blank 106d8414d3cSBastian Blank pagedata = page_address(page); 107d8414d3cSBastian Blank 108d8414d3cSBastian Blank list_add_tail(&page->lru, pagelist); 109d8414d3cSBastian Blank pageidx = 0; 110d8414d3cSBastian Blank } 111d8414d3cSBastian Blank 112d8414d3cSBastian Blank ret = -EFAULT; 113d8414d3cSBastian Blank if (copy_from_user(pagedata + pageidx, data, size)) 114d8414d3cSBastian Blank goto fail; 115d8414d3cSBastian Blank 116d8414d3cSBastian Blank data += size; 117d8414d3cSBastian Blank pageidx += size; 118d8414d3cSBastian Blank } 119d8414d3cSBastian Blank 120d8414d3cSBastian Blank ret = 0; 121d8414d3cSBastian Blank 122d8414d3cSBastian Blank fail: 123d8414d3cSBastian Blank return ret; 124d8414d3cSBastian Blank } 125d8414d3cSBastian Blank 126d8414d3cSBastian Blank /* 127d8414d3cSBastian Blank * Call function "fn" on each element of the array fragmented 128d8414d3cSBastian Blank * over a list of pages. 
129d8414d3cSBastian Blank */ 130d8414d3cSBastian Blank static int traverse_pages(unsigned nelem, size_t size, 131d8414d3cSBastian Blank struct list_head *pos, 132d8414d3cSBastian Blank int (*fn)(void *data, void *state), 133d8414d3cSBastian Blank void *state) 134d8414d3cSBastian Blank { 135d8414d3cSBastian Blank void *pagedata; 136d8414d3cSBastian Blank unsigned pageidx; 137d8414d3cSBastian Blank int ret = 0; 138d8414d3cSBastian Blank 139d8414d3cSBastian Blank BUG_ON(size > PAGE_SIZE); 140d8414d3cSBastian Blank 141d8414d3cSBastian Blank pageidx = PAGE_SIZE; 142d8414d3cSBastian Blank pagedata = NULL; /* hush, gcc */ 143d8414d3cSBastian Blank 144d8414d3cSBastian Blank while (nelem--) { 145d8414d3cSBastian Blank if (pageidx > PAGE_SIZE-size) { 146d8414d3cSBastian Blank struct page *page; 147d8414d3cSBastian Blank pos = pos->next; 148d8414d3cSBastian Blank page = list_entry(pos, struct page, lru); 149d8414d3cSBastian Blank pagedata = page_address(page); 150d8414d3cSBastian Blank pageidx = 0; 151d8414d3cSBastian Blank } 152d8414d3cSBastian Blank 153d8414d3cSBastian Blank ret = (*fn)(pagedata + pageidx, state); 154d8414d3cSBastian Blank if (ret) 155d8414d3cSBastian Blank break; 156d8414d3cSBastian Blank pageidx += size; 157d8414d3cSBastian Blank } 158d8414d3cSBastian Blank 159d8414d3cSBastian Blank return ret; 160d8414d3cSBastian Blank } 161d8414d3cSBastian Blank 1624e8c0c8cSDavid Vrabel /* 1634e8c0c8cSDavid Vrabel * Similar to traverse_pages, but use each page as a "block" of 1644e8c0c8cSDavid Vrabel * data to be processed as one unit. 
 */
static int traverse_pages_block(unsigned nelem, size_t size,
				struct list_head *pos,
				int (*fn)(void *data, int nr, void *state),
				void *state)
{
	void *pagedata;
	unsigned pageidx;
	int ret = 0;

	BUG_ON(size > PAGE_SIZE);

	pageidx = PAGE_SIZE;

	while (nelem) {
		/* Up to one page's worth of whole elements per callback. */
		int nr = (PAGE_SIZE/size);
		struct page *page;
		if (nr > nelem)
			nr = nelem;
		pos = pos->next;
		page = list_entry(pos, struct page, lru);
		pagedata = page_address(page);
		ret = (*fn)(pagedata, nr, state);
		if (ret)
			break;
		nelem -= nr;
	}

	return ret;
}

/* State threaded through mmap_gfn_range() across traverse_pages(). */
struct mmap_gfn_state {
	unsigned long va;		/* next expected virtual address */
	struct vm_area_struct *vma;	/* target VMA */
	domid_t domain;			/* foreign domain being mapped */
};

/*
 * Map one privcmd_mmap_entry chunk into the VMA.  Entries must be
 * contiguous in VA: each one has to start exactly where the previous
 * one ended (tracked in st->va).
 */
static int mmap_gfn_range(void *data, void *state)
{
	struct privcmd_mmap_entry *msg = data;
	struct mmap_gfn_state *st = state;
	struct vm_area_struct *vma = st->vma;
	int rc;

	/* Do not allow range to wrap the address space. */
	if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
	    ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
		return -EINVAL;

	/* Range chunks must be contiguous in va space. */
	if ((msg->va != st->va) ||
	    ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
		return -EINVAL;

	rc = xen_remap_domain_gfn_range(vma,
					msg->va & PAGE_MASK,
					msg->mfn, msg->npages,
					vma->vm_page_prot,
					st->domain, NULL);
	if (rc < 0)
		return rc;

	st->va += msg->npages << PAGE_SHIFT;

	return 0;
}

/*
 * IOCTL_PRIVCMD_MMAP: map a set of foreign-domain frame ranges into the
 * calling process.  The VMA must start exactly at the first entry's VA
 * and must not already be claimed (vm_private_data == NULL).
 */
static long privcmd_ioctl_mmap(void __user *udata)
{
	struct privcmd_mmap mmapcmd;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int rc;
	LIST_HEAD(pagelist);
	struct mmap_gfn_state state;

	/* We only support privcmd_ioctl_mmap_batch for auto translated. */
	if (xen_feature(XENFEAT_auto_translated_physmap))
		return -ENOSYS;

	if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
		return -EFAULT;

	rc = gather_array(&pagelist,
			  mmapcmd.num, sizeof(struct privcmd_mmap_entry),
			  mmapcmd.entry);

	if (rc || list_empty(&pagelist))
		goto out;

	down_write(&mm->mmap_sem);

	{
		struct page *page = list_first_entry(&pagelist,
						     struct page, lru);
		struct privcmd_mmap_entry *msg = page_address(page);

		vma = find_vma(mm, msg->va);
		rc = -EINVAL;

		/* VMA must begin at the first entry's VA and be unclaimed. */
		if (!vma || (msg->va != vma->vm_start) || vma->vm_private_data)
			goto out_up;
		vma->vm_private_data = PRIV_VMA_LOCKED;
	}

	state.va = vma->vm_start;
	state.vma = vma;
	state.domain = mmapcmd.dom;

	rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
			    &pagelist,
			    mmap_gfn_range, &state);


out_up:
	up_write(&mm->mmap_sem);

out:
	free_page_list(&pagelist);

	return rc;
}

/* State threaded through mmap_batch_fn()/mmap_return_errors(). */
struct mmap_batch_state {
	domid_t domain;
	unsigned long va;
	struct vm_area_struct *vma;
	int index;		/* index into the vma's pages array */
	/* A tristate:
	 *      0 for no errors
	 *      1 if at least one error has happened (and no
	 *          -ENOENT errors have happened)
	 *      -ENOENT if at least 1 -ENOENT has happened.
	 */
	int global_error;
	int version;		/* ABI version of the ioctl (1 or 2) */

	/* User-space gfn array to store errors in the second pass for V1. */
	xen_pfn_t __user *user_gfn;
	/* User-space int array to store errors in the second pass for V2. */
	int __user *user_err;
};

/* auto translated dom0 note: if domU being created is PV, then gfn is
 * mfn(addr on bus). If it's auto xlated, then gfn is pfn (input to HAP).
 */
static int mmap_batch_fn(void *data, int nr, void *state)
{
	xen_pfn_t *gfnp = data;
	struct mmap_batch_state *st = state;
	struct vm_area_struct *vma = st->vma;
	struct page **pages = vma->vm_private_data;
	struct page **cur_pages = NULL;
	int ret;

	/* Auto-translated guests back the mapping with ballooned pages. */
	if (xen_feature(XENFEAT_auto_translated_physmap))
		cur_pages = &pages[st->index];

	BUG_ON(nr < 0);
	/*
	 * Note: per-frame error codes are written back in place over the
	 * gfn array (the (int *)gfnp argument), to be consumed later by
	 * mmap_return_errors().
	 */
	ret = xen_remap_domain_gfn_array(st->vma, st->va & PAGE_MASK, gfnp, nr,
					 (int *)gfnp, st->vma->vm_page_prot,
					 st->domain, cur_pages);

	/* Adjust the global_error? */
	if (ret != nr) {
		/* -ENOENT takes precedence over the generic error flag. */
		if (ret == -ENOENT)
			st->global_error = -ENOENT;
		else {
			/* Record that at least one error has happened. */
			if (st->global_error == 0)
				st->global_error = 1;
		}
	}
	st->va += PAGE_SIZE * nr;
	st->index += nr;

	/* Always 0 so the traversal covers the whole array. */
	return 0;
}

/*
 * Report a single per-frame error back to userspace.  V1 encodes the
 * error into the caller's gfn array; V2 writes the raw errno into the
 * separate user error array.  Returns 0 or a -EFAULT-style error from
 * the user access.
 */
static int mmap_return_error(int err, struct mmap_batch_state *st)
{
	int ret;

	if (st->version == 1) {
		if (err) {
			xen_pfn_t gfn;

			ret = get_user(gfn, st->user_gfn);
			if (ret < 0)
				return ret;
			/*
			 * V1 encodes the error codes in the 32bit top
			 * nibble of the gfn (with its known
			 * limitations vis-a-vis 64 bit callers).
			 */
			gfn |= (err == -ENOENT) ?
				PRIVCMD_MMAPBATCH_PAGED_ERROR :
				PRIVCMD_MMAPBATCH_MFN_ERROR;
			return __put_user(gfn, st->user_gfn++);
		} else
			st->user_gfn++;
	} else { /* st->version == 2 */
		if (err)
			return __put_user(err, st->user_err++);
		else
			st->user_err++;
	}

	return 0;
}

/*
 * traverse_pages_block() callback for the second pass: walk one page's
 * worth of per-frame error codes (written in place by mmap_batch_fn)
 * and report each to userspace.
 */
static int mmap_return_errors(void *data, int nr, void *state)
{
	struct mmap_batch_state *st = state;
	int *errs = data;
	int i;
	int ret;

	for (i = 0; i < nr; i++) {
		ret = mmap_return_error(errs[i], st);
		if (ret < 0)
			return ret;
	}
	return 0;
}

/* Allocate pfns that are then mapped with gfns from foreign domid. Update
 * the vma with the page info to use later.
 * Returns: 0 if success, otherwise -errno
 */
static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
{
	int rc;
	struct page **pages;

	pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL);
	if (pages == NULL)
		return -ENOMEM;

	rc = alloc_xenballooned_pages(numpgs, pages);
	if (rc != 0) {
		pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__,
			numpgs, rc);
		kfree(pages);
		return -ENOMEM;
	}
	/* The VMA must not already carry a pages array. */
	BUG_ON(vma->vm_private_data != NULL);
	vma->vm_private_data = pages;

	return 0;
}

static const struct vm_operations_struct privcmd_vm_ops;

/*
 * IOCTL_PRIVCMD_MMAPBATCH (version 1) and _V2 (version 2): map an array
 * of foreign gfns into the calling process, reporting per-frame errors.
 * The two ABI versions differ only in how errors are returned (see
 * mmap_return_error()).
 */
static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
{
	int ret;
	struct privcmd_mmapbatch_v2 m;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long nr_pages;
	LIST_HEAD(pagelist);
	struct mmap_batch_state state;

	switch (version) {
	case 1:
		/* V1 struct is a prefix of V2, so reading into m is safe. */
		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
			return -EFAULT;
		/* Returns per-frame error in m.arr. */
		m.err = NULL;
		if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
			return -EFAULT;
		break;
	case 2:
		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
			return -EFAULT;
		/* Returns per-frame error code in m.err. */
		if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
			return -EFAULT;
		break;
	default:
		return -EINVAL;
	}

	/* m.num counts Xen-sized frames, which may be smaller than a page. */
	nr_pages = DIV_ROUND_UP(m.num, XEN_PFN_PER_PAGE);
	if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
		return -EINVAL;

	ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);

	if (ret)
		goto out;
	if (list_empty(&pagelist)) {
		ret = -EINVAL;
		goto out;
	}

	if (version == 2) {
		/* Zero error array now to only copy back actual errors. */
		if (clear_user(m.err, sizeof(int) * m.num)) {
			ret = -EFAULT;
			goto out;
		}
	}

	down_write(&mm->mmap_sem);

	/* Only VMAs created through privcmd_mmap() are acceptable. */
	vma = find_vma(mm, m.addr);
	if (!vma ||
	    vma->vm_ops != &privcmd_vm_ops) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/*
	 * Caller must either:
	 *
	 * Map the whole VMA range, which will also allocate all the
	 * pages required for the auto_translated_physmap case.
	 *
	 * Or
	 *
	 * Map unmapped holes left from a previous map attempt (e.g.,
	 * because those foreign frames were previously paged out).
	 */
	if (vma->vm_private_data == NULL) {
		/* First attempt: request must cover the VMA exactly. */
		if (m.addr != vma->vm_start ||
		    m.addr + (nr_pages << PAGE_SHIFT) != vma->vm_end) {
			ret = -EINVAL;
			goto out_unlock;
		}
		if (xen_feature(XENFEAT_auto_translated_physmap)) {
			ret = alloc_empty_pages(vma, nr_pages);
			if (ret < 0)
				goto out_unlock;
		} else
			vma->vm_private_data = PRIV_VMA_LOCKED;
	} else {
		/* Retry: range must lie within the VMA and be unmapped. */
		if (m.addr < vma->vm_start ||
		    m.addr + (nr_pages << PAGE_SHIFT) > vma->vm_end) {
			ret = -EINVAL;
			goto out_unlock;
		}
		if (privcmd_vma_range_is_mapped(vma, m.addr, nr_pages)) {
			ret = -EINVAL;
			goto out_unlock;
		}
	}

	state.domain = m.dom;
	state.vma = vma;
	state.va = m.addr;
	state.index = 0;
	state.global_error = 0;
	state.version = version;

	/* Each chunk handed to mmap_batch_fn must hold whole Xen pages. */
	BUILD_BUG_ON(((PAGE_SIZE / sizeof(xen_pfn_t)) % XEN_PFN_PER_PAGE) != 0);
	/* mmap_batch_fn guarantees ret == 0 */
	BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t),
				    &pagelist, mmap_batch_fn, &state));

	up_write(&mm->mmap_sem);

	if (state.global_error) {
		/* Write back errors in second pass. */
		state.user_gfn = (xen_pfn_t *)m.arr;
		state.user_err = m.err;
		ret = traverse_pages_block(m.num, sizeof(xen_pfn_t),
					   &pagelist, mmap_return_errors, &state);
	} else
		ret = 0;

	/* If we have not had any EFAULT-like global errors then set the global
	 * error to -ENOENT if necessary. */
	if ((ret == 0) && (state.global_error == -ENOENT))
		ret = -ENOENT;

out:
	free_page_list(&pagelist);
	return ret;

out_unlock:
	up_write(&mm->mmap_sem);
	goto out;
}

/* Top-level ioctl dispatcher for /dev/xen/privcmd. */
static long privcmd_ioctl(struct file *file,
			  unsigned int cmd, unsigned long data)
{
	int ret = -ENOSYS;
	void __user *udata = (void __user *) data;

	switch (cmd) {
	case IOCTL_PRIVCMD_HYPERCALL:
		ret = privcmd_ioctl_hypercall(udata);
		break;

	case IOCTL_PRIVCMD_MMAP:
		ret = privcmd_ioctl_mmap(udata);
		break;

	case IOCTL_PRIVCMD_MMAPBATCH:
		ret = privcmd_ioctl_mmap_batch(udata, 1);
		break;

	case IOCTL_PRIVCMD_MMAPBATCH_V2:
		ret = privcmd_ioctl_mmap_batch(udata, 2);
		break;

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

/*
 * VMA close handler: for auto-translated guests, unmap the foreign
 * frames and return the ballooned pages allocated by
 * alloc_empty_pages().  If unmapping fails the pages are deliberately
 * leaked (freeing still-mapped pages would be worse).
 */
static void privcmd_close(struct vm_area_struct *vma)
{
	struct page **pages = vma->vm_private_data;
	int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
	/* Xen frames may be smaller than kernel pages; count separately. */
	int numgfns = (vma->vm_end - vma->vm_start) >> XEN_PAGE_SHIFT;
	int rc;

	if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages)
		return;

	rc = xen_unmap_domain_gfn_range(vma, numgfns, pages);
	if (rc == 0)
		free_xenballooned_pages(numpgs, pages);
	else
		pr_crit("unable to unmap MFN range: leaking %d pages. rc=%d\n",
			numpgs, rc);
	kfree(pages);
}

/*
 * Faulting on a privcmd VMA is always an error: mappings are populated
 * explicitly via the ioctls, never on demand.
 */
static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
	       vma, vma->vm_start, vma->vm_end,
	       vmf->pgoff, vmf->virtual_address);

	return VM_FAULT_SIGBUS;
}

static const struct vm_operations_struct privcmd_vm_ops = {
	.close = privcmd_close,
	.fault = privcmd_fault
};

/* mmap handler: set up an empty, non-copyable VMA for later population. */
static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* DONTCOPY is essential for Xen because copy_page_range doesn't know
	 * how to recreate these mappings */
	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY |
			 VM_DONTEXPAND | VM_DONTDUMP;
	vma->vm_ops = &privcmd_vm_ops;
	vma->vm_private_data = NULL;

	return 0;
}

/*
 * For MMAPBATCH*. This allows asserting the singleshot mapping
 * on a per pfn/pte basis. Mapping calls that fail with ENOENT
 * can be then retried until success.
 */
static int is_mapped_fn(pte_t *pte, struct page *pmd_page,
			unsigned long addr, void *data)
{
	/* Any present PTE means the slot is already mapped. */
	return pte_none(*pte) ? 0 : -EBUSY;
}

/*
 * Return non-zero if any page in [addr, addr + nr_pages) already has a
 * PTE installed (used to validate MMAPBATCH retries).
 */
static int privcmd_vma_range_is_mapped(
	       struct vm_area_struct *vma,
	       unsigned long addr,
	       unsigned long nr_pages)
{
	return apply_to_page_range(vma->vm_mm, addr, nr_pages << PAGE_SHIFT,
				   is_mapped_fn, NULL) != 0;
}

const struct file_operations xen_privcmd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = privcmd_ioctl,
	.mmap = privcmd_mmap,
};
EXPORT_SYMBOL_GPL(xen_privcmd_fops);

static struct miscdevice privcmd_dev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "xen/privcmd",
	.fops = &xen_privcmd_fops,
};

/* Register /dev/xen/privcmd; only meaningful when running under Xen. */
static int __init privcmd_init(void)
{
	int err;

	if (!xen_domain())
		return -ENODEV;

	err = misc_register(&privcmd_dev);
	if (err != 0) {
		pr_err("Could not register Xen privcmd device\n");
		return err;
	}
	return 0;
}

static void __exit privcmd_exit(void)
{
	misc_deregister(&privcmd_dev);
}

module_init(privcmd_init);
module_exit(privcmd_exit);