1de56a948SPaul Mackerras /* 2de56a948SPaul Mackerras * This program is free software; you can redistribute it and/or modify 3de56a948SPaul Mackerras * it under the terms of the GNU General Public License, version 2, as 4de56a948SPaul Mackerras * published by the Free Software Foundation. 5de56a948SPaul Mackerras * 6de56a948SPaul Mackerras * This program is distributed in the hope that it will be useful, 7de56a948SPaul Mackerras * but WITHOUT ANY WARRANTY; without even the implied warranty of 8de56a948SPaul Mackerras * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9de56a948SPaul Mackerras * GNU General Public License for more details. 10de56a948SPaul Mackerras * 11de56a948SPaul Mackerras * You should have received a copy of the GNU General Public License 12de56a948SPaul Mackerras * along with this program; if not, write to the Free Software 13de56a948SPaul Mackerras * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14de56a948SPaul Mackerras * 15de56a948SPaul Mackerras * Copyright 2010 Paul Mackerras, IBM Corp. 
<paulus@au1.ibm.com> 16de56a948SPaul Mackerras */ 17de56a948SPaul Mackerras 18de56a948SPaul Mackerras #include <linux/types.h> 19de56a948SPaul Mackerras #include <linux/string.h> 20de56a948SPaul Mackerras #include <linux/kvm.h> 21de56a948SPaul Mackerras #include <linux/kvm_host.h> 22de56a948SPaul Mackerras #include <linux/highmem.h> 23de56a948SPaul Mackerras #include <linux/gfp.h> 24de56a948SPaul Mackerras #include <linux/slab.h> 25de56a948SPaul Mackerras #include <linux/hugetlb.h> 268936dda4SPaul Mackerras #include <linux/vmalloc.h> 272c9097e4SPaul Mackerras #include <linux/srcu.h> 28a2932923SPaul Mackerras #include <linux/anon_inodes.h> 29a2932923SPaul Mackerras #include <linux/file.h> 30de56a948SPaul Mackerras 31de56a948SPaul Mackerras #include <asm/tlbflush.h> 32de56a948SPaul Mackerras #include <asm/kvm_ppc.h> 33de56a948SPaul Mackerras #include <asm/kvm_book3s.h> 34de56a948SPaul Mackerras #include <asm/mmu-hash64.h> 35de56a948SPaul Mackerras #include <asm/hvcall.h> 36de56a948SPaul Mackerras #include <asm/synch.h> 37de56a948SPaul Mackerras #include <asm/ppc-opcode.h> 38de56a948SPaul Mackerras #include <asm/cputable.h> 39de56a948SPaul Mackerras 40990978e9SAneesh Kumar K.V #include "book3s_hv_cma.h" 41990978e9SAneesh Kumar K.V 429e368f29SPaul Mackerras /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ 439e368f29SPaul Mackerras #define MAX_LPID_970 63 44de56a948SPaul Mackerras 4532fad281SPaul Mackerras /* Power architecture requires HPT is at least 256kB */ 4632fad281SPaul Mackerras #define PPC_MIN_HPT_ORDER 18 4732fad281SPaul Mackerras 487ed661bfSPaul Mackerras static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, 497ed661bfSPaul Mackerras long pte_index, unsigned long pteh, 507ed661bfSPaul Mackerras unsigned long ptel, unsigned long *pte_idx_ret); 51a64fd707SPaul Mackerras static void kvmppc_rmap_reset(struct kvm *kvm); 527ed661bfSPaul Mackerras 5332fad281SPaul Mackerras long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) 
54de56a948SPaul Mackerras { 55de56a948SPaul Mackerras unsigned long hpt; 568936dda4SPaul Mackerras struct revmap_entry *rev; 57fa61a4e3SAneesh Kumar K.V struct page *page = NULL; 58fa61a4e3SAneesh Kumar K.V long order = KVM_DEFAULT_HPT_ORDER; 59de56a948SPaul Mackerras 6032fad281SPaul Mackerras if (htab_orderp) { 6132fad281SPaul Mackerras order = *htab_orderp; 6232fad281SPaul Mackerras if (order < PPC_MIN_HPT_ORDER) 6332fad281SPaul Mackerras order = PPC_MIN_HPT_ORDER; 6432fad281SPaul Mackerras } 6532fad281SPaul Mackerras 66fa61a4e3SAneesh Kumar K.V kvm->arch.hpt_cma_alloc = 0; 6732fad281SPaul Mackerras /* 6832fad281SPaul Mackerras * try first to allocate it from the kernel page allocator. 69fa61a4e3SAneesh Kumar K.V * We keep the CMA reserved for failed allocation. 7032fad281SPaul Mackerras */ 7132fad281SPaul Mackerras hpt = __get_free_pages(GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT | 7232fad281SPaul Mackerras __GFP_NOWARN, order - PAGE_SHIFT); 7332fad281SPaul Mackerras 7432fad281SPaul Mackerras /* Next try to allocate from the preallocated pool */ 7532fad281SPaul Mackerras if (!hpt) { 76990978e9SAneesh Kumar K.V VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER); 77fa61a4e3SAneesh Kumar K.V page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT)); 78fa61a4e3SAneesh Kumar K.V if (page) { 79fa61a4e3SAneesh Kumar K.V hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); 80fa61a4e3SAneesh Kumar K.V kvm->arch.hpt_cma_alloc = 1; 81fa61a4e3SAneesh Kumar K.V } else 82fa61a4e3SAneesh Kumar K.V --order; 83d2a1b483SAlexander Graf } 84d2a1b483SAlexander Graf 8532fad281SPaul Mackerras /* Lastly try successively smaller sizes from the page allocator */ 8632fad281SPaul Mackerras while (!hpt && order > PPC_MIN_HPT_ORDER) { 8732fad281SPaul Mackerras hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT| 8832fad281SPaul Mackerras __GFP_NOWARN, order - PAGE_SHIFT); 8932fad281SPaul Mackerras if (!hpt) 9032fad281SPaul Mackerras --order; 91de56a948SPaul Mackerras } 9232fad281SPaul Mackerras 9332fad281SPaul 
Mackerras if (!hpt) 9432fad281SPaul Mackerras return -ENOMEM; 9532fad281SPaul Mackerras 96de56a948SPaul Mackerras kvm->arch.hpt_virt = hpt; 9732fad281SPaul Mackerras kvm->arch.hpt_order = order; 9832fad281SPaul Mackerras /* HPTEs are 2**4 bytes long */ 9932fad281SPaul Mackerras kvm->arch.hpt_npte = 1ul << (order - 4); 10032fad281SPaul Mackerras /* 128 (2**7) bytes in each HPTEG */ 10132fad281SPaul Mackerras kvm->arch.hpt_mask = (1ul << (order - 7)) - 1; 102de56a948SPaul Mackerras 1038936dda4SPaul Mackerras /* Allocate reverse map array */ 10432fad281SPaul Mackerras rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte); 1058936dda4SPaul Mackerras if (!rev) { 1068936dda4SPaul Mackerras pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n"); 1078936dda4SPaul Mackerras goto out_freehpt; 1088936dda4SPaul Mackerras } 1098936dda4SPaul Mackerras kvm->arch.revmap = rev; 11032fad281SPaul Mackerras kvm->arch.sdr1 = __pa(hpt) | (order - 18); 1118936dda4SPaul Mackerras 11232fad281SPaul Mackerras pr_info("KVM guest htab at %lx (order %ld), LPID %x\n", 11332fad281SPaul Mackerras hpt, order, kvm->arch.lpid); 114de56a948SPaul Mackerras 11532fad281SPaul Mackerras if (htab_orderp) 11632fad281SPaul Mackerras *htab_orderp = order; 117de56a948SPaul Mackerras return 0; 1188936dda4SPaul Mackerras 1198936dda4SPaul Mackerras out_freehpt: 120fa61a4e3SAneesh Kumar K.V if (kvm->arch.hpt_cma_alloc) 121fa61a4e3SAneesh Kumar K.V kvm_release_hpt(page, 1 << (order - PAGE_SHIFT)); 12232fad281SPaul Mackerras else 12332fad281SPaul Mackerras free_pages(hpt, order - PAGE_SHIFT); 1248936dda4SPaul Mackerras return -ENOMEM; 125de56a948SPaul Mackerras } 126de56a948SPaul Mackerras 12732fad281SPaul Mackerras long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp) 12832fad281SPaul Mackerras { 12932fad281SPaul Mackerras long err = -EBUSY; 13032fad281SPaul Mackerras long order; 13132fad281SPaul Mackerras 13232fad281SPaul Mackerras mutex_lock(&kvm->lock); 13332fad281SPaul Mackerras if 
(kvm->arch.rma_setup_done) { 13432fad281SPaul Mackerras kvm->arch.rma_setup_done = 0; 13532fad281SPaul Mackerras /* order rma_setup_done vs. vcpus_running */ 13632fad281SPaul Mackerras smp_mb(); 13732fad281SPaul Mackerras if (atomic_read(&kvm->arch.vcpus_running)) { 13832fad281SPaul Mackerras kvm->arch.rma_setup_done = 1; 13932fad281SPaul Mackerras goto out; 14032fad281SPaul Mackerras } 14132fad281SPaul Mackerras } 14232fad281SPaul Mackerras if (kvm->arch.hpt_virt) { 14332fad281SPaul Mackerras order = kvm->arch.hpt_order; 14432fad281SPaul Mackerras /* Set the entire HPT to 0, i.e. invalid HPTEs */ 14532fad281SPaul Mackerras memset((void *)kvm->arch.hpt_virt, 0, 1ul << order); 14632fad281SPaul Mackerras /* 147a64fd707SPaul Mackerras * Reset all the reverse-mapping chains for all memslots 148a64fd707SPaul Mackerras */ 149a64fd707SPaul Mackerras kvmppc_rmap_reset(kvm); 1501b400ba0SPaul Mackerras /* Ensure that each vcpu will flush its TLB on next entry. */ 1511b400ba0SPaul Mackerras cpumask_setall(&kvm->arch.need_tlb_flush); 15232fad281SPaul Mackerras *htab_orderp = order; 15332fad281SPaul Mackerras err = 0; 15432fad281SPaul Mackerras } else { 15532fad281SPaul Mackerras err = kvmppc_alloc_hpt(kvm, htab_orderp); 15632fad281SPaul Mackerras order = *htab_orderp; 15732fad281SPaul Mackerras } 15832fad281SPaul Mackerras out: 15932fad281SPaul Mackerras mutex_unlock(&kvm->lock); 16032fad281SPaul Mackerras return err; 16132fad281SPaul Mackerras } 16232fad281SPaul Mackerras 163de56a948SPaul Mackerras void kvmppc_free_hpt(struct kvm *kvm) 164de56a948SPaul Mackerras { 165043cc4d7SScott Wood kvmppc_free_lpid(kvm->arch.lpid); 1668936dda4SPaul Mackerras vfree(kvm->arch.revmap); 167fa61a4e3SAneesh Kumar K.V if (kvm->arch.hpt_cma_alloc) 168fa61a4e3SAneesh Kumar K.V kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt), 169fa61a4e3SAneesh Kumar K.V 1 << (kvm->arch.hpt_order - PAGE_SHIFT)); 170d2a1b483SAlexander Graf else 17132fad281SPaul Mackerras free_pages(kvm->arch.hpt_virt, 
17232fad281SPaul Mackerras kvm->arch.hpt_order - PAGE_SHIFT); 173de56a948SPaul Mackerras } 174de56a948SPaul Mackerras 175da9d1d7fSPaul Mackerras /* Bits in first HPTE dword for pagesize 4k, 64k or 16M */ 176da9d1d7fSPaul Mackerras static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize) 177de56a948SPaul Mackerras { 178da9d1d7fSPaul Mackerras return (pgsize > 0x1000) ? HPTE_V_LARGE : 0; 179da9d1d7fSPaul Mackerras } 180da9d1d7fSPaul Mackerras 181da9d1d7fSPaul Mackerras /* Bits in second HPTE dword for pagesize 4k, 64k or 16M */ 182da9d1d7fSPaul Mackerras static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize) 183da9d1d7fSPaul Mackerras { 184da9d1d7fSPaul Mackerras return (pgsize == 0x10000) ? 0x1000 : 0; 185da9d1d7fSPaul Mackerras } 186da9d1d7fSPaul Mackerras 187da9d1d7fSPaul Mackerras void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, 188da9d1d7fSPaul Mackerras unsigned long porder) 189da9d1d7fSPaul Mackerras { 190de56a948SPaul Mackerras unsigned long i; 191b2b2f165SPaul Mackerras unsigned long npages; 192c77162deSPaul Mackerras unsigned long hp_v, hp_r; 193c77162deSPaul Mackerras unsigned long addr, hash; 194da9d1d7fSPaul Mackerras unsigned long psize; 195da9d1d7fSPaul Mackerras unsigned long hp0, hp1; 1967ed661bfSPaul Mackerras unsigned long idx_ret; 197c77162deSPaul Mackerras long ret; 19832fad281SPaul Mackerras struct kvm *kvm = vcpu->kvm; 199de56a948SPaul Mackerras 200da9d1d7fSPaul Mackerras psize = 1ul << porder; 201da9d1d7fSPaul Mackerras npages = memslot->npages >> (porder - PAGE_SHIFT); 202de56a948SPaul Mackerras 203de56a948SPaul Mackerras /* VRMA can't be > 1TB */ 2048936dda4SPaul Mackerras if (npages > 1ul << (40 - porder)) 2058936dda4SPaul Mackerras npages = 1ul << (40 - porder); 206de56a948SPaul Mackerras /* Can't use more than 1 HPTE per HPTEG */ 20732fad281SPaul Mackerras if (npages > kvm->arch.hpt_mask + 1) 20832fad281SPaul Mackerras npages = kvm->arch.hpt_mask + 1; 209de56a948SPaul Mackerras 
210da9d1d7fSPaul Mackerras hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | 211da9d1d7fSPaul Mackerras HPTE_V_BOLTED | hpte0_pgsize_encoding(psize); 212da9d1d7fSPaul Mackerras hp1 = hpte1_pgsize_encoding(psize) | 213da9d1d7fSPaul Mackerras HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX; 214da9d1d7fSPaul Mackerras 215de56a948SPaul Mackerras for (i = 0; i < npages; ++i) { 216c77162deSPaul Mackerras addr = i << porder; 217de56a948SPaul Mackerras /* can't use hpt_hash since va > 64 bits */ 21832fad281SPaul Mackerras hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & kvm->arch.hpt_mask; 219de56a948SPaul Mackerras /* 220de56a948SPaul Mackerras * We assume that the hash table is empty and no 221de56a948SPaul Mackerras * vcpus are using it at this stage. Since we create 222de56a948SPaul Mackerras * at most one HPTE per HPTEG, we just assume entry 7 223de56a948SPaul Mackerras * is available and use it. 224de56a948SPaul Mackerras */ 2258936dda4SPaul Mackerras hash = (hash << 3) + 7; 226da9d1d7fSPaul Mackerras hp_v = hp0 | ((addr >> 16) & ~0x7fUL); 227da9d1d7fSPaul Mackerras hp_r = hp1 | addr; 2287ed661bfSPaul Mackerras ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, hash, hp_v, hp_r, 2297ed661bfSPaul Mackerras &idx_ret); 230c77162deSPaul Mackerras if (ret != H_SUCCESS) { 231c77162deSPaul Mackerras pr_err("KVM: map_vrma at %lx failed, ret=%ld\n", 232c77162deSPaul Mackerras addr, ret); 233c77162deSPaul Mackerras break; 234c77162deSPaul Mackerras } 235de56a948SPaul Mackerras } 236de56a948SPaul Mackerras } 237de56a948SPaul Mackerras 238de56a948SPaul Mackerras int kvmppc_mmu_hv_init(void) 239de56a948SPaul Mackerras { 2409e368f29SPaul Mackerras unsigned long host_lpid, rsvd_lpid; 2419e368f29SPaul Mackerras 2429e368f29SPaul Mackerras if (!cpu_has_feature(CPU_FTR_HVMODE)) 243de56a948SPaul Mackerras return -EINVAL; 2449e368f29SPaul Mackerras 245043cc4d7SScott Wood /* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */ 2469e368f29SPaul Mackerras if (cpu_has_feature(CPU_FTR_ARCH_206)) 
{ 2479e368f29SPaul Mackerras host_lpid = mfspr(SPRN_LPID); /* POWER7 */ 2489e368f29SPaul Mackerras rsvd_lpid = LPID_RSVD; 2499e368f29SPaul Mackerras } else { 2509e368f29SPaul Mackerras host_lpid = 0; /* PPC970 */ 2519e368f29SPaul Mackerras rsvd_lpid = MAX_LPID_970; 2529e368f29SPaul Mackerras } 2539e368f29SPaul Mackerras 254043cc4d7SScott Wood kvmppc_init_lpid(rsvd_lpid + 1); 255043cc4d7SScott Wood 256043cc4d7SScott Wood kvmppc_claim_lpid(host_lpid); 2579e368f29SPaul Mackerras /* rsvd_lpid is reserved for use in partition switching */ 258043cc4d7SScott Wood kvmppc_claim_lpid(rsvd_lpid); 259de56a948SPaul Mackerras 260de56a948SPaul Mackerras return 0; 261de56a948SPaul Mackerras } 262de56a948SPaul Mackerras 263de56a948SPaul Mackerras static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) 264de56a948SPaul Mackerras { 265de56a948SPaul Mackerras kvmppc_set_msr(vcpu, MSR_SF | MSR_ME); 266de56a948SPaul Mackerras } 267de56a948SPaul Mackerras 268c77162deSPaul Mackerras /* 269c77162deSPaul Mackerras * This is called to get a reference to a guest page if there isn't 270a66b48c3SPaul Mackerras * one already in the memslot->arch.slot_phys[] array. 
271c77162deSPaul Mackerras */ 272c77162deSPaul Mackerras static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, 273da9d1d7fSPaul Mackerras struct kvm_memory_slot *memslot, 274da9d1d7fSPaul Mackerras unsigned long psize) 275c77162deSPaul Mackerras { 276c77162deSPaul Mackerras unsigned long start; 277da9d1d7fSPaul Mackerras long np, err; 278da9d1d7fSPaul Mackerras struct page *page, *hpage, *pages[1]; 279da9d1d7fSPaul Mackerras unsigned long s, pgsize; 280c77162deSPaul Mackerras unsigned long *physp; 2819d0ef5eaSPaul Mackerras unsigned int is_io, got, pgorder; 2829d0ef5eaSPaul Mackerras struct vm_area_struct *vma; 283da9d1d7fSPaul Mackerras unsigned long pfn, i, npages; 284c77162deSPaul Mackerras 285a66b48c3SPaul Mackerras physp = memslot->arch.slot_phys; 286c77162deSPaul Mackerras if (!physp) 287c77162deSPaul Mackerras return -EINVAL; 288da9d1d7fSPaul Mackerras if (physp[gfn - memslot->base_gfn]) 289c77162deSPaul Mackerras return 0; 290c77162deSPaul Mackerras 2919d0ef5eaSPaul Mackerras is_io = 0; 2929d0ef5eaSPaul Mackerras got = 0; 293c77162deSPaul Mackerras page = NULL; 294da9d1d7fSPaul Mackerras pgsize = psize; 2959d0ef5eaSPaul Mackerras err = -EINVAL; 296c77162deSPaul Mackerras start = gfn_to_hva_memslot(memslot, gfn); 297c77162deSPaul Mackerras 298c77162deSPaul Mackerras /* Instantiate and get the page we want access to */ 299c77162deSPaul Mackerras np = get_user_pages_fast(start, 1, 1, pages); 3009d0ef5eaSPaul Mackerras if (np != 1) { 3019d0ef5eaSPaul Mackerras /* Look up the vma for the page */ 3029d0ef5eaSPaul Mackerras down_read(¤t->mm->mmap_sem); 3039d0ef5eaSPaul Mackerras vma = find_vma(current->mm, start); 3049d0ef5eaSPaul Mackerras if (!vma || vma->vm_start > start || 3059d0ef5eaSPaul Mackerras start + psize > vma->vm_end || 3069d0ef5eaSPaul Mackerras !(vma->vm_flags & VM_PFNMAP)) 3079d0ef5eaSPaul Mackerras goto up_err; 3089d0ef5eaSPaul Mackerras is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot)); 3099d0ef5eaSPaul Mackerras pfn = 
vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); 3109d0ef5eaSPaul Mackerras /* check alignment of pfn vs. requested page size */ 3119d0ef5eaSPaul Mackerras if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1))) 3129d0ef5eaSPaul Mackerras goto up_err; 3139d0ef5eaSPaul Mackerras up_read(¤t->mm->mmap_sem); 3149d0ef5eaSPaul Mackerras 3159d0ef5eaSPaul Mackerras } else { 316c77162deSPaul Mackerras page = pages[0]; 317da9d1d7fSPaul Mackerras got = KVMPPC_GOT_PAGE; 318c77162deSPaul Mackerras 319da9d1d7fSPaul Mackerras /* See if this is a large page */ 320da9d1d7fSPaul Mackerras s = PAGE_SIZE; 321da9d1d7fSPaul Mackerras if (PageHuge(page)) { 322da9d1d7fSPaul Mackerras hpage = compound_head(page); 323da9d1d7fSPaul Mackerras s <<= compound_order(hpage); 324da9d1d7fSPaul Mackerras /* Get the whole large page if slot alignment is ok */ 325da9d1d7fSPaul Mackerras if (s > psize && slot_is_aligned(memslot, s) && 326da9d1d7fSPaul Mackerras !(memslot->userspace_addr & (s - 1))) { 327da9d1d7fSPaul Mackerras start &= ~(s - 1); 328da9d1d7fSPaul Mackerras pgsize = s; 329de6c0b02SDavid Gibson get_page(hpage); 330de6c0b02SDavid Gibson put_page(page); 331da9d1d7fSPaul Mackerras page = hpage; 332c77162deSPaul Mackerras } 333da9d1d7fSPaul Mackerras } 334da9d1d7fSPaul Mackerras if (s < psize) 335da9d1d7fSPaul Mackerras goto out; 336c77162deSPaul Mackerras pfn = page_to_pfn(page); 3379d0ef5eaSPaul Mackerras } 338c77162deSPaul Mackerras 339da9d1d7fSPaul Mackerras npages = pgsize >> PAGE_SHIFT; 340da9d1d7fSPaul Mackerras pgorder = __ilog2(npages); 341da9d1d7fSPaul Mackerras physp += (gfn - memslot->base_gfn) & ~(npages - 1); 342c77162deSPaul Mackerras spin_lock(&kvm->arch.slot_phys_lock); 343da9d1d7fSPaul Mackerras for (i = 0; i < npages; ++i) { 344da9d1d7fSPaul Mackerras if (!physp[i]) { 3459d0ef5eaSPaul Mackerras physp[i] = ((pfn + i) << PAGE_SHIFT) + 3469d0ef5eaSPaul Mackerras got + is_io + pgorder; 347da9d1d7fSPaul Mackerras got = 0; 348da9d1d7fSPaul Mackerras } 
349da9d1d7fSPaul Mackerras } 350c77162deSPaul Mackerras spin_unlock(&kvm->arch.slot_phys_lock); 351da9d1d7fSPaul Mackerras err = 0; 352c77162deSPaul Mackerras 353da9d1d7fSPaul Mackerras out: 354de6c0b02SDavid Gibson if (got) 355da9d1d7fSPaul Mackerras put_page(page); 356da9d1d7fSPaul Mackerras return err; 3579d0ef5eaSPaul Mackerras 3589d0ef5eaSPaul Mackerras up_err: 3599d0ef5eaSPaul Mackerras up_read(¤t->mm->mmap_sem); 3609d0ef5eaSPaul Mackerras return err; 361c77162deSPaul Mackerras } 362c77162deSPaul Mackerras 3637ed661bfSPaul Mackerras long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, 3647ed661bfSPaul Mackerras long pte_index, unsigned long pteh, 3657ed661bfSPaul Mackerras unsigned long ptel, unsigned long *pte_idx_ret) 366c77162deSPaul Mackerras { 367c77162deSPaul Mackerras unsigned long psize, gpa, gfn; 368c77162deSPaul Mackerras struct kvm_memory_slot *memslot; 369c77162deSPaul Mackerras long ret; 370c77162deSPaul Mackerras 371342d3db7SPaul Mackerras if (kvm->arch.using_mmu_notifiers) 372342d3db7SPaul Mackerras goto do_insert; 373342d3db7SPaul Mackerras 374c77162deSPaul Mackerras psize = hpte_page_size(pteh, ptel); 375c77162deSPaul Mackerras if (!psize) 376c77162deSPaul Mackerras return H_PARAMETER; 377c77162deSPaul Mackerras 378697d3899SPaul Mackerras pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID); 379697d3899SPaul Mackerras 380c77162deSPaul Mackerras /* Find the memslot (if any) for this address */ 381c77162deSPaul Mackerras gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); 382c77162deSPaul Mackerras gfn = gpa >> PAGE_SHIFT; 383c77162deSPaul Mackerras memslot = gfn_to_memslot(kvm, gfn); 384697d3899SPaul Mackerras if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) { 385da9d1d7fSPaul Mackerras if (!slot_is_aligned(memslot, psize)) 386da9d1d7fSPaul Mackerras return H_PARAMETER; 387da9d1d7fSPaul Mackerras if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0) 388c77162deSPaul Mackerras return H_PARAMETER; 389697d3899SPaul Mackerras } 
390c77162deSPaul Mackerras 391342d3db7SPaul Mackerras do_insert: 392342d3db7SPaul Mackerras /* Protect linux PTE lookup from page table destruction */ 393342d3db7SPaul Mackerras rcu_read_lock_sched(); /* this disables preemption too */ 3947ed661bfSPaul Mackerras ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel, 3957ed661bfSPaul Mackerras current->mm->pgd, false, pte_idx_ret); 396342d3db7SPaul Mackerras rcu_read_unlock_sched(); 397c77162deSPaul Mackerras if (ret == H_TOO_HARD) { 398c77162deSPaul Mackerras /* this can't happen */ 399c77162deSPaul Mackerras pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n"); 400c77162deSPaul Mackerras ret = H_RESOURCE; /* or something */ 401c77162deSPaul Mackerras } 402c77162deSPaul Mackerras return ret; 403c77162deSPaul Mackerras 404c77162deSPaul Mackerras } 405c77162deSPaul Mackerras 4067ed661bfSPaul Mackerras /* 4077ed661bfSPaul Mackerras * We come here on a H_ENTER call from the guest when we are not 4087ed661bfSPaul Mackerras * using mmu notifiers and we don't have the requested page pinned 4097ed661bfSPaul Mackerras * already. 
4107ed661bfSPaul Mackerras */ 4117ed661bfSPaul Mackerras long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, 4127ed661bfSPaul Mackerras long pte_index, unsigned long pteh, 4137ed661bfSPaul Mackerras unsigned long ptel) 4147ed661bfSPaul Mackerras { 4157ed661bfSPaul Mackerras return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index, 4167ed661bfSPaul Mackerras pteh, ptel, &vcpu->arch.gpr[4]); 4177ed661bfSPaul Mackerras } 4187ed661bfSPaul Mackerras 419697d3899SPaul Mackerras static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu, 420697d3899SPaul Mackerras gva_t eaddr) 421697d3899SPaul Mackerras { 422697d3899SPaul Mackerras u64 mask; 423697d3899SPaul Mackerras int i; 424697d3899SPaul Mackerras 425697d3899SPaul Mackerras for (i = 0; i < vcpu->arch.slb_nr; i++) { 426697d3899SPaul Mackerras if (!(vcpu->arch.slb[i].orige & SLB_ESID_V)) 427697d3899SPaul Mackerras continue; 428697d3899SPaul Mackerras 429697d3899SPaul Mackerras if (vcpu->arch.slb[i].origv & SLB_VSID_B_1T) 430697d3899SPaul Mackerras mask = ESID_MASK_1T; 431697d3899SPaul Mackerras else 432697d3899SPaul Mackerras mask = ESID_MASK; 433697d3899SPaul Mackerras 434697d3899SPaul Mackerras if (((vcpu->arch.slb[i].orige ^ eaddr) & mask) == 0) 435697d3899SPaul Mackerras return &vcpu->arch.slb[i]; 436697d3899SPaul Mackerras } 437697d3899SPaul Mackerras return NULL; 438697d3899SPaul Mackerras } 439697d3899SPaul Mackerras 440697d3899SPaul Mackerras static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r, 441697d3899SPaul Mackerras unsigned long ea) 442697d3899SPaul Mackerras { 443697d3899SPaul Mackerras unsigned long ra_mask; 444697d3899SPaul Mackerras 445697d3899SPaul Mackerras ra_mask = hpte_page_size(v, r) - 1; 446697d3899SPaul Mackerras return (r & HPTE_R_RPN & ~ra_mask) | (ea & ra_mask); 447697d3899SPaul Mackerras } 448697d3899SPaul Mackerras 449de56a948SPaul Mackerras static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, 
45093b159b4SPaul Mackerras struct kvmppc_pte *gpte, bool data, bool iswrite) 451de56a948SPaul Mackerras { 452697d3899SPaul Mackerras struct kvm *kvm = vcpu->kvm; 453697d3899SPaul Mackerras struct kvmppc_slb *slbe; 454697d3899SPaul Mackerras unsigned long slb_v; 455697d3899SPaul Mackerras unsigned long pp, key; 456697d3899SPaul Mackerras unsigned long v, gr; 457697d3899SPaul Mackerras unsigned long *hptep; 458697d3899SPaul Mackerras int index; 459697d3899SPaul Mackerras int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR); 460697d3899SPaul Mackerras 461697d3899SPaul Mackerras /* Get SLB entry */ 462697d3899SPaul Mackerras if (virtmode) { 463697d3899SPaul Mackerras slbe = kvmppc_mmu_book3s_hv_find_slbe(vcpu, eaddr); 464697d3899SPaul Mackerras if (!slbe) 465697d3899SPaul Mackerras return -EINVAL; 466697d3899SPaul Mackerras slb_v = slbe->origv; 467697d3899SPaul Mackerras } else { 468697d3899SPaul Mackerras /* real mode access */ 469697d3899SPaul Mackerras slb_v = vcpu->kvm->arch.vrma_slb_v; 470697d3899SPaul Mackerras } 471697d3899SPaul Mackerras 472697d3899SPaul Mackerras /* Find the HPTE in the hash table */ 473697d3899SPaul Mackerras index = kvmppc_hv_find_lock_hpte(kvm, eaddr, slb_v, 474697d3899SPaul Mackerras HPTE_V_VALID | HPTE_V_ABSENT); 475697d3899SPaul Mackerras if (index < 0) 476de56a948SPaul Mackerras return -ENOENT; 477697d3899SPaul Mackerras hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); 478697d3899SPaul Mackerras v = hptep[0] & ~HPTE_V_HVLOCK; 479697d3899SPaul Mackerras gr = kvm->arch.revmap[index].guest_rpte; 480697d3899SPaul Mackerras 481697d3899SPaul Mackerras /* Unlock the HPTE */ 482697d3899SPaul Mackerras asm volatile("lwsync" : : : "memory"); 483697d3899SPaul Mackerras hptep[0] = v; 484697d3899SPaul Mackerras 485697d3899SPaul Mackerras gpte->eaddr = eaddr; 486697d3899SPaul Mackerras gpte->vpage = ((v & HPTE_V_AVPN) << 4) | ((eaddr >> 12) & 0xfff); 487697d3899SPaul Mackerras 488697d3899SPaul Mackerras /* Get PP bits and key 
for permission check */ 489697d3899SPaul Mackerras pp = gr & (HPTE_R_PP0 | HPTE_R_PP); 490697d3899SPaul Mackerras key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS; 491697d3899SPaul Mackerras key &= slb_v; 492697d3899SPaul Mackerras 493697d3899SPaul Mackerras /* Calculate permissions */ 494697d3899SPaul Mackerras gpte->may_read = hpte_read_permission(pp, key); 495697d3899SPaul Mackerras gpte->may_write = hpte_write_permission(pp, key); 496697d3899SPaul Mackerras gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G)); 497697d3899SPaul Mackerras 498697d3899SPaul Mackerras /* Storage key permission check for POWER7 */ 499697d3899SPaul Mackerras if (data && virtmode && cpu_has_feature(CPU_FTR_ARCH_206)) { 500697d3899SPaul Mackerras int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr); 501697d3899SPaul Mackerras if (amrfield & 1) 502697d3899SPaul Mackerras gpte->may_read = 0; 503697d3899SPaul Mackerras if (amrfield & 2) 504697d3899SPaul Mackerras gpte->may_write = 0; 505697d3899SPaul Mackerras } 506697d3899SPaul Mackerras 507697d3899SPaul Mackerras /* Get the guest physical address */ 508697d3899SPaul Mackerras gpte->raddr = kvmppc_mmu_get_real_addr(v, gr, eaddr); 509697d3899SPaul Mackerras return 0; 510697d3899SPaul Mackerras } 511697d3899SPaul Mackerras 512697d3899SPaul Mackerras /* 513697d3899SPaul Mackerras * Quick test for whether an instruction is a load or a store. 514697d3899SPaul Mackerras * If the instruction is a load or a store, then this will indicate 515697d3899SPaul Mackerras * which it is, at least on server processors. (Embedded processors 516697d3899SPaul Mackerras * have some external PID instructions that don't follow the rule 517697d3899SPaul Mackerras * embodied here.) If the instruction isn't a load or store, then 518697d3899SPaul Mackerras * this doesn't return anything useful. 
519697d3899SPaul Mackerras */ 520697d3899SPaul Mackerras static int instruction_is_store(unsigned int instr) 521697d3899SPaul Mackerras { 522697d3899SPaul Mackerras unsigned int mask; 523697d3899SPaul Mackerras 524697d3899SPaul Mackerras mask = 0x10000000; 525697d3899SPaul Mackerras if ((instr & 0xfc000000) == 0x7c000000) 526697d3899SPaul Mackerras mask = 0x100; /* major opcode 31 */ 527697d3899SPaul Mackerras return (instr & mask) != 0; 528697d3899SPaul Mackerras } 529697d3899SPaul Mackerras 530697d3899SPaul Mackerras static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, 5316020c0f6SAlexander Graf unsigned long gpa, gva_t ea, int is_store) 532697d3899SPaul Mackerras { 533697d3899SPaul Mackerras int ret; 534697d3899SPaul Mackerras u32 last_inst; 535697d3899SPaul Mackerras unsigned long srr0 = kvmppc_get_pc(vcpu); 536697d3899SPaul Mackerras 537697d3899SPaul Mackerras /* We try to load the last instruction. We don't let 538697d3899SPaul Mackerras * emulate_instruction do it as it doesn't check what 539697d3899SPaul Mackerras * kvmppc_ld returns. 540697d3899SPaul Mackerras * If we fail, we just return to the guest and try executing it again. 541697d3899SPaul Mackerras */ 542697d3899SPaul Mackerras if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) { 543697d3899SPaul Mackerras ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); 544697d3899SPaul Mackerras if (ret != EMULATE_DONE || last_inst == KVM_INST_FETCH_FAILED) 545697d3899SPaul Mackerras return RESUME_GUEST; 546697d3899SPaul Mackerras vcpu->arch.last_inst = last_inst; 547697d3899SPaul Mackerras } 548697d3899SPaul Mackerras 549697d3899SPaul Mackerras /* 550697d3899SPaul Mackerras * WARNING: We do not know for sure whether the instruction we just 551697d3899SPaul Mackerras * read from memory is the same that caused the fault in the first 552697d3899SPaul Mackerras * place. 
If the instruction we read is neither an load or a store, 553697d3899SPaul Mackerras * then it can't access memory, so we don't need to worry about 554697d3899SPaul Mackerras * enforcing access permissions. So, assuming it is a load or 555697d3899SPaul Mackerras * store, we just check that its direction (load or store) is 556697d3899SPaul Mackerras * consistent with the original fault, since that's what we 557697d3899SPaul Mackerras * checked the access permissions against. If there is a mismatch 558697d3899SPaul Mackerras * we just return and retry the instruction. 559697d3899SPaul Mackerras */ 560697d3899SPaul Mackerras 561697d3899SPaul Mackerras if (instruction_is_store(vcpu->arch.last_inst) != !!is_store) 562697d3899SPaul Mackerras return RESUME_GUEST; 563697d3899SPaul Mackerras 564697d3899SPaul Mackerras /* 565697d3899SPaul Mackerras * Emulated accesses are emulated by looking at the hash for 566697d3899SPaul Mackerras * translation once, then performing the access later. The 567697d3899SPaul Mackerras * translation could be invalidated in the meantime in which 568697d3899SPaul Mackerras * point performing the subsequent memory access on the old 569697d3899SPaul Mackerras * physical address could possibly be a security hole for the 570697d3899SPaul Mackerras * guest (but not the host). 571697d3899SPaul Mackerras * 572697d3899SPaul Mackerras * This is less of an issue for MMIO stores since they aren't 573697d3899SPaul Mackerras * globally visible. It could be an issue for MMIO loads to 574697d3899SPaul Mackerras * a certain extent but we'll ignore it for now. 
575697d3899SPaul Mackerras */ 576697d3899SPaul Mackerras 577697d3899SPaul Mackerras vcpu->arch.paddr_accessed = gpa; 5786020c0f6SAlexander Graf vcpu->arch.vaddr_accessed = ea; 579697d3899SPaul Mackerras return kvmppc_emulate_mmio(run, vcpu); 580697d3899SPaul Mackerras } 581697d3899SPaul Mackerras 582697d3899SPaul Mackerras int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, 583697d3899SPaul Mackerras unsigned long ea, unsigned long dsisr) 584697d3899SPaul Mackerras { 585697d3899SPaul Mackerras struct kvm *kvm = vcpu->kvm; 586342d3db7SPaul Mackerras unsigned long *hptep, hpte[3], r; 587342d3db7SPaul Mackerras unsigned long mmu_seq, psize, pte_size; 58870bddfefSPaul Mackerras unsigned long gpa, gfn, hva, pfn; 589697d3899SPaul Mackerras struct kvm_memory_slot *memslot; 590342d3db7SPaul Mackerras unsigned long *rmap; 591697d3899SPaul Mackerras struct revmap_entry *rev; 592342d3db7SPaul Mackerras struct page *page, *pages[1]; 593342d3db7SPaul Mackerras long index, ret, npages; 594342d3db7SPaul Mackerras unsigned long is_io; 5954cf302bcSPaul Mackerras unsigned int writing, write_ok; 596342d3db7SPaul Mackerras struct vm_area_struct *vma; 597bad3b507SPaul Mackerras unsigned long rcbits; 598697d3899SPaul Mackerras 599697d3899SPaul Mackerras /* 600697d3899SPaul Mackerras * Real-mode code has already searched the HPT and found the 601697d3899SPaul Mackerras * entry we're interested in. Lock the entry and check that 602697d3899SPaul Mackerras * it hasn't changed. If it has, just return and re-execute the 603697d3899SPaul Mackerras * instruction. 
604697d3899SPaul Mackerras */ 605697d3899SPaul Mackerras if (ea != vcpu->arch.pgfault_addr) 606697d3899SPaul Mackerras return RESUME_GUEST; 607697d3899SPaul Mackerras index = vcpu->arch.pgfault_index; 608697d3899SPaul Mackerras hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); 609697d3899SPaul Mackerras rev = &kvm->arch.revmap[index]; 610697d3899SPaul Mackerras preempt_disable(); 611697d3899SPaul Mackerras while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) 612697d3899SPaul Mackerras cpu_relax(); 613697d3899SPaul Mackerras hpte[0] = hptep[0] & ~HPTE_V_HVLOCK; 614697d3899SPaul Mackerras hpte[1] = hptep[1]; 615342d3db7SPaul Mackerras hpte[2] = r = rev->guest_rpte; 616697d3899SPaul Mackerras asm volatile("lwsync" : : : "memory"); 617697d3899SPaul Mackerras hptep[0] = hpte[0]; 618697d3899SPaul Mackerras preempt_enable(); 619697d3899SPaul Mackerras 620697d3899SPaul Mackerras if (hpte[0] != vcpu->arch.pgfault_hpte[0] || 621697d3899SPaul Mackerras hpte[1] != vcpu->arch.pgfault_hpte[1]) 622697d3899SPaul Mackerras return RESUME_GUEST; 623697d3899SPaul Mackerras 624697d3899SPaul Mackerras /* Translate the logical address and get the page */ 625342d3db7SPaul Mackerras psize = hpte_page_size(hpte[0], r); 62670bddfefSPaul Mackerras gpa = (r & HPTE_R_RPN & ~(psize - 1)) | (ea & (psize - 1)); 62770bddfefSPaul Mackerras gfn = gpa >> PAGE_SHIFT; 628697d3899SPaul Mackerras memslot = gfn_to_memslot(kvm, gfn); 629697d3899SPaul Mackerras 630697d3899SPaul Mackerras /* No memslot means it's an emulated MMIO region */ 63170bddfefSPaul Mackerras if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) 6326020c0f6SAlexander Graf return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, 633697d3899SPaul Mackerras dsisr & DSISR_ISSTORE); 634697d3899SPaul Mackerras 635342d3db7SPaul Mackerras if (!kvm->arch.using_mmu_notifiers) 636342d3db7SPaul Mackerras return -EFAULT; /* should never get here */ 637342d3db7SPaul Mackerras 638342d3db7SPaul Mackerras /* used to check for invalidations in progress 
*/ 639342d3db7SPaul Mackerras mmu_seq = kvm->mmu_notifier_seq; 640342d3db7SPaul Mackerras smp_rmb(); 641342d3db7SPaul Mackerras 642342d3db7SPaul Mackerras is_io = 0; 643342d3db7SPaul Mackerras pfn = 0; 644342d3db7SPaul Mackerras page = NULL; 645342d3db7SPaul Mackerras pte_size = PAGE_SIZE; 6464cf302bcSPaul Mackerras writing = (dsisr & DSISR_ISSTORE) != 0; 6474cf302bcSPaul Mackerras /* If writing != 0, then the HPTE must allow writing, if we get here */ 6484cf302bcSPaul Mackerras write_ok = writing; 649342d3db7SPaul Mackerras hva = gfn_to_hva_memslot(memslot, gfn); 6504cf302bcSPaul Mackerras npages = get_user_pages_fast(hva, 1, writing, pages); 651342d3db7SPaul Mackerras if (npages < 1) { 652342d3db7SPaul Mackerras /* Check if it's an I/O mapping */ 653342d3db7SPaul Mackerras down_read(¤t->mm->mmap_sem); 654342d3db7SPaul Mackerras vma = find_vma(current->mm, hva); 655342d3db7SPaul Mackerras if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end && 656342d3db7SPaul Mackerras (vma->vm_flags & VM_PFNMAP)) { 657342d3db7SPaul Mackerras pfn = vma->vm_pgoff + 658342d3db7SPaul Mackerras ((hva - vma->vm_start) >> PAGE_SHIFT); 659342d3db7SPaul Mackerras pte_size = psize; 660342d3db7SPaul Mackerras is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot)); 6614cf302bcSPaul Mackerras write_ok = vma->vm_flags & VM_WRITE; 662342d3db7SPaul Mackerras } 663342d3db7SPaul Mackerras up_read(¤t->mm->mmap_sem); 664342d3db7SPaul Mackerras if (!pfn) 665697d3899SPaul Mackerras return -EFAULT; 666342d3db7SPaul Mackerras } else { 667342d3db7SPaul Mackerras page = pages[0]; 668342d3db7SPaul Mackerras if (PageHuge(page)) { 669342d3db7SPaul Mackerras page = compound_head(page); 670342d3db7SPaul Mackerras pte_size <<= compound_order(page); 671342d3db7SPaul Mackerras } 6724cf302bcSPaul Mackerras /* if the guest wants write access, see if that is OK */ 6734cf302bcSPaul Mackerras if (!writing && hpte_is_writable(r)) { 674db7cb5b9SAneesh Kumar K.V unsigned int hugepage_shift; 6754cf302bcSPaul 
Mackerras pte_t *ptep, pte; 6764cf302bcSPaul Mackerras 6774cf302bcSPaul Mackerras /* 6784cf302bcSPaul Mackerras * We need to protect against page table destruction 6794cf302bcSPaul Mackerras * while looking up and updating the pte. 6804cf302bcSPaul Mackerras */ 6814cf302bcSPaul Mackerras rcu_read_lock_sched(); 6824cf302bcSPaul Mackerras ptep = find_linux_pte_or_hugepte(current->mm->pgd, 683db7cb5b9SAneesh Kumar K.V hva, &hugepage_shift); 684db7cb5b9SAneesh Kumar K.V if (ptep) { 685db7cb5b9SAneesh Kumar K.V pte = kvmppc_read_update_linux_pte(ptep, 1, 686db7cb5b9SAneesh Kumar K.V hugepage_shift); 6874cf302bcSPaul Mackerras if (pte_write(pte)) 6884cf302bcSPaul Mackerras write_ok = 1; 6894cf302bcSPaul Mackerras } 6904cf302bcSPaul Mackerras rcu_read_unlock_sched(); 6914cf302bcSPaul Mackerras } 692342d3db7SPaul Mackerras pfn = page_to_pfn(page); 693342d3db7SPaul Mackerras } 694342d3db7SPaul Mackerras 695342d3db7SPaul Mackerras ret = -EFAULT; 696342d3db7SPaul Mackerras if (psize > pte_size) 697342d3db7SPaul Mackerras goto out_put; 698342d3db7SPaul Mackerras 699342d3db7SPaul Mackerras /* Check WIMG vs. the actual page we're accessing */ 700342d3db7SPaul Mackerras if (!hpte_cache_flags_ok(r, is_io)) { 701342d3db7SPaul Mackerras if (is_io) 702342d3db7SPaul Mackerras return -EFAULT; 703342d3db7SPaul Mackerras /* 704342d3db7SPaul Mackerras * Allow guest to map emulated device memory as 705342d3db7SPaul Mackerras * uncacheable, but actually make it cacheable. 
706342d3db7SPaul Mackerras */ 707342d3db7SPaul Mackerras r = (r & ~(HPTE_R_W|HPTE_R_I|HPTE_R_G)) | HPTE_R_M; 708342d3db7SPaul Mackerras } 709342d3db7SPaul Mackerras 710342d3db7SPaul Mackerras /* Set the HPTE to point to pfn */ 711342d3db7SPaul Mackerras r = (r & ~(HPTE_R_PP0 - pte_size)) | (pfn << PAGE_SHIFT); 7124cf302bcSPaul Mackerras if (hpte_is_writable(r) && !write_ok) 7134cf302bcSPaul Mackerras r = hpte_make_readonly(r); 714342d3db7SPaul Mackerras ret = RESUME_GUEST; 715342d3db7SPaul Mackerras preempt_disable(); 716342d3db7SPaul Mackerras while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) 717342d3db7SPaul Mackerras cpu_relax(); 718342d3db7SPaul Mackerras if ((hptep[0] & ~HPTE_V_HVLOCK) != hpte[0] || hptep[1] != hpte[1] || 719342d3db7SPaul Mackerras rev->guest_rpte != hpte[2]) 720342d3db7SPaul Mackerras /* HPTE has been changed under us; let the guest retry */ 721342d3db7SPaul Mackerras goto out_unlock; 722342d3db7SPaul Mackerras hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; 723342d3db7SPaul Mackerras 724d89cc617STakuya Yoshikawa rmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; 725342d3db7SPaul Mackerras lock_rmap(rmap); 726342d3db7SPaul Mackerras 727342d3db7SPaul Mackerras /* Check if we might have been invalidated; let the guest retry if so */ 728342d3db7SPaul Mackerras ret = RESUME_GUEST; 7298ca40a70SChristoffer Dall if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) { 730342d3db7SPaul Mackerras unlock_rmap(rmap); 731342d3db7SPaul Mackerras goto out_unlock; 732342d3db7SPaul Mackerras } 7334cf302bcSPaul Mackerras 734bad3b507SPaul Mackerras /* Only set R/C in real HPTE if set in both *rmap and guest_rpte */ 735bad3b507SPaul Mackerras rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT; 736bad3b507SPaul Mackerras r &= rcbits | ~(HPTE_R_R | HPTE_R_C); 737bad3b507SPaul Mackerras 7384cf302bcSPaul Mackerras if (hptep[0] & HPTE_V_VALID) { 7394cf302bcSPaul Mackerras /* HPTE was previously valid, so we need to invalidate it */ 7404cf302bcSPaul Mackerras unlock_rmap(rmap); 
7414cf302bcSPaul Mackerras hptep[0] |= HPTE_V_ABSENT; 7424cf302bcSPaul Mackerras kvmppc_invalidate_hpte(kvm, hptep, index); 743bad3b507SPaul Mackerras /* don't lose previous R and C bits */ 744bad3b507SPaul Mackerras r |= hptep[1] & (HPTE_R_R | HPTE_R_C); 7454cf302bcSPaul Mackerras } else { 746342d3db7SPaul Mackerras kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0); 7474cf302bcSPaul Mackerras } 748342d3db7SPaul Mackerras 749342d3db7SPaul Mackerras hptep[1] = r; 750342d3db7SPaul Mackerras eieio(); 751342d3db7SPaul Mackerras hptep[0] = hpte[0]; 752342d3db7SPaul Mackerras asm volatile("ptesync" : : : "memory"); 753342d3db7SPaul Mackerras preempt_enable(); 7544cf302bcSPaul Mackerras if (page && hpte_is_writable(r)) 755342d3db7SPaul Mackerras SetPageDirty(page); 756342d3db7SPaul Mackerras 757342d3db7SPaul Mackerras out_put: 758de6c0b02SDavid Gibson if (page) { 759de6c0b02SDavid Gibson /* 760de6c0b02SDavid Gibson * We drop pages[0] here, not page because page might 761de6c0b02SDavid Gibson * have been set to the head page of a compound, but 762de6c0b02SDavid Gibson * we have to drop the reference on the correct tail 763de6c0b02SDavid Gibson * page to match the get inside gup() 764de6c0b02SDavid Gibson */ 765de6c0b02SDavid Gibson put_page(pages[0]); 766de6c0b02SDavid Gibson } 767342d3db7SPaul Mackerras return ret; 768342d3db7SPaul Mackerras 769342d3db7SPaul Mackerras out_unlock: 770342d3db7SPaul Mackerras hptep[0] &= ~HPTE_V_HVLOCK; 771342d3db7SPaul Mackerras preempt_enable(); 772342d3db7SPaul Mackerras goto out_put; 773342d3db7SPaul Mackerras } 774342d3db7SPaul Mackerras 775a64fd707SPaul Mackerras static void kvmppc_rmap_reset(struct kvm *kvm) 776a64fd707SPaul Mackerras { 777a64fd707SPaul Mackerras struct kvm_memslots *slots; 778a64fd707SPaul Mackerras struct kvm_memory_slot *memslot; 779a64fd707SPaul Mackerras int srcu_idx; 780a64fd707SPaul Mackerras 781a64fd707SPaul Mackerras srcu_idx = srcu_read_lock(&kvm->srcu); 782a64fd707SPaul Mackerras slots = kvm->memslots; 
783a64fd707SPaul Mackerras kvm_for_each_memslot(memslot, slots) { 784a64fd707SPaul Mackerras /* 785a64fd707SPaul Mackerras * This assumes it is acceptable to lose reference and 786a64fd707SPaul Mackerras * change bits across a reset. 787a64fd707SPaul Mackerras */ 788a64fd707SPaul Mackerras memset(memslot->arch.rmap, 0, 789a64fd707SPaul Mackerras memslot->npages * sizeof(*memslot->arch.rmap)); 790a64fd707SPaul Mackerras } 791a64fd707SPaul Mackerras srcu_read_unlock(&kvm->srcu, srcu_idx); 792a64fd707SPaul Mackerras } 793a64fd707SPaul Mackerras 79484504ef3STakuya Yoshikawa static int kvm_handle_hva_range(struct kvm *kvm, 79584504ef3STakuya Yoshikawa unsigned long start, 79684504ef3STakuya Yoshikawa unsigned long end, 79784504ef3STakuya Yoshikawa int (*handler)(struct kvm *kvm, 79884504ef3STakuya Yoshikawa unsigned long *rmapp, 799342d3db7SPaul Mackerras unsigned long gfn)) 800342d3db7SPaul Mackerras { 801342d3db7SPaul Mackerras int ret; 802342d3db7SPaul Mackerras int retval = 0; 803342d3db7SPaul Mackerras struct kvm_memslots *slots; 804342d3db7SPaul Mackerras struct kvm_memory_slot *memslot; 805342d3db7SPaul Mackerras 806342d3db7SPaul Mackerras slots = kvm_memslots(kvm); 807342d3db7SPaul Mackerras kvm_for_each_memslot(memslot, slots) { 80884504ef3STakuya Yoshikawa unsigned long hva_start, hva_end; 80984504ef3STakuya Yoshikawa gfn_t gfn, gfn_end; 810342d3db7SPaul Mackerras 81184504ef3STakuya Yoshikawa hva_start = max(start, memslot->userspace_addr); 81284504ef3STakuya Yoshikawa hva_end = min(end, memslot->userspace_addr + 81384504ef3STakuya Yoshikawa (memslot->npages << PAGE_SHIFT)); 81484504ef3STakuya Yoshikawa if (hva_start >= hva_end) 81584504ef3STakuya Yoshikawa continue; 81684504ef3STakuya Yoshikawa /* 81784504ef3STakuya Yoshikawa * {gfn(page) | page intersects with [hva_start, hva_end)} = 81884504ef3STakuya Yoshikawa * {gfn, gfn+1, ..., gfn_end-1}. 
81984504ef3STakuya Yoshikawa */ 82084504ef3STakuya Yoshikawa gfn = hva_to_gfn_memslot(hva_start, memslot); 82184504ef3STakuya Yoshikawa gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); 82284504ef3STakuya Yoshikawa 82384504ef3STakuya Yoshikawa for (; gfn < gfn_end; ++gfn) { 824d19a748bSTakuya Yoshikawa gfn_t gfn_offset = gfn - memslot->base_gfn; 825342d3db7SPaul Mackerras 826d89cc617STakuya Yoshikawa ret = handler(kvm, &memslot->arch.rmap[gfn_offset], gfn); 827342d3db7SPaul Mackerras retval |= ret; 828342d3db7SPaul Mackerras } 829342d3db7SPaul Mackerras } 830342d3db7SPaul Mackerras 831342d3db7SPaul Mackerras return retval; 832342d3db7SPaul Mackerras } 833342d3db7SPaul Mackerras 83484504ef3STakuya Yoshikawa static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, 83584504ef3STakuya Yoshikawa int (*handler)(struct kvm *kvm, unsigned long *rmapp, 83684504ef3STakuya Yoshikawa unsigned long gfn)) 83784504ef3STakuya Yoshikawa { 83884504ef3STakuya Yoshikawa return kvm_handle_hva_range(kvm, hva, hva + 1, handler); 83984504ef3STakuya Yoshikawa } 84084504ef3STakuya Yoshikawa 841342d3db7SPaul Mackerras static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, 842342d3db7SPaul Mackerras unsigned long gfn) 843342d3db7SPaul Mackerras { 844342d3db7SPaul Mackerras struct revmap_entry *rev = kvm->arch.revmap; 845342d3db7SPaul Mackerras unsigned long h, i, j; 846342d3db7SPaul Mackerras unsigned long *hptep; 847bad3b507SPaul Mackerras unsigned long ptel, psize, rcbits; 848342d3db7SPaul Mackerras 849342d3db7SPaul Mackerras for (;;) { 850bad3b507SPaul Mackerras lock_rmap(rmapp); 851342d3db7SPaul Mackerras if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { 852bad3b507SPaul Mackerras unlock_rmap(rmapp); 853342d3db7SPaul Mackerras break; 854342d3db7SPaul Mackerras } 855342d3db7SPaul Mackerras 856342d3db7SPaul Mackerras /* 857342d3db7SPaul Mackerras * To avoid an ABBA deadlock with the HPTE lock bit, 858bad3b507SPaul Mackerras * we can't spin on the HPTE lock while holding 
the 859bad3b507SPaul Mackerras * rmap chain lock. 860342d3db7SPaul Mackerras */ 861342d3db7SPaul Mackerras i = *rmapp & KVMPPC_RMAP_INDEX; 862bad3b507SPaul Mackerras hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); 863bad3b507SPaul Mackerras if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { 864bad3b507SPaul Mackerras /* unlock rmap before spinning on the HPTE lock */ 865bad3b507SPaul Mackerras unlock_rmap(rmapp); 866bad3b507SPaul Mackerras while (hptep[0] & HPTE_V_HVLOCK) 867bad3b507SPaul Mackerras cpu_relax(); 868bad3b507SPaul Mackerras continue; 869bad3b507SPaul Mackerras } 870342d3db7SPaul Mackerras j = rev[i].forw; 871342d3db7SPaul Mackerras if (j == i) { 872342d3db7SPaul Mackerras /* chain is now empty */ 873bad3b507SPaul Mackerras *rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX); 874342d3db7SPaul Mackerras } else { 875342d3db7SPaul Mackerras /* remove i from chain */ 876342d3db7SPaul Mackerras h = rev[i].back; 877342d3db7SPaul Mackerras rev[h].forw = j; 878342d3db7SPaul Mackerras rev[j].back = h; 879342d3db7SPaul Mackerras rev[i].forw = rev[i].back = i; 880bad3b507SPaul Mackerras *rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j; 881342d3db7SPaul Mackerras } 882342d3db7SPaul Mackerras 883bad3b507SPaul Mackerras /* Now check and modify the HPTE */ 884342d3db7SPaul Mackerras ptel = rev[i].guest_rpte; 885342d3db7SPaul Mackerras psize = hpte_page_size(hptep[0], ptel); 886342d3db7SPaul Mackerras if ((hptep[0] & HPTE_V_VALID) && 887342d3db7SPaul Mackerras hpte_rpn(ptel, psize) == gfn) { 888dfe49dbdSPaul Mackerras if (kvm->arch.using_mmu_notifiers) 889342d3db7SPaul Mackerras hptep[0] |= HPTE_V_ABSENT; 890bad3b507SPaul Mackerras kvmppc_invalidate_hpte(kvm, hptep, i); 891bad3b507SPaul Mackerras /* Harvest R and C */ 892bad3b507SPaul Mackerras rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); 893bad3b507SPaul Mackerras *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; 894a1b4a0f6SPaul Mackerras if (rcbits & ~rev[i].guest_rpte) { 895bad3b507SPaul Mackerras rev[i].guest_rpte = ptel | 
rcbits; 896a1b4a0f6SPaul Mackerras note_hpte_modification(kvm, &rev[i]); 897a1b4a0f6SPaul Mackerras } 898342d3db7SPaul Mackerras } 899bad3b507SPaul Mackerras unlock_rmap(rmapp); 900342d3db7SPaul Mackerras hptep[0] &= ~HPTE_V_HVLOCK; 901342d3db7SPaul Mackerras } 902342d3db7SPaul Mackerras return 0; 903342d3db7SPaul Mackerras } 904342d3db7SPaul Mackerras 9053a167beaSAneesh Kumar K.V int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva) 906342d3db7SPaul Mackerras { 907342d3db7SPaul Mackerras if (kvm->arch.using_mmu_notifiers) 908342d3db7SPaul Mackerras kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); 909342d3db7SPaul Mackerras return 0; 910342d3db7SPaul Mackerras } 911342d3db7SPaul Mackerras 9123a167beaSAneesh Kumar K.V int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end) 913b3ae2096STakuya Yoshikawa { 914b3ae2096STakuya Yoshikawa if (kvm->arch.using_mmu_notifiers) 915b3ae2096STakuya Yoshikawa kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp); 916b3ae2096STakuya Yoshikawa return 0; 917b3ae2096STakuya Yoshikawa } 918b3ae2096STakuya Yoshikawa 9193a167beaSAneesh Kumar K.V void kvmppc_core_flush_memslot_hv(struct kvm *kvm, 9203a167beaSAneesh Kumar K.V struct kvm_memory_slot *memslot) 921dfe49dbdSPaul Mackerras { 922dfe49dbdSPaul Mackerras unsigned long *rmapp; 923dfe49dbdSPaul Mackerras unsigned long gfn; 924dfe49dbdSPaul Mackerras unsigned long n; 925dfe49dbdSPaul Mackerras 926dfe49dbdSPaul Mackerras rmapp = memslot->arch.rmap; 927dfe49dbdSPaul Mackerras gfn = memslot->base_gfn; 928dfe49dbdSPaul Mackerras for (n = memslot->npages; n; --n) { 929dfe49dbdSPaul Mackerras /* 930dfe49dbdSPaul Mackerras * Testing the present bit without locking is OK because 931dfe49dbdSPaul Mackerras * the memslot has been marked invalid already, and hence 932dfe49dbdSPaul Mackerras * no new HPTEs referencing this page can be created, 933dfe49dbdSPaul Mackerras * thus the present bit can't go from 0 to 1. 
934dfe49dbdSPaul Mackerras */ 935dfe49dbdSPaul Mackerras if (*rmapp & KVMPPC_RMAP_PRESENT) 936dfe49dbdSPaul Mackerras kvm_unmap_rmapp(kvm, rmapp, gfn); 937dfe49dbdSPaul Mackerras ++rmapp; 938dfe49dbdSPaul Mackerras ++gfn; 939dfe49dbdSPaul Mackerras } 940dfe49dbdSPaul Mackerras } 941dfe49dbdSPaul Mackerras 942342d3db7SPaul Mackerras static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, 943342d3db7SPaul Mackerras unsigned long gfn) 944342d3db7SPaul Mackerras { 94555514893SPaul Mackerras struct revmap_entry *rev = kvm->arch.revmap; 94655514893SPaul Mackerras unsigned long head, i, j; 94755514893SPaul Mackerras unsigned long *hptep; 94855514893SPaul Mackerras int ret = 0; 94955514893SPaul Mackerras 95055514893SPaul Mackerras retry: 95155514893SPaul Mackerras lock_rmap(rmapp); 95255514893SPaul Mackerras if (*rmapp & KVMPPC_RMAP_REFERENCED) { 953bad3b507SPaul Mackerras *rmapp &= ~KVMPPC_RMAP_REFERENCED; 95455514893SPaul Mackerras ret = 1; 95555514893SPaul Mackerras } 95655514893SPaul Mackerras if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { 95755514893SPaul Mackerras unlock_rmap(rmapp); 95855514893SPaul Mackerras return ret; 95955514893SPaul Mackerras } 96055514893SPaul Mackerras 96155514893SPaul Mackerras i = head = *rmapp & KVMPPC_RMAP_INDEX; 96255514893SPaul Mackerras do { 96355514893SPaul Mackerras hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); 96455514893SPaul Mackerras j = rev[i].forw; 96555514893SPaul Mackerras 96655514893SPaul Mackerras /* If this HPTE isn't referenced, ignore it */ 96755514893SPaul Mackerras if (!(hptep[1] & HPTE_R_R)) 96855514893SPaul Mackerras continue; 96955514893SPaul Mackerras 97055514893SPaul Mackerras if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { 97155514893SPaul Mackerras /* unlock rmap before spinning on the HPTE lock */ 97255514893SPaul Mackerras unlock_rmap(rmapp); 97355514893SPaul Mackerras while (hptep[0] & HPTE_V_HVLOCK) 97455514893SPaul Mackerras cpu_relax(); 97555514893SPaul Mackerras goto retry; 97655514893SPaul 
Mackerras } 97755514893SPaul Mackerras 97855514893SPaul Mackerras /* Now check and modify the HPTE */ 97955514893SPaul Mackerras if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) { 98055514893SPaul Mackerras kvmppc_clear_ref_hpte(kvm, hptep, i); 981a1b4a0f6SPaul Mackerras if (!(rev[i].guest_rpte & HPTE_R_R)) { 98255514893SPaul Mackerras rev[i].guest_rpte |= HPTE_R_R; 983a1b4a0f6SPaul Mackerras note_hpte_modification(kvm, &rev[i]); 984a1b4a0f6SPaul Mackerras } 98555514893SPaul Mackerras ret = 1; 98655514893SPaul Mackerras } 98755514893SPaul Mackerras hptep[0] &= ~HPTE_V_HVLOCK; 98855514893SPaul Mackerras } while ((i = j) != head); 98955514893SPaul Mackerras 99055514893SPaul Mackerras unlock_rmap(rmapp); 99155514893SPaul Mackerras return ret; 992342d3db7SPaul Mackerras } 993342d3db7SPaul Mackerras 9943a167beaSAneesh Kumar K.V int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva) 995342d3db7SPaul Mackerras { 996342d3db7SPaul Mackerras if (!kvm->arch.using_mmu_notifiers) 997342d3db7SPaul Mackerras return 0; 998342d3db7SPaul Mackerras return kvm_handle_hva(kvm, hva, kvm_age_rmapp); 999342d3db7SPaul Mackerras } 1000342d3db7SPaul Mackerras 1001342d3db7SPaul Mackerras static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, 1002342d3db7SPaul Mackerras unsigned long gfn) 1003342d3db7SPaul Mackerras { 100455514893SPaul Mackerras struct revmap_entry *rev = kvm->arch.revmap; 100555514893SPaul Mackerras unsigned long head, i, j; 100655514893SPaul Mackerras unsigned long *hp; 100755514893SPaul Mackerras int ret = 1; 100855514893SPaul Mackerras 100955514893SPaul Mackerras if (*rmapp & KVMPPC_RMAP_REFERENCED) 101055514893SPaul Mackerras return 1; 101155514893SPaul Mackerras 101255514893SPaul Mackerras lock_rmap(rmapp); 101355514893SPaul Mackerras if (*rmapp & KVMPPC_RMAP_REFERENCED) 101455514893SPaul Mackerras goto out; 101555514893SPaul Mackerras 101655514893SPaul Mackerras if (*rmapp & KVMPPC_RMAP_PRESENT) { 101755514893SPaul Mackerras i = head = *rmapp & 
KVMPPC_RMAP_INDEX; 101855514893SPaul Mackerras do { 101955514893SPaul Mackerras hp = (unsigned long *)(kvm->arch.hpt_virt + (i << 4)); 102055514893SPaul Mackerras j = rev[i].forw; 102155514893SPaul Mackerras if (hp[1] & HPTE_R_R) 102255514893SPaul Mackerras goto out; 102355514893SPaul Mackerras } while ((i = j) != head); 102455514893SPaul Mackerras } 102555514893SPaul Mackerras ret = 0; 102655514893SPaul Mackerras 102755514893SPaul Mackerras out: 102855514893SPaul Mackerras unlock_rmap(rmapp); 102955514893SPaul Mackerras return ret; 1030342d3db7SPaul Mackerras } 1031342d3db7SPaul Mackerras 10323a167beaSAneesh Kumar K.V int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva) 1033342d3db7SPaul Mackerras { 1034342d3db7SPaul Mackerras if (!kvm->arch.using_mmu_notifiers) 1035342d3db7SPaul Mackerras return 0; 1036342d3db7SPaul Mackerras return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp); 1037342d3db7SPaul Mackerras } 1038342d3db7SPaul Mackerras 10393a167beaSAneesh Kumar K.V void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte) 1040342d3db7SPaul Mackerras { 1041342d3db7SPaul Mackerras if (!kvm->arch.using_mmu_notifiers) 1042342d3db7SPaul Mackerras return; 1043342d3db7SPaul Mackerras kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); 1044de56a948SPaul Mackerras } 1045de56a948SPaul Mackerras 104682ed3616SPaul Mackerras static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) 104782ed3616SPaul Mackerras { 104882ed3616SPaul Mackerras struct revmap_entry *rev = kvm->arch.revmap; 104982ed3616SPaul Mackerras unsigned long head, i, j; 105082ed3616SPaul Mackerras unsigned long *hptep; 105182ed3616SPaul Mackerras int ret = 0; 105282ed3616SPaul Mackerras 105382ed3616SPaul Mackerras retry: 105482ed3616SPaul Mackerras lock_rmap(rmapp); 105582ed3616SPaul Mackerras if (*rmapp & KVMPPC_RMAP_CHANGED) { 105682ed3616SPaul Mackerras *rmapp &= ~KVMPPC_RMAP_CHANGED; 105782ed3616SPaul Mackerras ret = 1; 105882ed3616SPaul Mackerras } 105982ed3616SPaul Mackerras if 
(!(*rmapp & KVMPPC_RMAP_PRESENT)) { 106082ed3616SPaul Mackerras unlock_rmap(rmapp); 106182ed3616SPaul Mackerras return ret; 106282ed3616SPaul Mackerras } 106382ed3616SPaul Mackerras 106482ed3616SPaul Mackerras i = head = *rmapp & KVMPPC_RMAP_INDEX; 106582ed3616SPaul Mackerras do { 106682ed3616SPaul Mackerras hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); 106782ed3616SPaul Mackerras j = rev[i].forw; 106882ed3616SPaul Mackerras 106982ed3616SPaul Mackerras if (!(hptep[1] & HPTE_R_C)) 107082ed3616SPaul Mackerras continue; 107182ed3616SPaul Mackerras 107282ed3616SPaul Mackerras if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { 107382ed3616SPaul Mackerras /* unlock rmap before spinning on the HPTE lock */ 107482ed3616SPaul Mackerras unlock_rmap(rmapp); 107582ed3616SPaul Mackerras while (hptep[0] & HPTE_V_HVLOCK) 107682ed3616SPaul Mackerras cpu_relax(); 107782ed3616SPaul Mackerras goto retry; 107882ed3616SPaul Mackerras } 107982ed3616SPaul Mackerras 108082ed3616SPaul Mackerras /* Now check and modify the HPTE */ 108182ed3616SPaul Mackerras if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_C)) { 108282ed3616SPaul Mackerras /* need to make it temporarily absent to clear C */ 108382ed3616SPaul Mackerras hptep[0] |= HPTE_V_ABSENT; 108482ed3616SPaul Mackerras kvmppc_invalidate_hpte(kvm, hptep, i); 108582ed3616SPaul Mackerras hptep[1] &= ~HPTE_R_C; 108682ed3616SPaul Mackerras eieio(); 108782ed3616SPaul Mackerras hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; 1088a1b4a0f6SPaul Mackerras if (!(rev[i].guest_rpte & HPTE_R_C)) { 108982ed3616SPaul Mackerras rev[i].guest_rpte |= HPTE_R_C; 1090a1b4a0f6SPaul Mackerras note_hpte_modification(kvm, &rev[i]); 1091a1b4a0f6SPaul Mackerras } 109282ed3616SPaul Mackerras ret = 1; 109382ed3616SPaul Mackerras } 109482ed3616SPaul Mackerras hptep[0] &= ~HPTE_V_HVLOCK; 109582ed3616SPaul Mackerras } while ((i = j) != head); 109682ed3616SPaul Mackerras 109782ed3616SPaul Mackerras unlock_rmap(rmapp); 109882ed3616SPaul Mackerras return 
ret; 109982ed3616SPaul Mackerras } 110082ed3616SPaul Mackerras 1101c35635efSPaul Mackerras static void harvest_vpa_dirty(struct kvmppc_vpa *vpa, 1102c35635efSPaul Mackerras struct kvm_memory_slot *memslot, 1103c35635efSPaul Mackerras unsigned long *map) 1104c35635efSPaul Mackerras { 1105c35635efSPaul Mackerras unsigned long gfn; 1106c35635efSPaul Mackerras 1107c35635efSPaul Mackerras if (!vpa->dirty || !vpa->pinned_addr) 1108c35635efSPaul Mackerras return; 1109c35635efSPaul Mackerras gfn = vpa->gpa >> PAGE_SHIFT; 1110c35635efSPaul Mackerras if (gfn < memslot->base_gfn || 1111c35635efSPaul Mackerras gfn >= memslot->base_gfn + memslot->npages) 1112c35635efSPaul Mackerras return; 1113c35635efSPaul Mackerras 1114c35635efSPaul Mackerras vpa->dirty = false; 1115c35635efSPaul Mackerras if (map) 1116c35635efSPaul Mackerras __set_bit_le(gfn - memslot->base_gfn, map); 1117c35635efSPaul Mackerras } 1118c35635efSPaul Mackerras 1119dfe49dbdSPaul Mackerras long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, 1120dfe49dbdSPaul Mackerras unsigned long *map) 112182ed3616SPaul Mackerras { 112282ed3616SPaul Mackerras unsigned long i; 1123dfe49dbdSPaul Mackerras unsigned long *rmapp; 1124c35635efSPaul Mackerras struct kvm_vcpu *vcpu; 112582ed3616SPaul Mackerras 112682ed3616SPaul Mackerras preempt_disable(); 1127d89cc617STakuya Yoshikawa rmapp = memslot->arch.rmap; 112882ed3616SPaul Mackerras for (i = 0; i < memslot->npages; ++i) { 1129dfe49dbdSPaul Mackerras if (kvm_test_clear_dirty(kvm, rmapp) && map) 113082ed3616SPaul Mackerras __set_bit_le(i, map); 113182ed3616SPaul Mackerras ++rmapp; 113282ed3616SPaul Mackerras } 1133c35635efSPaul Mackerras 1134c35635efSPaul Mackerras /* Harvest dirty bits from VPA and DTL updates */ 1135c35635efSPaul Mackerras /* Note: we never modify the SLB shadow buffer areas */ 1136c35635efSPaul Mackerras kvm_for_each_vcpu(i, vcpu, kvm) { 1137c35635efSPaul Mackerras spin_lock(&vcpu->arch.vpa_update_lock); 1138c35635efSPaul Mackerras 
harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map); 1139c35635efSPaul Mackerras harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map); 1140c35635efSPaul Mackerras spin_unlock(&vcpu->arch.vpa_update_lock); 1141c35635efSPaul Mackerras } 114282ed3616SPaul Mackerras preempt_enable(); 114382ed3616SPaul Mackerras return 0; 114482ed3616SPaul Mackerras } 114582ed3616SPaul Mackerras 114693e60249SPaul Mackerras void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, 114793e60249SPaul Mackerras unsigned long *nb_ret) 114893e60249SPaul Mackerras { 114993e60249SPaul Mackerras struct kvm_memory_slot *memslot; 115093e60249SPaul Mackerras unsigned long gfn = gpa >> PAGE_SHIFT; 1151342d3db7SPaul Mackerras struct page *page, *pages[1]; 1152342d3db7SPaul Mackerras int npages; 1153c35635efSPaul Mackerras unsigned long hva, offset; 1154da9d1d7fSPaul Mackerras unsigned long pa; 115593e60249SPaul Mackerras unsigned long *physp; 11562c9097e4SPaul Mackerras int srcu_idx; 115793e60249SPaul Mackerras 11582c9097e4SPaul Mackerras srcu_idx = srcu_read_lock(&kvm->srcu); 115993e60249SPaul Mackerras memslot = gfn_to_memslot(kvm, gfn); 116093e60249SPaul Mackerras if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) 11612c9097e4SPaul Mackerras goto err; 1162342d3db7SPaul Mackerras if (!kvm->arch.using_mmu_notifiers) { 1163a66b48c3SPaul Mackerras physp = memslot->arch.slot_phys; 116493e60249SPaul Mackerras if (!physp) 11652c9097e4SPaul Mackerras goto err; 1166da9d1d7fSPaul Mackerras physp += gfn - memslot->base_gfn; 116793e60249SPaul Mackerras pa = *physp; 1168c77162deSPaul Mackerras if (!pa) { 1169342d3db7SPaul Mackerras if (kvmppc_get_guest_page(kvm, gfn, memslot, 1170342d3db7SPaul Mackerras PAGE_SIZE) < 0) 11712c9097e4SPaul Mackerras goto err; 1172c77162deSPaul Mackerras pa = *physp; 1173c77162deSPaul Mackerras } 1174da9d1d7fSPaul Mackerras page = pfn_to_page(pa >> PAGE_SHIFT); 1175de6c0b02SDavid Gibson get_page(page); 1176342d3db7SPaul Mackerras } else { 1177342d3db7SPaul Mackerras hva = 
gfn_to_hva_memslot(memslot, gfn); 1178342d3db7SPaul Mackerras npages = get_user_pages_fast(hva, 1, 1, pages); 1179342d3db7SPaul Mackerras if (npages < 1) 11802c9097e4SPaul Mackerras goto err; 1181342d3db7SPaul Mackerras page = pages[0]; 1182342d3db7SPaul Mackerras } 11832c9097e4SPaul Mackerras srcu_read_unlock(&kvm->srcu, srcu_idx); 11842c9097e4SPaul Mackerras 1185c35635efSPaul Mackerras offset = gpa & (PAGE_SIZE - 1); 118693e60249SPaul Mackerras if (nb_ret) 1187c35635efSPaul Mackerras *nb_ret = PAGE_SIZE - offset; 118893e60249SPaul Mackerras return page_address(page) + offset; 11892c9097e4SPaul Mackerras 11902c9097e4SPaul Mackerras err: 11912c9097e4SPaul Mackerras srcu_read_unlock(&kvm->srcu, srcu_idx); 11922c9097e4SPaul Mackerras return NULL; 119393e60249SPaul Mackerras } 119493e60249SPaul Mackerras 1195c35635efSPaul Mackerras void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa, 1196c35635efSPaul Mackerras bool dirty) 119793e60249SPaul Mackerras { 119893e60249SPaul Mackerras struct page *page = virt_to_page(va); 1199c35635efSPaul Mackerras struct kvm_memory_slot *memslot; 1200c35635efSPaul Mackerras unsigned long gfn; 1201c35635efSPaul Mackerras unsigned long *rmap; 1202c35635efSPaul Mackerras int srcu_idx; 120393e60249SPaul Mackerras 120493e60249SPaul Mackerras put_page(page); 1205c35635efSPaul Mackerras 1206c35635efSPaul Mackerras if (!dirty || !kvm->arch.using_mmu_notifiers) 1207c35635efSPaul Mackerras return; 1208c35635efSPaul Mackerras 1209c35635efSPaul Mackerras /* We need to mark this page dirty in the rmap chain */ 1210c35635efSPaul Mackerras gfn = gpa >> PAGE_SHIFT; 1211c35635efSPaul Mackerras srcu_idx = srcu_read_lock(&kvm->srcu); 1212c35635efSPaul Mackerras memslot = gfn_to_memslot(kvm, gfn); 1213c35635efSPaul Mackerras if (memslot) { 1214c35635efSPaul Mackerras rmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; 1215c35635efSPaul Mackerras lock_rmap(rmap); 1216c35635efSPaul Mackerras *rmap |= KVMPPC_RMAP_CHANGED; 
1217c35635efSPaul Mackerras unlock_rmap(rmap); 1218c35635efSPaul Mackerras } 1219c35635efSPaul Mackerras srcu_read_unlock(&kvm->srcu, srcu_idx); 122093e60249SPaul Mackerras } 122193e60249SPaul Mackerras 1222a2932923SPaul Mackerras /* 1223a2932923SPaul Mackerras * Functions for reading and writing the hash table via reads and 1224a2932923SPaul Mackerras * writes on a file descriptor. 1225a2932923SPaul Mackerras * 1226a2932923SPaul Mackerras * Reads return the guest view of the hash table, which has to be 1227a2932923SPaul Mackerras * pieced together from the real hash table and the guest_rpte 1228a2932923SPaul Mackerras * values in the revmap array. 1229a2932923SPaul Mackerras * 1230a2932923SPaul Mackerras * On writes, each HPTE written is considered in turn, and if it 1231a2932923SPaul Mackerras * is valid, it is written to the HPT as if an H_ENTER with the 1232a2932923SPaul Mackerras * exact flag set was done. When the invalid count is non-zero 1233a2932923SPaul Mackerras * in the header written to the stream, the kernel will make 1234a2932923SPaul Mackerras * sure that that many HPTEs are invalid, and invalidate them 1235a2932923SPaul Mackerras * if not. 1236a2932923SPaul Mackerras */ 1237a2932923SPaul Mackerras 1238a2932923SPaul Mackerras struct kvm_htab_ctx { 1239a2932923SPaul Mackerras unsigned long index; 1240a2932923SPaul Mackerras unsigned long flags; 1241a2932923SPaul Mackerras struct kvm *kvm; 1242a2932923SPaul Mackerras int first_pass; 1243a2932923SPaul Mackerras }; 1244a2932923SPaul Mackerras 1245a2932923SPaul Mackerras #define HPTE_SIZE (2 * sizeof(unsigned long)) 1246a2932923SPaul Mackerras 1247a1b4a0f6SPaul Mackerras /* 1248a1b4a0f6SPaul Mackerras * Returns 1 if this HPT entry has been modified or has pending 1249a1b4a0f6SPaul Mackerras * R/C bit changes. 
 */
static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp)
{
	unsigned long rcbits_unset;

	if (revp->guest_rpte & HPTE_GR_MODIFIED)
		return 1;

	/* Also need to consider changes in reference and changed bits */
	rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
	if ((hptp[0] & HPTE_V_VALID) && (hptp[1] & rcbits_unset))
		return 1;

	return 0;
}

/*
 * Capture one HPT entry into hpte[0..1] in the guest's view,
 * harvesting any pending R/C bits from the real HPTE into the
 * guest_rpte image while the entry is locked.  Returns 1 if the
 * entry (re-checked under the lock) matches @want_valid and should
 * be emitted on this pass, 0 otherwise.
 */
static long record_hpte(unsigned long flags, unsigned long *hptp,
			unsigned long *hpte, struct revmap_entry *revp,
			int want_valid, int first_pass)
{
	unsigned long v, r;
	unsigned long rcbits_unset;
	int ok = 1;
	int valid, dirty;

	/* Unmodified entries are uninteresting except on the first pass */
	dirty = hpte_dirty(revp, hptp);
	if (!first_pass && !dirty)
		return 0;

	/* Preliminary valid check, before taking the HPTE lock */
	valid = 0;
	if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) {
		valid = 1;
		if ((flags & KVM_GET_HTAB_BOLTED_ONLY) &&
		    !(hptp[0] & HPTE_V_BOLTED))
			valid = 0;
	}
	if (valid != want_valid)
		return 0;

	v = r = 0;
	if (valid || dirty) {
		/* lock the HPTE so it's stable and read it */
		preempt_disable();
		while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
			cpu_relax();
		v = hptp[0];

		/* re-evaluate valid and dirty from synchronized HPTE value */
		valid = !!(v & HPTE_V_VALID);
		dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);

		/* Harvest R and C into guest view if necessary */
		rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
		if (valid && (rcbits_unset & hptp[1])) {
			revp->guest_rpte |= (hptp[1] & (HPTE_R_R | HPTE_R_C)) |
				HPTE_GR_MODIFIED;
			dirty = 1;
		}

		/* ABSENT entries are valid from the guest's point of view */
		if (v & HPTE_V_ABSENT) {
			v &= ~HPTE_V_ABSENT;
			v |= HPTE_V_VALID;
			valid = 1;
		}
		if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED))
			valid = 0;

		r = revp->guest_rpte;
		/* only clear modified if this is the right sort of entry */
		if (valid == want_valid && dirty) {
			r &= ~HPTE_GR_MODIFIED;
			revp->guest_rpte = r;
		}
		/* release barrier before dropping HPTE_V_HVLOCK */
		asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
		hptp[0] &= ~HPTE_V_HVLOCK;
		preempt_enable();
		if (!(valid == want_valid && (first_pass || dirty)))
			ok = 0;
	}
	hpte[0] = v;
	hpte[1] = r;
	return ok;
}

/*
 * read() handler for the HPT fd: stream out runs of valid and invalid
 * entries, each run preceded by a struct kvm_get_htab_header.  After
 * the first full pass, only entries flagged dirty are emitted.
 */
static ssize_t kvm_htab_read(struct file *file, char __user *buf,
			     size_t count, loff_t *ppos)
{
	struct kvm_htab_ctx *ctx = file->private_data;
	struct kvm *kvm = ctx->kvm;
	struct kvm_get_htab_header hdr;
	unsigned long *hptp;
	struct revmap_entry *revp;
	unsigned long i, nb, nw;
	unsigned long __user *lbuf;
	struct kvm_get_htab_header __user *hptr;
	unsigned long flags;
	int first_pass;
	unsigned long hpte[2];

	if (!access_ok(VERIFY_WRITE, buf, count))
		return -EFAULT;

	first_pass = ctx->first_pass;
	flags = ctx->flags;

	/* resume from where the previous read() left off */
	i = ctx->index;
	hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
	revp = kvm->arch.revmap + i;
	lbuf = (unsigned long __user *)buf;

	nb = 0;
	while (nb + sizeof(hdr) + HPTE_SIZE < count) {
		/* Initialize header */
		hptr = (struct kvm_get_htab_header __user *)buf;
		hdr.n_valid = 0;
		hdr.n_invalid = 0;
		nw = nb;		/* remember offset in case header is dropped */
		nb += sizeof(hdr);
		lbuf = (unsigned long __user *)(buf + sizeof(hdr));

		/* Skip uninteresting entries, i.e. clean on not-first pass */
		if (!first_pass) {
			while (i < kvm->arch.hpt_npte &&
			       !hpte_dirty(revp, hptp)) {
				++i;
				hptp += 2;
				++revp;
			}
		}
		hdr.index = i;

		/* Grab a series of valid entries */
		while (i < kvm->arch.hpt_npte &&
		       hdr.n_valid < 0xffff &&
		       nb + HPTE_SIZE < count &&
		       record_hpte(flags, hptp, hpte, revp, 1, first_pass)) {
			/* valid entry, write it out */
			++hdr.n_valid;
			if (__put_user(hpte[0], lbuf) ||
			    __put_user(hpte[1], lbuf + 1))
				return -EFAULT;
			nb += HPTE_SIZE;
			lbuf += 2;
			++i;
			hptp += 2;
			++revp;
		}
		/* Now skip invalid entries while we can */
		while (i < kvm->arch.hpt_npte &&
		       hdr.n_invalid < 0xffff &&
		       record_hpte(flags, hptp, hpte, revp, 0, first_pass)) {
			/* found an invalid entry */
			++hdr.n_invalid;
			++i;
			hptp += 2;
			++revp;
		}

		if (hdr.n_valid || hdr.n_invalid) {
			/* write back the header */
			if (__copy_to_user(hptr, &hdr, sizeof(hdr)))
				return -EFAULT;
			nw = nb;
			buf = (char __user *)lbuf;
		} else {
			/* empty header: rewind, don't emit it */
			nb = nw;
		}

		/* Check if we've wrapped around the hash table */
		if (i >= kvm->arch.hpt_npte) {
			i = 0;
			ctx->first_pass = 0;
			break;
		}
	}

	ctx->index = i;

	return nb;
}

/*
 * write() handler for the HPT fd: consume a stream of headers each
 * followed by n_valid HPTE images; install each valid entry with
 * H_EXACT semantics and invalidate the n_invalid entries after them.
 */
static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
			      size_t count, loff_t *ppos)
{
	struct kvm_htab_ctx *ctx = file->private_data;
	struct kvm *kvm = ctx->kvm;
	struct kvm_get_htab_header hdr;
	unsigned long i, j;
	unsigned long v, r;
	unsigned long __user *lbuf;
	unsigned long *hptp;
	unsigned long tmp[2];
	ssize_t nb;
	long int err, ret;
	int rma_setup;

	if (!access_ok(VERIFY_READ, buf, count))
		return
		       -EFAULT;

	/* lock out vcpus from running while we're doing this */
	mutex_lock(&kvm->lock);
	rma_setup = kvm->arch.rma_setup_done;
	if (rma_setup) {
		kvm->arch.rma_setup_done = 0;	/* temporarily */
		/* order rma_setup_done vs. vcpus_running */
		smp_mb();
		if (atomic_read(&kvm->arch.vcpus_running)) {
			kvm->arch.rma_setup_done = 1;
			mutex_unlock(&kvm->lock);
			return -EBUSY;
		}
	}

	err = 0;
	for (nb = 0; nb + sizeof(hdr) <= count; ) {
		err = -EFAULT;
		if (__copy_from_user(&hdr, buf, sizeof(hdr)))
			break;

		/* stop (cleanly) if the valid entries would overrun the buffer */
		err = 0;
		if (nb + hdr.n_valid * HPTE_SIZE > count)
			break;

		nb += sizeof(hdr);
		buf += sizeof(hdr);

		/* reject indices outside the hash table */
		err = -EINVAL;
		i = hdr.index;
		if (i >= kvm->arch.hpt_npte ||
		    i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte)
			break;

		hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
		lbuf = (unsigned long __user *)buf;
		for (j = 0; j < hdr.n_valid; ++j) {
			err = -EFAULT;
			if (__get_user(v, lbuf) || __get_user(r, lbuf + 1))
				goto out;
			err = -EINVAL;
			if (!(v & HPTE_V_VALID))
				goto out;
			lbuf += 2;
			nb += HPTE_SIZE;

			/* evict whatever currently occupies this slot */
			if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT))
				kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
			err = -EIO;
			ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r,
							 tmp);
			if (ret != H_SUCCESS) {
				pr_err("kvm_htab_write ret %ld i=%ld v=%lx "
				       "r=%lx\n", ret, i, v, r);
				goto out;
			}
			/*
			 * First VRMA entry seen: derive the VRMA SLB value
			 * and LPCR VRMASD field from its page size.
			 */
			if (!rma_setup && is_vrma_hpte(v)) {
				unsigned long psize = hpte_page_size(v, r);
				unsigned long senc = slb_pgsize_encoding(psize);
				unsigned long lpcr;

				kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
					(VRMA_VSID << SLB_VSID_SHIFT_1T);
				lpcr = senc << (LPCR_VRMASD_SH - 4);
				kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
				rma_setup = 1;
			}
			++i;
			hptp += 2;
		}

		/* then make sure the following n_invalid entries are invalid */
		for (j = 0; j < hdr.n_invalid; ++j) {
			if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT))
				kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
			++i;
			hptp += 2;
		}
		err = 0;
	}
Mackerras 1529a2932923SPaul Mackerras out: 1530a2932923SPaul Mackerras /* Order HPTE updates vs. rma_setup_done */ 1531a2932923SPaul Mackerras smp_wmb(); 1532a2932923SPaul Mackerras kvm->arch.rma_setup_done = rma_setup; 1533a2932923SPaul Mackerras mutex_unlock(&kvm->lock); 1534a2932923SPaul Mackerras 1535a2932923SPaul Mackerras if (err) 1536a2932923SPaul Mackerras return err; 1537a2932923SPaul Mackerras return nb; 1538a2932923SPaul Mackerras } 1539a2932923SPaul Mackerras 1540a2932923SPaul Mackerras static int kvm_htab_release(struct inode *inode, struct file *filp) 1541a2932923SPaul Mackerras { 1542a2932923SPaul Mackerras struct kvm_htab_ctx *ctx = filp->private_data; 1543a2932923SPaul Mackerras 1544a2932923SPaul Mackerras filp->private_data = NULL; 1545a2932923SPaul Mackerras if (!(ctx->flags & KVM_GET_HTAB_WRITE)) 1546a2932923SPaul Mackerras atomic_dec(&ctx->kvm->arch.hpte_mod_interest); 1547a2932923SPaul Mackerras kvm_put_kvm(ctx->kvm); 1548a2932923SPaul Mackerras kfree(ctx); 1549a2932923SPaul Mackerras return 0; 1550a2932923SPaul Mackerras } 1551a2932923SPaul Mackerras 155275ef9de1SAl Viro static const struct file_operations kvm_htab_fops = { 1553a2932923SPaul Mackerras .read = kvm_htab_read, 1554a2932923SPaul Mackerras .write = kvm_htab_write, 1555a2932923SPaul Mackerras .llseek = default_llseek, 1556a2932923SPaul Mackerras .release = kvm_htab_release, 1557a2932923SPaul Mackerras }; 1558a2932923SPaul Mackerras 1559a2932923SPaul Mackerras int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf) 1560a2932923SPaul Mackerras { 1561a2932923SPaul Mackerras int ret; 1562a2932923SPaul Mackerras struct kvm_htab_ctx *ctx; 1563a2932923SPaul Mackerras int rwflag; 1564a2932923SPaul Mackerras 1565a2932923SPaul Mackerras /* reject flags we don't recognize */ 1566a2932923SPaul Mackerras if (ghf->flags & ~(KVM_GET_HTAB_BOLTED_ONLY | KVM_GET_HTAB_WRITE)) 1567a2932923SPaul Mackerras return -EINVAL; 1568a2932923SPaul Mackerras ctx = kzalloc(sizeof(*ctx), 
GFP_KERNEL); 1569a2932923SPaul Mackerras if (!ctx) 1570a2932923SPaul Mackerras return -ENOMEM; 1571a2932923SPaul Mackerras kvm_get_kvm(kvm); 1572a2932923SPaul Mackerras ctx->kvm = kvm; 1573a2932923SPaul Mackerras ctx->index = ghf->start_index; 1574a2932923SPaul Mackerras ctx->flags = ghf->flags; 1575a2932923SPaul Mackerras ctx->first_pass = 1; 1576a2932923SPaul Mackerras 1577a2932923SPaul Mackerras rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY; 15782f84d5eaSYann Droneaud ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC); 1579a2932923SPaul Mackerras if (ret < 0) { 1580a2932923SPaul Mackerras kvm_put_kvm(kvm); 1581a2932923SPaul Mackerras return ret; 1582a2932923SPaul Mackerras } 1583a2932923SPaul Mackerras 1584a2932923SPaul Mackerras if (rwflag == O_RDONLY) { 1585a2932923SPaul Mackerras mutex_lock(&kvm->slots_lock); 1586a2932923SPaul Mackerras atomic_inc(&kvm->arch.hpte_mod_interest); 1587a2932923SPaul Mackerras /* make sure kvmppc_do_h_enter etc. 
see the increment */ 1588a2932923SPaul Mackerras synchronize_srcu_expedited(&kvm->srcu); 1589a2932923SPaul Mackerras mutex_unlock(&kvm->slots_lock); 1590a2932923SPaul Mackerras } 1591a2932923SPaul Mackerras 1592a2932923SPaul Mackerras return ret; 1593a2932923SPaul Mackerras } 1594a2932923SPaul Mackerras 1595de56a948SPaul Mackerras void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) 1596de56a948SPaul Mackerras { 1597de56a948SPaul Mackerras struct kvmppc_mmu *mmu = &vcpu->arch.mmu; 1598de56a948SPaul Mackerras 15999e368f29SPaul Mackerras if (cpu_has_feature(CPU_FTR_ARCH_206)) 16009e368f29SPaul Mackerras vcpu->arch.slb_nr = 32; /* POWER7 */ 16019e368f29SPaul Mackerras else 16029e368f29SPaul Mackerras vcpu->arch.slb_nr = 64; 1603de56a948SPaul Mackerras 1604de56a948SPaul Mackerras mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate; 1605de56a948SPaul Mackerras mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr; 1606de56a948SPaul Mackerras 1607de56a948SPaul Mackerras vcpu->arch.hflags |= BOOK3S_HFLAG_SLB; 1608de56a948SPaul Mackerras } 1609