1de56a948SPaul Mackerras /* 2de56a948SPaul Mackerras * This program is free software; you can redistribute it and/or modify 3de56a948SPaul Mackerras * it under the terms of the GNU General Public License, version 2, as 4de56a948SPaul Mackerras * published by the Free Software Foundation. 5de56a948SPaul Mackerras * 6de56a948SPaul Mackerras * This program is distributed in the hope that it will be useful, 7de56a948SPaul Mackerras * but WITHOUT ANY WARRANTY; without even the implied warranty of 8de56a948SPaul Mackerras * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9de56a948SPaul Mackerras * GNU General Public License for more details. 10de56a948SPaul Mackerras * 11de56a948SPaul Mackerras * You should have received a copy of the GNU General Public License 12de56a948SPaul Mackerras * along with this program; if not, write to the Free Software 13de56a948SPaul Mackerras * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14de56a948SPaul Mackerras * 15de56a948SPaul Mackerras * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> 16de56a948SPaul Mackerras */ 17de56a948SPaul Mackerras 18de56a948SPaul Mackerras #include <linux/types.h> 19de56a948SPaul Mackerras #include <linux/string.h> 20de56a948SPaul Mackerras #include <linux/kvm.h> 21de56a948SPaul Mackerras #include <linux/kvm_host.h> 22de56a948SPaul Mackerras #include <linux/highmem.h> 23de56a948SPaul Mackerras #include <linux/gfp.h> 24de56a948SPaul Mackerras #include <linux/slab.h> 25de56a948SPaul Mackerras #include <linux/hugetlb.h> 268936dda4SPaul Mackerras #include <linux/vmalloc.h> 272c9097e4SPaul Mackerras #include <linux/srcu.h> 28a2932923SPaul Mackerras #include <linux/anon_inodes.h> 29a2932923SPaul Mackerras #include <linux/file.h> 30de56a948SPaul Mackerras 31de56a948SPaul Mackerras #include <asm/tlbflush.h> 32de56a948SPaul Mackerras #include <asm/kvm_ppc.h> 33de56a948SPaul Mackerras #include <asm/kvm_book3s.h> 34de56a948SPaul Mackerras #include <asm/mmu-hash64.h> 35de56a948SPaul Mackerras #include <asm/hvcall.h> 36de56a948SPaul Mackerras #include <asm/synch.h> 37de56a948SPaul Mackerras #include <asm/ppc-opcode.h> 38de56a948SPaul Mackerras #include <asm/cputable.h> 39de56a948SPaul Mackerras 40990978e9SAneesh Kumar K.V #include "book3s_hv_cma.h" 41990978e9SAneesh Kumar K.V 429e368f29SPaul Mackerras /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ 439e368f29SPaul Mackerras #define MAX_LPID_970 63 44de56a948SPaul Mackerras 4532fad281SPaul Mackerras /* Power architecture requires HPT is at least 256kB */ 4632fad281SPaul Mackerras #define PPC_MIN_HPT_ORDER 18 4732fad281SPaul Mackerras 487ed661bfSPaul Mackerras static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, 497ed661bfSPaul Mackerras long pte_index, unsigned long pteh, 507ed661bfSPaul Mackerras unsigned long ptel, unsigned long *pte_idx_ret); 51a64fd707SPaul Mackerras static void kvmppc_rmap_reset(struct kvm *kvm); 527ed661bfSPaul Mackerras 5332fad281SPaul Mackerras long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) 54de56a948SPaul Mackerras { 55de56a948SPaul Mackerras unsigned long hpt; 568936dda4SPaul Mackerras struct revmap_entry *rev; 57fa61a4e3SAneesh Kumar K.V struct page *page = NULL; 58fa61a4e3SAneesh Kumar K.V long order = KVM_DEFAULT_HPT_ORDER; 59de56a948SPaul Mackerras 6032fad281SPaul Mackerras if (htab_orderp) { 6132fad281SPaul Mackerras order = *htab_orderp; 6232fad281SPaul Mackerras if (order < PPC_MIN_HPT_ORDER) 6332fad281SPaul Mackerras order = PPC_MIN_HPT_ORDER; 6432fad281SPaul Mackerras } 6532fad281SPaul Mackerras 66fa61a4e3SAneesh Kumar K.V kvm->arch.hpt_cma_alloc = 0; 6732fad281SPaul Mackerras /* 6832fad281SPaul Mackerras * try first to allocate it from the kernel page allocator. 69fa61a4e3SAneesh Kumar K.V * We keep the CMA reserved for failed allocation. 7032fad281SPaul Mackerras */ 7132fad281SPaul Mackerras hpt = __get_free_pages(GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT | 7232fad281SPaul Mackerras __GFP_NOWARN, order - PAGE_SHIFT); 7332fad281SPaul Mackerras 7432fad281SPaul Mackerras /* Next try to allocate from the preallocated pool */ 7532fad281SPaul Mackerras if (!hpt) { 76990978e9SAneesh Kumar K.V VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER); 77fa61a4e3SAneesh Kumar K.V page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT)); 78fa61a4e3SAneesh Kumar K.V if (page) { 79fa61a4e3SAneesh Kumar K.V hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); 80fa61a4e3SAneesh Kumar K.V kvm->arch.hpt_cma_alloc = 1; 81fa61a4e3SAneesh Kumar K.V } else 82fa61a4e3SAneesh Kumar K.V --order; 83d2a1b483SAlexander Graf } 84d2a1b483SAlexander Graf 8532fad281SPaul Mackerras /* Lastly try successively smaller sizes from the page allocator */ 8632fad281SPaul Mackerras while (!hpt && order > PPC_MIN_HPT_ORDER) { 8732fad281SPaul Mackerras hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT| 8832fad281SPaul Mackerras __GFP_NOWARN, order - PAGE_SHIFT); 8932fad281SPaul Mackerras if (!hpt) 9032fad281SPaul Mackerras --order; 91de56a948SPaul Mackerras } 9232fad281SPaul Mackerras 9332fad281SPaul Mackerras if (!hpt) 9432fad281SPaul Mackerras return -ENOMEM; 9532fad281SPaul Mackerras 96de56a948SPaul Mackerras kvm->arch.hpt_virt = hpt; 9732fad281SPaul Mackerras kvm->arch.hpt_order = order; 9832fad281SPaul Mackerras /* HPTEs are 2**4 bytes long */ 9932fad281SPaul Mackerras kvm->arch.hpt_npte = 1ul << (order - 4); 10032fad281SPaul Mackerras /* 128 (2**7) bytes in each HPTEG */ 10132fad281SPaul Mackerras kvm->arch.hpt_mask = (1ul << (order - 7)) - 1; 102de56a948SPaul Mackerras 1038936dda4SPaul Mackerras /* Allocate reverse map array */ 10432fad281SPaul Mackerras rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte); 1058936dda4SPaul Mackerras if (!rev) { 1068936dda4SPaul Mackerras pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n"); 1078936dda4SPaul Mackerras goto out_freehpt; 1088936dda4SPaul Mackerras } 1098936dda4SPaul Mackerras kvm->arch.revmap = rev; 11032fad281SPaul Mackerras kvm->arch.sdr1 = __pa(hpt) | (order - 18); 1118936dda4SPaul Mackerras 11232fad281SPaul Mackerras pr_info("KVM guest htab at %lx (order %ld), LPID %x\n", 11332fad281SPaul Mackerras hpt, order, kvm->arch.lpid); 114de56a948SPaul Mackerras 11532fad281SPaul Mackerras if (htab_orderp) 11632fad281SPaul Mackerras *htab_orderp = order; 117de56a948SPaul Mackerras return 0; 1188936dda4SPaul Mackerras 1198936dda4SPaul Mackerras out_freehpt: 120fa61a4e3SAneesh Kumar K.V if (kvm->arch.hpt_cma_alloc) 121fa61a4e3SAneesh Kumar K.V kvm_release_hpt(page, 1 << (order - PAGE_SHIFT)); 12232fad281SPaul Mackerras else 12332fad281SPaul Mackerras free_pages(hpt, order - PAGE_SHIFT); 1248936dda4SPaul Mackerras return -ENOMEM; 125de56a948SPaul Mackerras } 126de56a948SPaul Mackerras 12732fad281SPaul Mackerras long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp) 12832fad281SPaul Mackerras { 12932fad281SPaul Mackerras long err = -EBUSY; 13032fad281SPaul Mackerras long order; 13132fad281SPaul Mackerras 13232fad281SPaul Mackerras mutex_lock(&kvm->lock); 13332fad281SPaul Mackerras if (kvm->arch.rma_setup_done) { 13432fad281SPaul Mackerras kvm->arch.rma_setup_done = 0; 13532fad281SPaul Mackerras /* order rma_setup_done vs. vcpus_running */ 13632fad281SPaul Mackerras smp_mb(); 13732fad281SPaul Mackerras if (atomic_read(&kvm->arch.vcpus_running)) { 13832fad281SPaul Mackerras kvm->arch.rma_setup_done = 1; 13932fad281SPaul Mackerras goto out; 14032fad281SPaul Mackerras } 14132fad281SPaul Mackerras } 14232fad281SPaul Mackerras if (kvm->arch.hpt_virt) { 14332fad281SPaul Mackerras order = kvm->arch.hpt_order; 14432fad281SPaul Mackerras /* Set the entire HPT to 0, i.e. invalid HPTEs */ 14532fad281SPaul Mackerras memset((void *)kvm->arch.hpt_virt, 0, 1ul << order); 14632fad281SPaul Mackerras /* 147a64fd707SPaul Mackerras * Reset all the reverse-mapping chains for all memslots 148a64fd707SPaul Mackerras */ 149a64fd707SPaul Mackerras kvmppc_rmap_reset(kvm); 1501b400ba0SPaul Mackerras /* Ensure that each vcpu will flush its TLB on next entry. */ 1511b400ba0SPaul Mackerras cpumask_setall(&kvm->arch.need_tlb_flush); 15232fad281SPaul Mackerras *htab_orderp = order; 15332fad281SPaul Mackerras err = 0; 15432fad281SPaul Mackerras } else { 15532fad281SPaul Mackerras err = kvmppc_alloc_hpt(kvm, htab_orderp); 15632fad281SPaul Mackerras order = *htab_orderp; 15732fad281SPaul Mackerras } 15832fad281SPaul Mackerras out: 15932fad281SPaul Mackerras mutex_unlock(&kvm->lock); 16032fad281SPaul Mackerras return err; 16132fad281SPaul Mackerras } 16232fad281SPaul Mackerras 163de56a948SPaul Mackerras void kvmppc_free_hpt(struct kvm *kvm) 164de56a948SPaul Mackerras { 165043cc4d7SScott Wood kvmppc_free_lpid(kvm->arch.lpid); 1668936dda4SPaul Mackerras vfree(kvm->arch.revmap); 167fa61a4e3SAneesh Kumar K.V if (kvm->arch.hpt_cma_alloc) 168fa61a4e3SAneesh Kumar K.V kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt), 169fa61a4e3SAneesh Kumar K.V 1 << (kvm->arch.hpt_order - PAGE_SHIFT)); 170d2a1b483SAlexander Graf else 17132fad281SPaul Mackerras free_pages(kvm->arch.hpt_virt, 17232fad281SPaul Mackerras kvm->arch.hpt_order - PAGE_SHIFT); 173de56a948SPaul Mackerras } 174de56a948SPaul Mackerras 175da9d1d7fSPaul Mackerras /* Bits in first HPTE dword for pagesize 4k, 64k or 16M */ 176da9d1d7fSPaul Mackerras static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize) 177de56a948SPaul Mackerras { 178da9d1d7fSPaul Mackerras return (pgsize > 0x1000) ? HPTE_V_LARGE : 0; 179da9d1d7fSPaul Mackerras } 180da9d1d7fSPaul Mackerras 181da9d1d7fSPaul Mackerras /* Bits in second HPTE dword for pagesize 4k, 64k or 16M */ 182da9d1d7fSPaul Mackerras static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize) 183da9d1d7fSPaul Mackerras { 184da9d1d7fSPaul Mackerras return (pgsize == 0x10000) ? 0x1000 : 0; 185da9d1d7fSPaul Mackerras } 186da9d1d7fSPaul Mackerras 187da9d1d7fSPaul Mackerras void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, 188da9d1d7fSPaul Mackerras unsigned long porder) 189da9d1d7fSPaul Mackerras { 190de56a948SPaul Mackerras unsigned long i; 191b2b2f165SPaul Mackerras unsigned long npages; 192c77162deSPaul Mackerras unsigned long hp_v, hp_r; 193c77162deSPaul Mackerras unsigned long addr, hash; 194da9d1d7fSPaul Mackerras unsigned long psize; 195da9d1d7fSPaul Mackerras unsigned long hp0, hp1; 1967ed661bfSPaul Mackerras unsigned long idx_ret; 197c77162deSPaul Mackerras long ret; 19832fad281SPaul Mackerras struct kvm *kvm = vcpu->kvm; 199de56a948SPaul Mackerras 200da9d1d7fSPaul Mackerras psize = 1ul << porder; 201da9d1d7fSPaul Mackerras npages = memslot->npages >> (porder - PAGE_SHIFT); 202de56a948SPaul Mackerras 203de56a948SPaul Mackerras /* VRMA can't be > 1TB */ 2048936dda4SPaul Mackerras if (npages > 1ul << (40 - porder)) 2058936dda4SPaul Mackerras npages = 1ul << (40 - porder); 206de56a948SPaul Mackerras /* Can't use more than 1 HPTE per HPTEG */ 20732fad281SPaul Mackerras if (npages > kvm->arch.hpt_mask + 1) 20832fad281SPaul Mackerras npages = kvm->arch.hpt_mask + 1; 209de56a948SPaul Mackerras 210da9d1d7fSPaul Mackerras hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | 211da9d1d7fSPaul Mackerras HPTE_V_BOLTED | hpte0_pgsize_encoding(psize); 212da9d1d7fSPaul Mackerras hp1 = hpte1_pgsize_encoding(psize) | 213da9d1d7fSPaul Mackerras HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX; 214da9d1d7fSPaul Mackerras 215de56a948SPaul Mackerras for (i = 0; i < npages; ++i) { 216c77162deSPaul Mackerras addr = i << porder; 217de56a948SPaul Mackerras /* can't use hpt_hash since va > 64 bits */ 21832fad281SPaul Mackerras hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & kvm->arch.hpt_mask; 219de56a948SPaul Mackerras /* 220de56a948SPaul Mackerras * We assume that the hash table is empty and no 221de56a948SPaul Mackerras * vcpus are using it at this stage. Since we create 222de56a948SPaul Mackerras * at most one HPTE per HPTEG, we just assume entry 7 223de56a948SPaul Mackerras * is available and use it. 224de56a948SPaul Mackerras */ 2258936dda4SPaul Mackerras hash = (hash << 3) + 7; 226da9d1d7fSPaul Mackerras hp_v = hp0 | ((addr >> 16) & ~0x7fUL); 227da9d1d7fSPaul Mackerras hp_r = hp1 | addr; 2287ed661bfSPaul Mackerras ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, hash, hp_v, hp_r, 2297ed661bfSPaul Mackerras &idx_ret); 230c77162deSPaul Mackerras if (ret != H_SUCCESS) { 231c77162deSPaul Mackerras pr_err("KVM: map_vrma at %lx failed, ret=%ld\n", 232c77162deSPaul Mackerras addr, ret); 233c77162deSPaul Mackerras break; 234c77162deSPaul Mackerras } 235de56a948SPaul Mackerras } 236de56a948SPaul Mackerras } 237de56a948SPaul Mackerras 238de56a948SPaul Mackerras int kvmppc_mmu_hv_init(void) 239de56a948SPaul Mackerras { 2409e368f29SPaul Mackerras unsigned long host_lpid, rsvd_lpid; 2419e368f29SPaul Mackerras 2429e368f29SPaul Mackerras if (!cpu_has_feature(CPU_FTR_HVMODE)) 243de56a948SPaul Mackerras return -EINVAL; 2449e368f29SPaul Mackerras 245043cc4d7SScott Wood /* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */ 2469e368f29SPaul Mackerras if (cpu_has_feature(CPU_FTR_ARCH_206)) { 2479e368f29SPaul Mackerras host_lpid = mfspr(SPRN_LPID); /* POWER7 */ 2489e368f29SPaul Mackerras rsvd_lpid = LPID_RSVD; 2499e368f29SPaul Mackerras } else { 2509e368f29SPaul Mackerras host_lpid = 0; /* PPC970 */ 2519e368f29SPaul Mackerras rsvd_lpid = MAX_LPID_970; 2529e368f29SPaul Mackerras } 2539e368f29SPaul Mackerras 254043cc4d7SScott Wood kvmppc_init_lpid(rsvd_lpid + 1); 255043cc4d7SScott Wood 256043cc4d7SScott Wood kvmppc_claim_lpid(host_lpid); 2579e368f29SPaul Mackerras /* rsvd_lpid is reserved for use in partition switching */ 258043cc4d7SScott Wood kvmppc_claim_lpid(rsvd_lpid); 259de56a948SPaul Mackerras 260de56a948SPaul Mackerras return 0; 261de56a948SPaul Mackerras } 262de56a948SPaul Mackerras 263de56a948SPaul Mackerras void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 264de56a948SPaul Mackerras { 265de56a948SPaul Mackerras } 266de56a948SPaul Mackerras 267de56a948SPaul Mackerras static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) 268de56a948SPaul Mackerras { 269de56a948SPaul Mackerras kvmppc_set_msr(vcpu, MSR_SF | MSR_ME); 270de56a948SPaul Mackerras } 271de56a948SPaul Mackerras 272c77162deSPaul Mackerras /* 273c77162deSPaul Mackerras * This is called to get a reference to a guest page if there isn't 274a66b48c3SPaul Mackerras * one already in the memslot->arch.slot_phys[] array. 275c77162deSPaul Mackerras */ 276c77162deSPaul Mackerras static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, 277da9d1d7fSPaul Mackerras struct kvm_memory_slot *memslot, 278da9d1d7fSPaul Mackerras unsigned long psize) 279c77162deSPaul Mackerras { 280c77162deSPaul Mackerras unsigned long start; 281da9d1d7fSPaul Mackerras long np, err; 282da9d1d7fSPaul Mackerras struct page *page, *hpage, *pages[1]; 283da9d1d7fSPaul Mackerras unsigned long s, pgsize; 284c77162deSPaul Mackerras unsigned long *physp; 2859d0ef5eaSPaul Mackerras unsigned int is_io, got, pgorder; 2869d0ef5eaSPaul Mackerras struct vm_area_struct *vma; 287da9d1d7fSPaul Mackerras unsigned long pfn, i, npages; 288c77162deSPaul Mackerras 289a66b48c3SPaul Mackerras physp = memslot->arch.slot_phys; 290c77162deSPaul Mackerras if (!physp) 291c77162deSPaul Mackerras return -EINVAL; 292da9d1d7fSPaul Mackerras if (physp[gfn - memslot->base_gfn]) 293c77162deSPaul Mackerras return 0; 294c77162deSPaul Mackerras 2959d0ef5eaSPaul Mackerras is_io = 0; 2969d0ef5eaSPaul Mackerras got = 0; 297c77162deSPaul Mackerras page = NULL; 298da9d1d7fSPaul Mackerras pgsize = psize; 2999d0ef5eaSPaul Mackerras err = -EINVAL; 300c77162deSPaul Mackerras start = gfn_to_hva_memslot(memslot, gfn); 301c77162deSPaul Mackerras 302c77162deSPaul Mackerras /* Instantiate and get the page we want access to */ 303c77162deSPaul Mackerras np = get_user_pages_fast(start, 1, 1, pages); 3049d0ef5eaSPaul Mackerras if (np != 1) { 3059d0ef5eaSPaul Mackerras /* Look up the vma for the page */ 3069d0ef5eaSPaul Mackerras down_read(¤t->mm->mmap_sem); 3079d0ef5eaSPaul Mackerras vma = find_vma(current->mm, start); 3089d0ef5eaSPaul Mackerras if (!vma || vma->vm_start > start || 3099d0ef5eaSPaul Mackerras start + psize > vma->vm_end || 3109d0ef5eaSPaul Mackerras !(vma->vm_flags & VM_PFNMAP)) 3119d0ef5eaSPaul Mackerras goto up_err; 3129d0ef5eaSPaul Mackerras is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot)); 3139d0ef5eaSPaul Mackerras pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); 3149d0ef5eaSPaul Mackerras /* check alignment of pfn vs. requested page size */ 3159d0ef5eaSPaul Mackerras if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1))) 3169d0ef5eaSPaul Mackerras goto up_err; 3179d0ef5eaSPaul Mackerras up_read(¤t->mm->mmap_sem); 3189d0ef5eaSPaul Mackerras 3199d0ef5eaSPaul Mackerras } else { 320c77162deSPaul Mackerras page = pages[0]; 321da9d1d7fSPaul Mackerras got = KVMPPC_GOT_PAGE; 322c77162deSPaul Mackerras 323da9d1d7fSPaul Mackerras /* See if this is a large page */ 324da9d1d7fSPaul Mackerras s = PAGE_SIZE; 325da9d1d7fSPaul Mackerras if (PageHuge(page)) { 326da9d1d7fSPaul Mackerras hpage = compound_head(page); 327da9d1d7fSPaul Mackerras s <<= compound_order(hpage); 328da9d1d7fSPaul Mackerras /* Get the whole large page if slot alignment is ok */ 329da9d1d7fSPaul Mackerras if (s > psize && slot_is_aligned(memslot, s) && 330da9d1d7fSPaul Mackerras !(memslot->userspace_addr & (s - 1))) { 331da9d1d7fSPaul Mackerras start &= ~(s - 1); 332da9d1d7fSPaul Mackerras pgsize = s; 333de6c0b02SDavid Gibson get_page(hpage); 334de6c0b02SDavid Gibson put_page(page); 335da9d1d7fSPaul Mackerras page = hpage; 336c77162deSPaul Mackerras } 337da9d1d7fSPaul Mackerras } 338da9d1d7fSPaul Mackerras if (s < psize) 339da9d1d7fSPaul Mackerras goto out; 340c77162deSPaul Mackerras pfn = page_to_pfn(page); 3419d0ef5eaSPaul Mackerras } 342c77162deSPaul Mackerras 343da9d1d7fSPaul Mackerras npages = pgsize >> PAGE_SHIFT; 344da9d1d7fSPaul Mackerras pgorder = __ilog2(npages); 345da9d1d7fSPaul Mackerras physp += (gfn - memslot->base_gfn) & ~(npages - 1); 346c77162deSPaul Mackerras spin_lock(&kvm->arch.slot_phys_lock); 347da9d1d7fSPaul Mackerras for (i = 0; i < npages; ++i) { 348da9d1d7fSPaul Mackerras if (!physp[i]) { 3499d0ef5eaSPaul Mackerras physp[i] = ((pfn + i) << PAGE_SHIFT) + 3509d0ef5eaSPaul Mackerras got + is_io + pgorder; 351da9d1d7fSPaul Mackerras got = 0; 352da9d1d7fSPaul Mackerras } 353da9d1d7fSPaul Mackerras } 354c77162deSPaul Mackerras spin_unlock(&kvm->arch.slot_phys_lock); 355da9d1d7fSPaul Mackerras err = 0; 356c77162deSPaul Mackerras 357da9d1d7fSPaul Mackerras out: 358de6c0b02SDavid Gibson if (got) 359da9d1d7fSPaul Mackerras put_page(page); 360da9d1d7fSPaul Mackerras return err; 3619d0ef5eaSPaul Mackerras 3629d0ef5eaSPaul Mackerras up_err: 3639d0ef5eaSPaul Mackerras up_read(¤t->mm->mmap_sem); 3649d0ef5eaSPaul Mackerras return err; 365c77162deSPaul Mackerras } 366c77162deSPaul Mackerras 3677ed661bfSPaul Mackerras long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, 3687ed661bfSPaul Mackerras long pte_index, unsigned long pteh, 3697ed661bfSPaul Mackerras unsigned long ptel, unsigned long *pte_idx_ret) 370c77162deSPaul Mackerras { 371c77162deSPaul Mackerras unsigned long psize, gpa, gfn; 372c77162deSPaul Mackerras struct kvm_memory_slot *memslot; 373c77162deSPaul Mackerras long ret; 374c77162deSPaul Mackerras 375342d3db7SPaul Mackerras if (kvm->arch.using_mmu_notifiers) 376342d3db7SPaul Mackerras goto do_insert; 377342d3db7SPaul Mackerras 378c77162deSPaul Mackerras psize = hpte_page_size(pteh, ptel); 379c77162deSPaul Mackerras if (!psize) 380c77162deSPaul Mackerras return H_PARAMETER; 381c77162deSPaul Mackerras 382697d3899SPaul Mackerras pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID); 383697d3899SPaul Mackerras 384c77162deSPaul Mackerras /* Find the memslot (if any) for this address */ 385c77162deSPaul Mackerras gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); 386c77162deSPaul Mackerras gfn = gpa >> PAGE_SHIFT; 387c77162deSPaul Mackerras memslot = gfn_to_memslot(kvm, gfn); 388697d3899SPaul Mackerras if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) { 389da9d1d7fSPaul Mackerras if (!slot_is_aligned(memslot, psize)) 390da9d1d7fSPaul Mackerras return H_PARAMETER; 391da9d1d7fSPaul Mackerras if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0) 392c77162deSPaul Mackerras return H_PARAMETER; 393697d3899SPaul Mackerras } 394c77162deSPaul Mackerras 395342d3db7SPaul Mackerras do_insert: 396342d3db7SPaul Mackerras /* Protect linux PTE lookup from page table destruction */ 397342d3db7SPaul Mackerras rcu_read_lock_sched(); /* this disables preemption too */ 3987ed661bfSPaul Mackerras ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel, 3997ed661bfSPaul Mackerras current->mm->pgd, false, pte_idx_ret); 400342d3db7SPaul Mackerras rcu_read_unlock_sched(); 401c77162deSPaul Mackerras if (ret == H_TOO_HARD) { 402c77162deSPaul Mackerras /* this can't happen */ 403c77162deSPaul Mackerras pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n"); 404c77162deSPaul Mackerras ret = H_RESOURCE; /* or something */ 405c77162deSPaul Mackerras } 406c77162deSPaul Mackerras return ret; 407c77162deSPaul Mackerras 408c77162deSPaul Mackerras } 409c77162deSPaul Mackerras 4107ed661bfSPaul Mackerras /* 4117ed661bfSPaul Mackerras * We come here on a H_ENTER call from the guest when we are not 4127ed661bfSPaul Mackerras * using mmu notifiers and we don't have the requested page pinned 4137ed661bfSPaul Mackerras * already. 4147ed661bfSPaul Mackerras */ 4157ed661bfSPaul Mackerras long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, 4167ed661bfSPaul Mackerras long pte_index, unsigned long pteh, 4177ed661bfSPaul Mackerras unsigned long ptel) 4187ed661bfSPaul Mackerras { 4197ed661bfSPaul Mackerras return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index, 4207ed661bfSPaul Mackerras pteh, ptel, &vcpu->arch.gpr[4]); 4217ed661bfSPaul Mackerras } 4227ed661bfSPaul Mackerras 423697d3899SPaul Mackerras static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu, 424697d3899SPaul Mackerras gva_t eaddr) 425697d3899SPaul Mackerras { 426697d3899SPaul Mackerras u64 mask; 427697d3899SPaul Mackerras int i; 428697d3899SPaul Mackerras 429697d3899SPaul Mackerras for (i = 0; i < vcpu->arch.slb_nr; i++) { 430697d3899SPaul Mackerras if (!(vcpu->arch.slb[i].orige & SLB_ESID_V)) 431697d3899SPaul Mackerras continue; 432697d3899SPaul Mackerras 433697d3899SPaul Mackerras if (vcpu->arch.slb[i].origv & SLB_VSID_B_1T) 434697d3899SPaul Mackerras mask = ESID_MASK_1T; 435697d3899SPaul Mackerras else 436697d3899SPaul Mackerras mask = ESID_MASK; 437697d3899SPaul Mackerras 438697d3899SPaul Mackerras if (((vcpu->arch.slb[i].orige ^ eaddr) & mask) == 0) 439697d3899SPaul Mackerras return &vcpu->arch.slb[i]; 440697d3899SPaul Mackerras } 441697d3899SPaul Mackerras return NULL; 442697d3899SPaul Mackerras } 443697d3899SPaul Mackerras 444697d3899SPaul Mackerras static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r, 445697d3899SPaul Mackerras unsigned long ea) 446697d3899SPaul Mackerras { 447697d3899SPaul Mackerras unsigned long ra_mask; 448697d3899SPaul Mackerras 449697d3899SPaul Mackerras ra_mask = hpte_page_size(v, r) - 1; 450697d3899SPaul Mackerras return (r & HPTE_R_RPN & ~ra_mask) | (ea & ra_mask); 451697d3899SPaul Mackerras } 452697d3899SPaul Mackerras 453de56a948SPaul Mackerras static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, 454de56a948SPaul Mackerras struct kvmppc_pte *gpte, bool data) 455de56a948SPaul Mackerras { 456697d3899SPaul Mackerras struct kvm *kvm = vcpu->kvm; 457697d3899SPaul Mackerras struct kvmppc_slb *slbe; 458697d3899SPaul Mackerras unsigned long slb_v; 459697d3899SPaul Mackerras unsigned long pp, key; 460697d3899SPaul Mackerras unsigned long v, gr; 461697d3899SPaul Mackerras unsigned long *hptep; 462697d3899SPaul Mackerras int index; 463697d3899SPaul Mackerras int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR); 464697d3899SPaul Mackerras 465697d3899SPaul Mackerras /* Get SLB entry */ 466697d3899SPaul Mackerras if (virtmode) { 467697d3899SPaul Mackerras slbe = kvmppc_mmu_book3s_hv_find_slbe(vcpu, eaddr); 468697d3899SPaul Mackerras if (!slbe) 469697d3899SPaul Mackerras return -EINVAL; 470697d3899SPaul Mackerras slb_v = slbe->origv; 471697d3899SPaul Mackerras } else { 472697d3899SPaul Mackerras /* real mode access */ 473697d3899SPaul Mackerras slb_v = vcpu->kvm->arch.vrma_slb_v; 474697d3899SPaul Mackerras } 475697d3899SPaul Mackerras 476697d3899SPaul Mackerras /* Find the HPTE in the hash table */ 477697d3899SPaul Mackerras index = kvmppc_hv_find_lock_hpte(kvm, eaddr, slb_v, 478697d3899SPaul Mackerras HPTE_V_VALID | HPTE_V_ABSENT); 479697d3899SPaul Mackerras if (index < 0) 480de56a948SPaul Mackerras return -ENOENT; 481697d3899SPaul Mackerras hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); 482697d3899SPaul Mackerras v = hptep[0] & ~HPTE_V_HVLOCK; 483697d3899SPaul Mackerras gr = kvm->arch.revmap[index].guest_rpte; 484697d3899SPaul Mackerras 485697d3899SPaul Mackerras /* Unlock the HPTE */ 486697d3899SPaul Mackerras asm volatile("lwsync" : : : "memory"); 487697d3899SPaul Mackerras hptep[0] = v; 488697d3899SPaul Mackerras 489697d3899SPaul Mackerras gpte->eaddr = eaddr; 490697d3899SPaul Mackerras gpte->vpage = ((v & HPTE_V_AVPN) << 4) | ((eaddr >> 12) & 0xfff); 491697d3899SPaul Mackerras 492697d3899SPaul Mackerras /* Get PP bits and key for permission check */ 493697d3899SPaul Mackerras pp = gr & (HPTE_R_PP0 | HPTE_R_PP); 494697d3899SPaul Mackerras key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS; 495697d3899SPaul Mackerras key &= slb_v; 496697d3899SPaul Mackerras 497697d3899SPaul Mackerras /* Calculate permissions */ 498697d3899SPaul Mackerras gpte->may_read = hpte_read_permission(pp, key); 499697d3899SPaul Mackerras gpte->may_write = hpte_write_permission(pp, key); 500697d3899SPaul Mackerras gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G)); 501697d3899SPaul Mackerras 502697d3899SPaul Mackerras /* Storage key permission check for POWER7 */ 503697d3899SPaul Mackerras if (data && virtmode && cpu_has_feature(CPU_FTR_ARCH_206)) { 504697d3899SPaul Mackerras int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr); 505697d3899SPaul Mackerras if (amrfield & 1) 506697d3899SPaul Mackerras gpte->may_read = 0; 507697d3899SPaul Mackerras if (amrfield & 2) 508697d3899SPaul Mackerras gpte->may_write = 0; 509697d3899SPaul Mackerras } 510697d3899SPaul Mackerras 511697d3899SPaul Mackerras /* Get the guest physical address */ 512697d3899SPaul Mackerras gpte->raddr = kvmppc_mmu_get_real_addr(v, gr, eaddr); 513697d3899SPaul Mackerras return 0; 514697d3899SPaul Mackerras } 515697d3899SPaul Mackerras 516697d3899SPaul Mackerras /* 517697d3899SPaul Mackerras * Quick test for whether an instruction is a load or a store. 518697d3899SPaul Mackerras * If the instruction is a load or a store, then this will indicate 519697d3899SPaul Mackerras * which it is, at least on server processors. (Embedded processors 520697d3899SPaul Mackerras * have some external PID instructions that don't follow the rule 521697d3899SPaul Mackerras * embodied here.) If the instruction isn't a load or store, then 522697d3899SPaul Mackerras * this doesn't return anything useful. 523697d3899SPaul Mackerras */ 524697d3899SPaul Mackerras static int instruction_is_store(unsigned int instr) 525697d3899SPaul Mackerras { 526697d3899SPaul Mackerras unsigned int mask; 527697d3899SPaul Mackerras 528697d3899SPaul Mackerras mask = 0x10000000; 529697d3899SPaul Mackerras if ((instr & 0xfc000000) == 0x7c000000) 530697d3899SPaul Mackerras mask = 0x100; /* major opcode 31 */ 531697d3899SPaul Mackerras return (instr & mask) != 0; 532697d3899SPaul Mackerras } 533697d3899SPaul Mackerras 534697d3899SPaul Mackerras static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, 5356020c0f6SAlexander Graf unsigned long gpa, gva_t ea, int is_store) 536697d3899SPaul Mackerras { 537697d3899SPaul Mackerras int ret; 538697d3899SPaul Mackerras u32 last_inst; 539697d3899SPaul Mackerras unsigned long srr0 = kvmppc_get_pc(vcpu); 540697d3899SPaul Mackerras 541697d3899SPaul Mackerras /* We try to load the last instruction. We don't let 542697d3899SPaul Mackerras * emulate_instruction do it as it doesn't check what 543697d3899SPaul Mackerras * kvmppc_ld returns. 544697d3899SPaul Mackerras * If we fail, we just return to the guest and try executing it again. 545697d3899SPaul Mackerras */ 546697d3899SPaul Mackerras if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) { 547697d3899SPaul Mackerras ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); 548697d3899SPaul Mackerras if (ret != EMULATE_DONE || last_inst == KVM_INST_FETCH_FAILED) 549697d3899SPaul Mackerras return RESUME_GUEST; 550697d3899SPaul Mackerras vcpu->arch.last_inst = last_inst; 551697d3899SPaul Mackerras } 552697d3899SPaul Mackerras 553697d3899SPaul Mackerras /* 554697d3899SPaul Mackerras * WARNING: We do not know for sure whether the instruction we just 555697d3899SPaul Mackerras * read from memory is the same that caused the fault in the first 556697d3899SPaul Mackerras * place. If the instruction we read is neither an load or a store, 557697d3899SPaul Mackerras * then it can't access memory, so we don't need to worry about 558697d3899SPaul Mackerras * enforcing access permissions. So, assuming it is a load or 559697d3899SPaul Mackerras * store, we just check that its direction (load or store) is 560697d3899SPaul Mackerras * consistent with the original fault, since that's what we 561697d3899SPaul Mackerras * checked the access permissions against. If there is a mismatch 562697d3899SPaul Mackerras * we just return and retry the instruction. 563697d3899SPaul Mackerras */ 564697d3899SPaul Mackerras 565697d3899SPaul Mackerras if (instruction_is_store(vcpu->arch.last_inst) != !!is_store) 566697d3899SPaul Mackerras return RESUME_GUEST; 567697d3899SPaul Mackerras 568697d3899SPaul Mackerras /* 569697d3899SPaul Mackerras * Emulated accesses are emulated by looking at the hash for 570697d3899SPaul Mackerras * translation once, then performing the access later. The 571697d3899SPaul Mackerras * translation could be invalidated in the meantime in which 572697d3899SPaul Mackerras * point performing the subsequent memory access on the old 573697d3899SPaul Mackerras * physical address could possibly be a security hole for the 574697d3899SPaul Mackerras * guest (but not the host). 575697d3899SPaul Mackerras * 576697d3899SPaul Mackerras * This is less of an issue for MMIO stores since they aren't 577697d3899SPaul Mackerras * globally visible. It could be an issue for MMIO loads to 578697d3899SPaul Mackerras * a certain extent but we'll ignore it for now. 579697d3899SPaul Mackerras */ 580697d3899SPaul Mackerras 581697d3899SPaul Mackerras vcpu->arch.paddr_accessed = gpa; 5826020c0f6SAlexander Graf vcpu->arch.vaddr_accessed = ea; 583697d3899SPaul Mackerras return kvmppc_emulate_mmio(run, vcpu); 584697d3899SPaul Mackerras } 585697d3899SPaul Mackerras 586697d3899SPaul Mackerras int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, 587697d3899SPaul Mackerras unsigned long ea, unsigned long dsisr) 588697d3899SPaul Mackerras { 589697d3899SPaul Mackerras struct kvm *kvm = vcpu->kvm; 590342d3db7SPaul Mackerras unsigned long *hptep, hpte[3], r; 591342d3db7SPaul Mackerras unsigned long mmu_seq, psize, pte_size; 59270bddfefSPaul Mackerras unsigned long gpa, gfn, hva, pfn; 593697d3899SPaul Mackerras struct kvm_memory_slot *memslot; 594342d3db7SPaul Mackerras unsigned long *rmap; 595697d3899SPaul Mackerras struct revmap_entry *rev; 596342d3db7SPaul Mackerras struct page *page, *pages[1]; 597342d3db7SPaul Mackerras long index, ret, npages; 598342d3db7SPaul Mackerras unsigned long is_io; 5994cf302bcSPaul Mackerras unsigned int writing, write_ok; 600342d3db7SPaul Mackerras struct vm_area_struct *vma; 601bad3b507SPaul Mackerras unsigned long rcbits; 602697d3899SPaul Mackerras 603697d3899SPaul Mackerras /* 604697d3899SPaul Mackerras * Real-mode code has already searched the HPT and found the 605697d3899SPaul Mackerras * entry we're interested in. Lock the entry and check that 606697d3899SPaul Mackerras * it hasn't changed. If it has, just return and re-execute the 607697d3899SPaul Mackerras * instruction. 608697d3899SPaul Mackerras */ 609697d3899SPaul Mackerras if (ea != vcpu->arch.pgfault_addr) 610697d3899SPaul Mackerras return RESUME_GUEST; 611697d3899SPaul Mackerras index = vcpu->arch.pgfault_index; 612697d3899SPaul Mackerras hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); 613697d3899SPaul Mackerras rev = &kvm->arch.revmap[index]; 614697d3899SPaul Mackerras preempt_disable(); 615697d3899SPaul Mackerras while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) 616697d3899SPaul Mackerras cpu_relax(); 617697d3899SPaul Mackerras hpte[0] = hptep[0] & ~HPTE_V_HVLOCK; 618697d3899SPaul Mackerras hpte[1] = hptep[1]; 619342d3db7SPaul Mackerras hpte[2] = r = rev->guest_rpte; 620697d3899SPaul Mackerras asm volatile("lwsync" : : : "memory"); 621697d3899SPaul Mackerras hptep[0] = hpte[0]; 622697d3899SPaul Mackerras preempt_enable(); 623697d3899SPaul Mackerras 624697d3899SPaul Mackerras if (hpte[0] != vcpu->arch.pgfault_hpte[0] || 625697d3899SPaul Mackerras hpte[1] != vcpu->arch.pgfault_hpte[1]) 626697d3899SPaul Mackerras return RESUME_GUEST; 627697d3899SPaul Mackerras 628697d3899SPaul Mackerras /* Translate the logical address and get the page */ 629342d3db7SPaul Mackerras psize = hpte_page_size(hpte[0], r); 63070bddfefSPaul Mackerras gpa = (r & HPTE_R_RPN & ~(psize - 1)) | (ea & (psize - 1)); 63170bddfefSPaul Mackerras gfn = gpa >> PAGE_SHIFT; 632697d3899SPaul Mackerras memslot = gfn_to_memslot(kvm, gfn); 633697d3899SPaul Mackerras 634697d3899SPaul Mackerras /* No memslot means it's an emulated MMIO region */ 63570bddfefSPaul Mackerras if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) 6366020c0f6SAlexander Graf return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, 637697d3899SPaul Mackerras dsisr & DSISR_ISSTORE); 638697d3899SPaul Mackerras 639342d3db7SPaul Mackerras if (!kvm->arch.using_mmu_notifiers) 640342d3db7SPaul Mackerras return -EFAULT; /* should never get here */ 641342d3db7SPaul Mackerras 642342d3db7SPaul Mackerras /* used to check for invalidations in progress */ 643342d3db7SPaul Mackerras mmu_seq = kvm->mmu_notifier_seq; 644342d3db7SPaul Mackerras smp_rmb(); 645342d3db7SPaul Mackerras 646342d3db7SPaul Mackerras is_io = 0; 647342d3db7SPaul Mackerras pfn = 0; 648342d3db7SPaul Mackerras page = NULL; 649342d3db7SPaul Mackerras pte_size = PAGE_SIZE; 6504cf302bcSPaul Mackerras writing = (dsisr & DSISR_ISSTORE) != 0; 6514cf302bcSPaul Mackerras /* If writing != 0, then the HPTE must allow writing, if we get here */ 6524cf302bcSPaul Mackerras write_ok = writing; 653342d3db7SPaul Mackerras hva = gfn_to_hva_memslot(memslot, gfn); 6544cf302bcSPaul Mackerras npages = get_user_pages_fast(hva, 1, writing, pages); 655342d3db7SPaul Mackerras if (npages < 1) { 656342d3db7SPaul Mackerras /* Check if it's an I/O mapping */ 657342d3db7SPaul Mackerras down_read(¤t->mm->mmap_sem); 658342d3db7SPaul Mackerras vma = find_vma(current->mm, hva); 659342d3db7SPaul Mackerras if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end && 660342d3db7SPaul Mackerras (vma->vm_flags & VM_PFNMAP)) { 661342d3db7SPaul Mackerras pfn = vma->vm_pgoff + 662342d3db7SPaul Mackerras ((hva - vma->vm_start) >> PAGE_SHIFT); 663342d3db7SPaul Mackerras pte_size = psize; 664342d3db7SPaul Mackerras is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot)); 6654cf302bcSPaul Mackerras write_ok = vma->vm_flags & VM_WRITE; 666342d3db7SPaul Mackerras } 667342d3db7SPaul Mackerras up_read(¤t->mm->mmap_sem); 668342d3db7SPaul Mackerras if (!pfn) 669697d3899SPaul Mackerras return -EFAULT; 670342d3db7SPaul Mackerras } else { 671342d3db7SPaul Mackerras page = pages[0]; 672342d3db7SPaul Mackerras if (PageHuge(page)) { 673342d3db7SPaul Mackerras page = compound_head(page); 674342d3db7SPaul Mackerras pte_size <<= compound_order(page); 675342d3db7SPaul Mackerras } 6764cf302bcSPaul Mackerras /* if the guest wants write access, see if that is OK */ 6774cf302bcSPaul Mackerras if (!writing && hpte_is_writable(r)) { 6784cf302bcSPaul Mackerras pte_t *ptep, pte; 6794cf302bcSPaul Mackerras 6804cf302bcSPaul Mackerras /* 6814cf302bcSPaul Mackerras * We need to protect against page table destruction 6824cf302bcSPaul Mackerras * while looking up and updating the pte. 6834cf302bcSPaul Mackerras */ 6844cf302bcSPaul Mackerras rcu_read_lock_sched(); 6854cf302bcSPaul Mackerras ptep = find_linux_pte_or_hugepte(current->mm->pgd, 6864cf302bcSPaul Mackerras hva, NULL); 6874cf302bcSPaul Mackerras if (ptep && pte_present(*ptep)) { 6884cf302bcSPaul Mackerras pte = kvmppc_read_update_linux_pte(ptep, 1); 6894cf302bcSPaul Mackerras if (pte_write(pte)) 6904cf302bcSPaul Mackerras write_ok = 1; 6914cf302bcSPaul Mackerras } 6924cf302bcSPaul Mackerras rcu_read_unlock_sched(); 6934cf302bcSPaul Mackerras } 694342d3db7SPaul Mackerras pfn = page_to_pfn(page); 695342d3db7SPaul Mackerras } 696342d3db7SPaul Mackerras 697342d3db7SPaul Mackerras ret = -EFAULT; 698342d3db7SPaul Mackerras if (psize > pte_size) 699342d3db7SPaul Mackerras goto out_put; 700342d3db7SPaul Mackerras 701342d3db7SPaul Mackerras /* Check WIMG vs. the actual page we're accessing */ 702342d3db7SPaul Mackerras if (!hpte_cache_flags_ok(r, is_io)) { 703342d3db7SPaul Mackerras if (is_io) 704342d3db7SPaul Mackerras return -EFAULT; 705342d3db7SPaul Mackerras /* 706342d3db7SPaul Mackerras * Allow guest to map emulated device memory as 707342d3db7SPaul Mackerras * uncacheable, but actually make it cacheable. 708342d3db7SPaul Mackerras */ 709342d3db7SPaul Mackerras r = (r & ~(HPTE_R_W|HPTE_R_I|HPTE_R_G)) | HPTE_R_M; 710342d3db7SPaul Mackerras } 711342d3db7SPaul Mackerras 712342d3db7SPaul Mackerras /* Set the HPTE to point to pfn */ 713342d3db7SPaul Mackerras r = (r & ~(HPTE_R_PP0 - pte_size)) | (pfn << PAGE_SHIFT); 7144cf302bcSPaul Mackerras if (hpte_is_writable(r) && !write_ok) 7154cf302bcSPaul Mackerras r = hpte_make_readonly(r); 716342d3db7SPaul Mackerras ret = RESUME_GUEST; 717342d3db7SPaul Mackerras preempt_disable(); 718342d3db7SPaul Mackerras while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) 719342d3db7SPaul Mackerras cpu_relax(); 720342d3db7SPaul Mackerras if ((hptep[0] & ~HPTE_V_HVLOCK) != hpte[0] || hptep[1] != hpte[1] || 721342d3db7SPaul Mackerras rev->guest_rpte != hpte[2]) 722342d3db7SPaul Mackerras /* HPTE has been changed under us; let the guest retry */ 723342d3db7SPaul Mackerras goto out_unlock; 724342d3db7SPaul Mackerras hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; 725342d3db7SPaul Mackerras 726d89cc617STakuya Yoshikawa rmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; 727342d3db7SPaul Mackerras lock_rmap(rmap); 728342d3db7SPaul Mackerras 729342d3db7SPaul Mackerras /* Check if we might have been invalidated; let the guest retry if so */ 730342d3db7SPaul Mackerras ret = RESUME_GUEST; 7318ca40a70SChristoffer Dall if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) { 732342d3db7SPaul Mackerras unlock_rmap(rmap); 733342d3db7SPaul Mackerras goto out_unlock; 734342d3db7SPaul Mackerras } 7354cf302bcSPaul Mackerras 736bad3b507SPaul Mackerras /* Only set R/C in real HPTE if set in both *rmap and guest_rpte */ 737bad3b507SPaul Mackerras rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT; 738bad3b507SPaul Mackerras r &= rcbits | ~(HPTE_R_R | HPTE_R_C); 739bad3b507SPaul Mackerras 7404cf302bcSPaul Mackerras if (hptep[0] & HPTE_V_VALID) { 7414cf302bcSPaul Mackerras /* HPTE was previously valid, so we need to invalidate it */ 7424cf302bcSPaul Mackerras unlock_rmap(rmap); 7434cf302bcSPaul Mackerras hptep[0] |= HPTE_V_ABSENT; 7444cf302bcSPaul Mackerras kvmppc_invalidate_hpte(kvm, hptep, index); 745bad3b507SPaul Mackerras /* don't lose previous R and C bits */ 746bad3b507SPaul Mackerras r |= hptep[1] & (HPTE_R_R | HPTE_R_C); 7474cf302bcSPaul Mackerras } else { 748342d3db7SPaul Mackerras kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0); 7494cf302bcSPaul Mackerras } 750342d3db7SPaul Mackerras 751342d3db7SPaul Mackerras hptep[1] = r; 752342d3db7SPaul Mackerras eieio(); 753342d3db7SPaul Mackerras hptep[0] = hpte[0]; 754342d3db7SPaul Mackerras asm volatile("ptesync" : : : "memory"); 755342d3db7SPaul Mackerras preempt_enable(); 7564cf302bcSPaul Mackerras if (page && hpte_is_writable(r)) 757342d3db7SPaul Mackerras SetPageDirty(page); 758342d3db7SPaul Mackerras 759342d3db7SPaul Mackerras out_put: 760de6c0b02SDavid Gibson if (page) { 761de6c0b02SDavid Gibson /* 762de6c0b02SDavid Gibson * We drop pages[0] here, not page because page might 763de6c0b02SDavid Gibson * have been set to the head page of a compound, but 764de6c0b02SDavid Gibson * we have to drop the reference on the correct tail 765de6c0b02SDavid Gibson * page to match the get inside gup() 766de6c0b02SDavid Gibson */ 767de6c0b02SDavid Gibson put_page(pages[0]); 768de6c0b02SDavid Gibson } 769342d3db7SPaul Mackerras return ret; 770342d3db7SPaul Mackerras 771342d3db7SPaul Mackerras out_unlock: 772342d3db7SPaul Mackerras hptep[0] &= ~HPTE_V_HVLOCK; 773342d3db7SPaul Mackerras preempt_enable(); 774342d3db7SPaul Mackerras goto out_put; 775342d3db7SPaul Mackerras } 776342d3db7SPaul Mackerras 777a64fd707SPaul Mackerras static void kvmppc_rmap_reset(struct kvm *kvm) 778a64fd707SPaul Mackerras { 779a64fd707SPaul Mackerras struct kvm_memslots *slots; 780a64fd707SPaul Mackerras struct kvm_memory_slot *memslot; 781a64fd707SPaul Mackerras int srcu_idx; 782a64fd707SPaul Mackerras 783a64fd707SPaul Mackerras srcu_idx = srcu_read_lock(&kvm->srcu); 784a64fd707SPaul Mackerras slots = kvm->memslots; 785a64fd707SPaul Mackerras kvm_for_each_memslot(memslot, slots) { 786a64fd707SPaul Mackerras /* 787a64fd707SPaul Mackerras * This assumes it is acceptable to lose reference and 788a64fd707SPaul Mackerras * change bits across a reset. 789a64fd707SPaul Mackerras */ 790a64fd707SPaul Mackerras memset(memslot->arch.rmap, 0, 791a64fd707SPaul Mackerras memslot->npages * sizeof(*memslot->arch.rmap)); 792a64fd707SPaul Mackerras } 793a64fd707SPaul Mackerras srcu_read_unlock(&kvm->srcu, srcu_idx); 794a64fd707SPaul Mackerras } 795a64fd707SPaul Mackerras 79684504ef3STakuya Yoshikawa static int kvm_handle_hva_range(struct kvm *kvm, 79784504ef3STakuya Yoshikawa unsigned long start, 79884504ef3STakuya Yoshikawa unsigned long end, 79984504ef3STakuya Yoshikawa int (*handler)(struct kvm *kvm, 80084504ef3STakuya Yoshikawa unsigned long *rmapp, 801342d3db7SPaul Mackerras unsigned long gfn)) 802342d3db7SPaul Mackerras { 803342d3db7SPaul Mackerras int ret; 804342d3db7SPaul Mackerras int retval = 0; 805342d3db7SPaul Mackerras struct kvm_memslots *slots; 806342d3db7SPaul Mackerras struct kvm_memory_slot *memslot; 807342d3db7SPaul Mackerras 808342d3db7SPaul Mackerras slots = kvm_memslots(kvm); 809342d3db7SPaul Mackerras kvm_for_each_memslot(memslot, slots) { 81084504ef3STakuya Yoshikawa unsigned long hva_start, hva_end; 81184504ef3STakuya Yoshikawa gfn_t gfn, gfn_end; 812342d3db7SPaul Mackerras 81384504ef3STakuya Yoshikawa hva_start = max(start, memslot->userspace_addr); 81484504ef3STakuya Yoshikawa hva_end = min(end, memslot->userspace_addr + 81584504ef3STakuya Yoshikawa (memslot->npages << PAGE_SHIFT)); 81684504ef3STakuya Yoshikawa if (hva_start >= hva_end) 81784504ef3STakuya Yoshikawa continue; 81884504ef3STakuya Yoshikawa /* 81984504ef3STakuya Yoshikawa * {gfn(page) | page intersects with [hva_start, hva_end)} = 82084504ef3STakuya Yoshikawa * {gfn, gfn+1, ..., gfn_end-1}. 82184504ef3STakuya Yoshikawa */ 82284504ef3STakuya Yoshikawa gfn = hva_to_gfn_memslot(hva_start, memslot); 82384504ef3STakuya Yoshikawa gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); 82484504ef3STakuya Yoshikawa 82584504ef3STakuya Yoshikawa for (; gfn < gfn_end; ++gfn) { 826d19a748bSTakuya Yoshikawa gfn_t gfn_offset = gfn - memslot->base_gfn; 827342d3db7SPaul Mackerras 828d89cc617STakuya Yoshikawa ret = handler(kvm, &memslot->arch.rmap[gfn_offset], gfn); 829342d3db7SPaul Mackerras retval |= ret; 830342d3db7SPaul Mackerras } 831342d3db7SPaul Mackerras } 832342d3db7SPaul Mackerras 833342d3db7SPaul Mackerras return retval; 834342d3db7SPaul Mackerras } 835342d3db7SPaul Mackerras 83684504ef3STakuya Yoshikawa static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, 83784504ef3STakuya Yoshikawa int (*handler)(struct kvm *kvm, unsigned long *rmapp, 83884504ef3STakuya Yoshikawa unsigned long gfn)) 83984504ef3STakuya Yoshikawa { 84084504ef3STakuya Yoshikawa return kvm_handle_hva_range(kvm, hva, hva + 1, handler); 84184504ef3STakuya Yoshikawa } 84284504ef3STakuya Yoshikawa 843342d3db7SPaul Mackerras static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, 844342d3db7SPaul Mackerras unsigned long gfn) 845342d3db7SPaul Mackerras { 846342d3db7SPaul Mackerras struct revmap_entry *rev = kvm->arch.revmap; 847342d3db7SPaul Mackerras unsigned long h, i, j; 848342d3db7SPaul Mackerras unsigned long *hptep; 849bad3b507SPaul Mackerras unsigned long ptel, psize, rcbits; 850342d3db7SPaul Mackerras 851342d3db7SPaul Mackerras for (;;) { 852bad3b507SPaul Mackerras lock_rmap(rmapp); 853342d3db7SPaul Mackerras if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { 854bad3b507SPaul Mackerras unlock_rmap(rmapp); 855342d3db7SPaul Mackerras break; 856342d3db7SPaul Mackerras } 857342d3db7SPaul Mackerras 858342d3db7SPaul Mackerras /* 859342d3db7SPaul Mackerras * To avoid an ABBA deadlock with the HPTE lock bit, 860bad3b507SPaul Mackerras * we can't spin on the HPTE lock while holding the 861bad3b507SPaul Mackerras * rmap chain lock. 862342d3db7SPaul Mackerras */ 863342d3db7SPaul Mackerras i = *rmapp & KVMPPC_RMAP_INDEX; 864bad3b507SPaul Mackerras hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); 865bad3b507SPaul Mackerras if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { 866bad3b507SPaul Mackerras /* unlock rmap before spinning on the HPTE lock */ 867bad3b507SPaul Mackerras unlock_rmap(rmapp); 868bad3b507SPaul Mackerras while (hptep[0] & HPTE_V_HVLOCK) 869bad3b507SPaul Mackerras cpu_relax(); 870bad3b507SPaul Mackerras continue; 871bad3b507SPaul Mackerras } 872342d3db7SPaul Mackerras j = rev[i].forw; 873342d3db7SPaul Mackerras if (j == i) { 874342d3db7SPaul Mackerras /* chain is now empty */ 875bad3b507SPaul Mackerras *rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX); 876342d3db7SPaul Mackerras } else { 877342d3db7SPaul Mackerras /* remove i from chain */ 878342d3db7SPaul Mackerras h = rev[i].back; 879342d3db7SPaul Mackerras rev[h].forw = j; 880342d3db7SPaul Mackerras rev[j].back = h; 881342d3db7SPaul Mackerras rev[i].forw = rev[i].back = i; 882bad3b507SPaul Mackerras *rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j; 883342d3db7SPaul Mackerras } 884342d3db7SPaul Mackerras 885bad3b507SPaul Mackerras /* Now check and modify the HPTE */ 886342d3db7SPaul Mackerras ptel = rev[i].guest_rpte; 887342d3db7SPaul Mackerras psize = hpte_page_size(hptep[0], ptel); 888342d3db7SPaul Mackerras if ((hptep[0] & HPTE_V_VALID) && 889342d3db7SPaul Mackerras hpte_rpn(ptel, psize) == gfn) { 890dfe49dbdSPaul Mackerras if (kvm->arch.using_mmu_notifiers) 891342d3db7SPaul Mackerras hptep[0] |= HPTE_V_ABSENT; 892bad3b507SPaul Mackerras kvmppc_invalidate_hpte(kvm, hptep, i); 893bad3b507SPaul Mackerras /* Harvest R and C */ 894bad3b507SPaul Mackerras rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); 895bad3b507SPaul Mackerras *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; 896a1b4a0f6SPaul Mackerras if (rcbits & ~rev[i].guest_rpte) { 897bad3b507SPaul Mackerras rev[i].guest_rpte = ptel | rcbits; 898a1b4a0f6SPaul Mackerras note_hpte_modification(kvm, &rev[i]); 899a1b4a0f6SPaul Mackerras } 900342d3db7SPaul Mackerras } 901bad3b507SPaul Mackerras unlock_rmap(rmapp); 902342d3db7SPaul Mackerras hptep[0] &= ~HPTE_V_HVLOCK; 903342d3db7SPaul Mackerras } 904342d3db7SPaul Mackerras return 0; 905342d3db7SPaul Mackerras } 906342d3db7SPaul Mackerras 907342d3db7SPaul Mackerras int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) 908342d3db7SPaul Mackerras { 909342d3db7SPaul Mackerras if (kvm->arch.using_mmu_notifiers) 910342d3db7SPaul Mackerras kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); 911342d3db7SPaul Mackerras return 0; 912342d3db7SPaul Mackerras } 913342d3db7SPaul Mackerras 914b3ae2096STakuya Yoshikawa int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) 915b3ae2096STakuya Yoshikawa { 916b3ae2096STakuya Yoshikawa if (kvm->arch.using_mmu_notifiers) 917b3ae2096STakuya Yoshikawa kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp); 918b3ae2096STakuya Yoshikawa return 0; 919b3ae2096STakuya Yoshikawa } 920b3ae2096STakuya Yoshikawa 921dfe49dbdSPaul Mackerras void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) 922dfe49dbdSPaul Mackerras { 923dfe49dbdSPaul Mackerras unsigned long *rmapp; 924dfe49dbdSPaul Mackerras unsigned long gfn; 925dfe49dbdSPaul Mackerras unsigned long n; 926dfe49dbdSPaul Mackerras 927dfe49dbdSPaul Mackerras rmapp = memslot->arch.rmap; 928dfe49dbdSPaul Mackerras gfn = memslot->base_gfn; 929dfe49dbdSPaul Mackerras for (n = memslot->npages; n; --n) { 930dfe49dbdSPaul Mackerras /* 931dfe49dbdSPaul Mackerras * Testing the present bit without locking is OK because 932dfe49dbdSPaul Mackerras * the memslot has been marked invalid already, and hence 933dfe49dbdSPaul Mackerras * no new HPTEs referencing this page can be created, 934dfe49dbdSPaul Mackerras * thus the present bit can't go from 0 to 1. 935dfe49dbdSPaul Mackerras */ 936dfe49dbdSPaul Mackerras if (*rmapp & KVMPPC_RMAP_PRESENT) 937dfe49dbdSPaul Mackerras kvm_unmap_rmapp(kvm, rmapp, gfn); 938dfe49dbdSPaul Mackerras ++rmapp; 939dfe49dbdSPaul Mackerras ++gfn; 940dfe49dbdSPaul Mackerras } 941dfe49dbdSPaul Mackerras } 942dfe49dbdSPaul Mackerras 943342d3db7SPaul Mackerras static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, 944342d3db7SPaul Mackerras unsigned long gfn) 945342d3db7SPaul Mackerras { 94655514893SPaul Mackerras struct revmap_entry *rev = kvm->arch.revmap; 94755514893SPaul Mackerras unsigned long head, i, j; 94855514893SPaul Mackerras unsigned long *hptep; 94955514893SPaul Mackerras int ret = 0; 95055514893SPaul Mackerras 95155514893SPaul Mackerras retry: 95255514893SPaul Mackerras lock_rmap(rmapp); 95355514893SPaul Mackerras if (*rmapp & KVMPPC_RMAP_REFERENCED) { 954bad3b507SPaul Mackerras *rmapp &= ~KVMPPC_RMAP_REFERENCED; 95555514893SPaul Mackerras ret = 1; 95655514893SPaul Mackerras } 95755514893SPaul Mackerras if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { 95855514893SPaul Mackerras unlock_rmap(rmapp); 95955514893SPaul Mackerras return ret; 96055514893SPaul Mackerras } 96155514893SPaul Mackerras 96255514893SPaul Mackerras i = head = *rmapp & KVMPPC_RMAP_INDEX; 96355514893SPaul Mackerras do { 96455514893SPaul Mackerras hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); 96555514893SPaul Mackerras j = rev[i].forw; 96655514893SPaul Mackerras 96755514893SPaul Mackerras /* If this HPTE isn't referenced, ignore it */ 96855514893SPaul Mackerras if (!(hptep[1] & HPTE_R_R)) 96955514893SPaul Mackerras continue; 97055514893SPaul Mackerras 97155514893SPaul Mackerras if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { 97255514893SPaul Mackerras /* unlock rmap before spinning on the HPTE lock */ 97355514893SPaul Mackerras unlock_rmap(rmapp); 97455514893SPaul Mackerras while (hptep[0] & HPTE_V_HVLOCK) 97555514893SPaul Mackerras cpu_relax(); 97655514893SPaul Mackerras goto retry; 97755514893SPaul Mackerras } 97855514893SPaul Mackerras 97955514893SPaul Mackerras /* Now check and modify the HPTE */ 98055514893SPaul Mackerras if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) { 98155514893SPaul Mackerras kvmppc_clear_ref_hpte(kvm, hptep, i); 982a1b4a0f6SPaul Mackerras if (!(rev[i].guest_rpte & HPTE_R_R)) { 98355514893SPaul Mackerras rev[i].guest_rpte |= HPTE_R_R; 984a1b4a0f6SPaul Mackerras note_hpte_modification(kvm, &rev[i]); 985a1b4a0f6SPaul Mackerras } 98655514893SPaul Mackerras ret = 1; 98755514893SPaul Mackerras } 98855514893SPaul Mackerras hptep[0] &= ~HPTE_V_HVLOCK; 98955514893SPaul Mackerras } while ((i = j) != head); 99055514893SPaul Mackerras 99155514893SPaul Mackerras unlock_rmap(rmapp); 99255514893SPaul Mackerras return ret; 993342d3db7SPaul Mackerras } 994342d3db7SPaul Mackerras 995342d3db7SPaul Mackerras int kvm_age_hva(struct kvm *kvm, unsigned long hva) 996342d3db7SPaul Mackerras { 997342d3db7SPaul Mackerras if (!kvm->arch.using_mmu_notifiers) 998342d3db7SPaul Mackerras return 0; 999342d3db7SPaul Mackerras return kvm_handle_hva(kvm, hva, kvm_age_rmapp); 1000342d3db7SPaul Mackerras } 1001342d3db7SPaul Mackerras 1002342d3db7SPaul Mackerras static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, 1003342d3db7SPaul Mackerras unsigned long gfn) 1004342d3db7SPaul Mackerras { 100555514893SPaul Mackerras struct revmap_entry *rev = kvm->arch.revmap; 100655514893SPaul Mackerras unsigned long head, i, j; 100755514893SPaul Mackerras unsigned long *hp; 100855514893SPaul Mackerras int ret = 1; 100955514893SPaul Mackerras 101055514893SPaul Mackerras if (*rmapp & KVMPPC_RMAP_REFERENCED) 101155514893SPaul Mackerras return 1; 101255514893SPaul Mackerras 101355514893SPaul Mackerras lock_rmap(rmapp); 101455514893SPaul Mackerras if (*rmapp & KVMPPC_RMAP_REFERENCED) 101555514893SPaul Mackerras goto out; 101655514893SPaul Mackerras 101755514893SPaul Mackerras if (*rmapp & KVMPPC_RMAP_PRESENT) { 101855514893SPaul Mackerras i = head = *rmapp & KVMPPC_RMAP_INDEX; 101955514893SPaul Mackerras do { 102055514893SPaul Mackerras hp = (unsigned long *)(kvm->arch.hpt_virt + (i << 4)); 102155514893SPaul Mackerras j = rev[i].forw; 102255514893SPaul Mackerras if (hp[1] & HPTE_R_R) 102355514893SPaul Mackerras goto out; 102455514893SPaul Mackerras } while ((i = j) != head); 102555514893SPaul Mackerras } 102655514893SPaul Mackerras ret = 0; 102755514893SPaul Mackerras 102855514893SPaul Mackerras out: 102955514893SPaul Mackerras unlock_rmap(rmapp); 103055514893SPaul Mackerras return ret; 1031342d3db7SPaul Mackerras } 1032342d3db7SPaul Mackerras 1033342d3db7SPaul Mackerras int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) 1034342d3db7SPaul Mackerras { 1035342d3db7SPaul Mackerras if (!kvm->arch.using_mmu_notifiers) 1036342d3db7SPaul Mackerras return 0; 1037342d3db7SPaul Mackerras return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp); 1038342d3db7SPaul Mackerras } 1039342d3db7SPaul Mackerras 1040342d3db7SPaul Mackerras void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) 1041342d3db7SPaul Mackerras { 1042342d3db7SPaul Mackerras if (!kvm->arch.using_mmu_notifiers) 1043342d3db7SPaul Mackerras return; 1044342d3db7SPaul Mackerras kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); 1045de56a948SPaul Mackerras } 1046de56a948SPaul Mackerras 104782ed3616SPaul Mackerras static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) 104882ed3616SPaul Mackerras { 104982ed3616SPaul Mackerras struct revmap_entry *rev = kvm->arch.revmap; 105082ed3616SPaul Mackerras unsigned long head, i, j; 105182ed3616SPaul Mackerras unsigned long *hptep; 105282ed3616SPaul Mackerras int ret = 0; 105382ed3616SPaul Mackerras 105482ed3616SPaul Mackerras retry: 105582ed3616SPaul Mackerras lock_rmap(rmapp); 105682ed3616SPaul Mackerras if (*rmapp & KVMPPC_RMAP_CHANGED) { 105782ed3616SPaul Mackerras *rmapp &= ~KVMPPC_RMAP_CHANGED; 105882ed3616SPaul Mackerras ret = 1; 105982ed3616SPaul Mackerras } 106082ed3616SPaul Mackerras if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { 106182ed3616SPaul Mackerras unlock_rmap(rmapp); 106282ed3616SPaul Mackerras return ret; 106382ed3616SPaul Mackerras } 106482ed3616SPaul Mackerras 106582ed3616SPaul Mackerras i = head = *rmapp & KVMPPC_RMAP_INDEX; 106682ed3616SPaul Mackerras do { 106782ed3616SPaul Mackerras hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); 106882ed3616SPaul Mackerras j = rev[i].forw; 106982ed3616SPaul Mackerras 107082ed3616SPaul Mackerras if (!(hptep[1] & HPTE_R_C)) 107182ed3616SPaul Mackerras continue; 107282ed3616SPaul Mackerras 107382ed3616SPaul Mackerras if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { 107482ed3616SPaul Mackerras /* unlock rmap before spinning on the HPTE lock */ 107582ed3616SPaul Mackerras unlock_rmap(rmapp); 107682ed3616SPaul Mackerras while (hptep[0] & HPTE_V_HVLOCK) 107782ed3616SPaul Mackerras cpu_relax(); 107882ed3616SPaul Mackerras goto retry; 107982ed3616SPaul Mackerras } 108082ed3616SPaul Mackerras 108182ed3616SPaul Mackerras /* Now check and modify the HPTE */ 108282ed3616SPaul Mackerras if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_C)) { 108382ed3616SPaul Mackerras /* need to make it temporarily absent to clear C */ 108482ed3616SPaul Mackerras hptep[0] |= HPTE_V_ABSENT; 108582ed3616SPaul Mackerras kvmppc_invalidate_hpte(kvm, hptep, i); 108682ed3616SPaul Mackerras hptep[1] &= ~HPTE_R_C; 108782ed3616SPaul Mackerras eieio(); 108882ed3616SPaul Mackerras hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; 1089a1b4a0f6SPaul Mackerras if (!(rev[i].guest_rpte & HPTE_R_C)) { 109082ed3616SPaul Mackerras rev[i].guest_rpte |= HPTE_R_C; 1091a1b4a0f6SPaul Mackerras note_hpte_modification(kvm, &rev[i]); 1092a1b4a0f6SPaul Mackerras } 109382ed3616SPaul Mackerras ret = 1; 109482ed3616SPaul Mackerras } 109582ed3616SPaul Mackerras hptep[0] &= ~HPTE_V_HVLOCK; 109682ed3616SPaul Mackerras } while ((i = j) != head); 109782ed3616SPaul Mackerras 109882ed3616SPaul Mackerras unlock_rmap(rmapp); 109982ed3616SPaul Mackerras return ret; 110082ed3616SPaul Mackerras } 110182ed3616SPaul Mackerras 1102c35635efSPaul Mackerras static void harvest_vpa_dirty(struct kvmppc_vpa *vpa, 1103c35635efSPaul Mackerras struct kvm_memory_slot *memslot, 1104c35635efSPaul Mackerras unsigned long *map) 1105c35635efSPaul Mackerras { 1106c35635efSPaul Mackerras unsigned long gfn; 1107c35635efSPaul Mackerras 1108c35635efSPaul Mackerras if (!vpa->dirty || !vpa->pinned_addr) 1109c35635efSPaul Mackerras return; 1110c35635efSPaul Mackerras gfn = vpa->gpa >> PAGE_SHIFT; 1111c35635efSPaul Mackerras if (gfn < memslot->base_gfn || 1112c35635efSPaul Mackerras gfn >= memslot->base_gfn + memslot->npages) 1113c35635efSPaul Mackerras return; 1114c35635efSPaul Mackerras 1115c35635efSPaul Mackerras vpa->dirty = false; 1116c35635efSPaul Mackerras if (map) 1117c35635efSPaul Mackerras __set_bit_le(gfn - memslot->base_gfn, map); 1118c35635efSPaul Mackerras } 1119c35635efSPaul Mackerras 1120dfe49dbdSPaul Mackerras long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, 1121dfe49dbdSPaul Mackerras unsigned long *map) 112282ed3616SPaul Mackerras { 112382ed3616SPaul Mackerras unsigned long i; 1124dfe49dbdSPaul Mackerras unsigned long *rmapp; 1125c35635efSPaul Mackerras struct kvm_vcpu *vcpu; 112682ed3616SPaul Mackerras 112782ed3616SPaul Mackerras preempt_disable(); 1128d89cc617STakuya Yoshikawa rmapp = memslot->arch.rmap; 112982ed3616SPaul Mackerras for (i = 0; i < memslot->npages; ++i) { 1130dfe49dbdSPaul Mackerras if (kvm_test_clear_dirty(kvm, rmapp) && map) 113182ed3616SPaul Mackerras __set_bit_le(i, map); 113282ed3616SPaul Mackerras ++rmapp; 113382ed3616SPaul Mackerras } 1134c35635efSPaul Mackerras 1135c35635efSPaul Mackerras /* Harvest dirty bits from VPA and DTL updates */ 1136c35635efSPaul Mackerras /* Note: we never modify the SLB shadow buffer areas */ 1137c35635efSPaul Mackerras kvm_for_each_vcpu(i, vcpu, kvm) { 1138c35635efSPaul Mackerras spin_lock(&vcpu->arch.vpa_update_lock); 1139c35635efSPaul Mackerras harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map); 1140c35635efSPaul Mackerras harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map); 1141c35635efSPaul Mackerras spin_unlock(&vcpu->arch.vpa_update_lock); 1142c35635efSPaul Mackerras } 114382ed3616SPaul Mackerras preempt_enable(); 114482ed3616SPaul Mackerras return 0; 114582ed3616SPaul Mackerras } 114682ed3616SPaul Mackerras 114793e60249SPaul Mackerras void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, 114893e60249SPaul Mackerras unsigned long *nb_ret) 114993e60249SPaul Mackerras { 115093e60249SPaul Mackerras struct kvm_memory_slot *memslot; 115193e60249SPaul Mackerras unsigned long gfn = gpa >> PAGE_SHIFT; 1152342d3db7SPaul Mackerras struct page *page, *pages[1]; 1153342d3db7SPaul Mackerras int npages; 1154c35635efSPaul Mackerras unsigned long hva, offset; 1155da9d1d7fSPaul Mackerras unsigned long pa; 115693e60249SPaul Mackerras unsigned long *physp; 11572c9097e4SPaul Mackerras int srcu_idx; 115893e60249SPaul Mackerras 11592c9097e4SPaul Mackerras srcu_idx = srcu_read_lock(&kvm->srcu); 116093e60249SPaul Mackerras memslot = gfn_to_memslot(kvm, gfn); 116193e60249SPaul Mackerras if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) 11622c9097e4SPaul Mackerras goto err; 1163342d3db7SPaul Mackerras if (!kvm->arch.using_mmu_notifiers) { 1164a66b48c3SPaul Mackerras physp = memslot->arch.slot_phys; 116593e60249SPaul Mackerras if (!physp) 11662c9097e4SPaul Mackerras goto err; 1167da9d1d7fSPaul Mackerras physp += gfn - memslot->base_gfn; 116893e60249SPaul Mackerras pa = *physp; 1169c77162deSPaul Mackerras if (!pa) { 1170342d3db7SPaul Mackerras if (kvmppc_get_guest_page(kvm, gfn, memslot, 1171342d3db7SPaul Mackerras PAGE_SIZE) < 0) 11722c9097e4SPaul Mackerras goto err; 1173c77162deSPaul Mackerras pa = *physp; 1174c77162deSPaul Mackerras } 1175da9d1d7fSPaul Mackerras page = pfn_to_page(pa >> PAGE_SHIFT); 1176de6c0b02SDavid Gibson get_page(page); 1177342d3db7SPaul Mackerras } else { 1178342d3db7SPaul Mackerras hva = gfn_to_hva_memslot(memslot, gfn); 1179342d3db7SPaul Mackerras npages = get_user_pages_fast(hva, 1, 1, pages); 1180342d3db7SPaul Mackerras if (npages < 1) 11812c9097e4SPaul Mackerras goto err; 1182342d3db7SPaul Mackerras page = pages[0]; 1183342d3db7SPaul Mackerras } 11842c9097e4SPaul Mackerras srcu_read_unlock(&kvm->srcu, srcu_idx); 11852c9097e4SPaul Mackerras 1186c35635efSPaul Mackerras offset = gpa & (PAGE_SIZE - 1); 118793e60249SPaul Mackerras if (nb_ret) 1188c35635efSPaul Mackerras *nb_ret = PAGE_SIZE - offset; 118993e60249SPaul Mackerras return page_address(page) + offset; 11902c9097e4SPaul Mackerras 11912c9097e4SPaul Mackerras err: 11922c9097e4SPaul Mackerras srcu_read_unlock(&kvm->srcu, srcu_idx); 11932c9097e4SPaul Mackerras return NULL; 119493e60249SPaul Mackerras } 119593e60249SPaul Mackerras 1196c35635efSPaul Mackerras void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa, 1197c35635efSPaul Mackerras bool dirty) 119893e60249SPaul Mackerras { 119993e60249SPaul Mackerras struct page *page = virt_to_page(va); 1200c35635efSPaul Mackerras struct kvm_memory_slot *memslot; 1201c35635efSPaul Mackerras unsigned long gfn; 1202c35635efSPaul Mackerras unsigned long *rmap; 1203c35635efSPaul Mackerras int srcu_idx; 120493e60249SPaul Mackerras 120593e60249SPaul Mackerras put_page(page); 1206c35635efSPaul Mackerras 1207c35635efSPaul Mackerras if (!dirty || !kvm->arch.using_mmu_notifiers) 1208c35635efSPaul Mackerras return; 1209c35635efSPaul Mackerras 1210c35635efSPaul Mackerras /* We need to mark this page dirty in the rmap chain */ 1211c35635efSPaul Mackerras gfn = gpa >> PAGE_SHIFT; 1212c35635efSPaul Mackerras srcu_idx = srcu_read_lock(&kvm->srcu); 1213c35635efSPaul Mackerras memslot = gfn_to_memslot(kvm, gfn); 1214c35635efSPaul Mackerras if (memslot) { 1215c35635efSPaul Mackerras rmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; 1216c35635efSPaul Mackerras lock_rmap(rmap); 1217c35635efSPaul Mackerras *rmap |= KVMPPC_RMAP_CHANGED; 1218c35635efSPaul Mackerras unlock_rmap(rmap); 1219c35635efSPaul Mackerras } 1220c35635efSPaul Mackerras srcu_read_unlock(&kvm->srcu, srcu_idx); 122193e60249SPaul Mackerras } 122293e60249SPaul Mackerras 1223a2932923SPaul Mackerras /* 1224a2932923SPaul Mackerras * Functions for reading and writing the hash table via reads and 1225a2932923SPaul Mackerras * writes on a file descriptor. 1226a2932923SPaul Mackerras * 1227a2932923SPaul Mackerras * Reads return the guest view of the hash table, which has to be 1228a2932923SPaul Mackerras * pieced together from the real hash table and the guest_rpte 1229a2932923SPaul Mackerras * values in the revmap array. 1230a2932923SPaul Mackerras * 1231a2932923SPaul Mackerras * On writes, each HPTE written is considered in turn, and if it 1232a2932923SPaul Mackerras * is valid, it is written to the HPT as if an H_ENTER with the 1233a2932923SPaul Mackerras * exact flag set was done. When the invalid count is non-zero 1234a2932923SPaul Mackerras * in the header written to the stream, the kernel will make 1235a2932923SPaul Mackerras * sure that that many HPTEs are invalid, and invalidate them 1236a2932923SPaul Mackerras * if not. 1237a2932923SPaul Mackerras */ 1238a2932923SPaul Mackerras 1239a2932923SPaul Mackerras struct kvm_htab_ctx { 1240a2932923SPaul Mackerras unsigned long index; 1241a2932923SPaul Mackerras unsigned long flags; 1242a2932923SPaul Mackerras struct kvm *kvm; 1243a2932923SPaul Mackerras int first_pass; 1244a2932923SPaul Mackerras }; 1245a2932923SPaul Mackerras 1246a2932923SPaul Mackerras #define HPTE_SIZE (2 * sizeof(unsigned long)) 1247a2932923SPaul Mackerras 1248a1b4a0f6SPaul Mackerras /* 1249a1b4a0f6SPaul Mackerras * Returns 1 if this HPT entry has been modified or has pending 1250a1b4a0f6SPaul Mackerras * R/C bit changes. 1251a1b4a0f6SPaul Mackerras */ 1252a1b4a0f6SPaul Mackerras static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp) 1253a1b4a0f6SPaul Mackerras { 1254a1b4a0f6SPaul Mackerras unsigned long rcbits_unset; 1255a1b4a0f6SPaul Mackerras 1256a1b4a0f6SPaul Mackerras if (revp->guest_rpte & HPTE_GR_MODIFIED) 1257a1b4a0f6SPaul Mackerras return 1; 1258a1b4a0f6SPaul Mackerras 1259a1b4a0f6SPaul Mackerras /* Also need to consider changes in reference and changed bits */ 1260a1b4a0f6SPaul Mackerras rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); 1261a1b4a0f6SPaul Mackerras if ((hptp[0] & HPTE_V_VALID) && (hptp[1] & rcbits_unset)) 1262a1b4a0f6SPaul Mackerras return 1; 1263a1b4a0f6SPaul Mackerras 1264a1b4a0f6SPaul Mackerras return 0; 1265a1b4a0f6SPaul Mackerras } 1266a1b4a0f6SPaul Mackerras 1267a2932923SPaul Mackerras static long record_hpte(unsigned long flags, unsigned long *hptp, 1268a2932923SPaul Mackerras unsigned long *hpte, struct revmap_entry *revp, 1269a2932923SPaul Mackerras int want_valid, int first_pass) 1270a2932923SPaul Mackerras { 1271a2932923SPaul Mackerras unsigned long v, r; 1272a1b4a0f6SPaul Mackerras unsigned long rcbits_unset; 1273a2932923SPaul Mackerras int ok = 1; 1274a2932923SPaul Mackerras int valid, dirty; 1275a2932923SPaul Mackerras 1276a2932923SPaul Mackerras /* Unmodified entries are uninteresting except on the first pass */ 1277a1b4a0f6SPaul Mackerras dirty = hpte_dirty(revp, hptp); 1278a2932923SPaul Mackerras if (!first_pass && !dirty) 1279a2932923SPaul Mackerras return 0; 1280a2932923SPaul Mackerras 1281a2932923SPaul Mackerras valid = 0; 1282a2932923SPaul Mackerras if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) { 1283a2932923SPaul Mackerras valid = 1; 1284a2932923SPaul Mackerras if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && 1285a2932923SPaul Mackerras !(hptp[0] & HPTE_V_BOLTED)) 1286a2932923SPaul Mackerras valid = 0; 1287a2932923SPaul Mackerras } 1288a2932923SPaul Mackerras if (valid != want_valid) 1289a2932923SPaul Mackerras return 0; 1290a2932923SPaul Mackerras 1291a2932923SPaul Mackerras v = r = 0; 1292a2932923SPaul Mackerras if (valid || dirty) { 1293a2932923SPaul Mackerras /* lock the HPTE so it's stable and read it */ 1294a2932923SPaul Mackerras preempt_disable(); 1295a2932923SPaul Mackerras while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) 1296a2932923SPaul Mackerras cpu_relax(); 1297a2932923SPaul Mackerras v = hptp[0]; 1298a1b4a0f6SPaul Mackerras 1299a1b4a0f6SPaul Mackerras /* re-evaluate valid and dirty from synchronized HPTE value */ 1300a1b4a0f6SPaul Mackerras valid = !!(v & HPTE_V_VALID); 1301a1b4a0f6SPaul Mackerras dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); 1302a1b4a0f6SPaul Mackerras 1303a1b4a0f6SPaul Mackerras /* Harvest R and C into guest view if necessary */ 1304a1b4a0f6SPaul Mackerras rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); 1305a1b4a0f6SPaul Mackerras if (valid && (rcbits_unset & hptp[1])) { 1306a1b4a0f6SPaul Mackerras revp->guest_rpte |= (hptp[1] & (HPTE_R_R | HPTE_R_C)) | 1307a1b4a0f6SPaul Mackerras HPTE_GR_MODIFIED; 1308a1b4a0f6SPaul Mackerras dirty = 1; 1309a1b4a0f6SPaul Mackerras } 1310a1b4a0f6SPaul Mackerras 1311a2932923SPaul Mackerras if (v & HPTE_V_ABSENT) { 1312a2932923SPaul Mackerras v &= ~HPTE_V_ABSENT; 1313a2932923SPaul Mackerras v |= HPTE_V_VALID; 1314a1b4a0f6SPaul Mackerras valid = 1; 1315a2932923SPaul Mackerras } 1316a2932923SPaul Mackerras if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED)) 1317a2932923SPaul Mackerras valid = 0; 1318a1b4a0f6SPaul Mackerras 1319a1b4a0f6SPaul Mackerras r = revp->guest_rpte; 1320a2932923SPaul Mackerras /* only clear modified if this is the right sort of entry */ 1321a2932923SPaul Mackerras if (valid == want_valid && dirty) { 1322a2932923SPaul Mackerras r &= ~HPTE_GR_MODIFIED; 1323a2932923SPaul Mackerras revp->guest_rpte = r; 1324a2932923SPaul Mackerras } 1325a2932923SPaul Mackerras asm volatile(PPC_RELEASE_BARRIER "" : : : "memory"); 1326a2932923SPaul Mackerras hptp[0] &= ~HPTE_V_HVLOCK; 1327a2932923SPaul Mackerras preempt_enable(); 1328a2932923SPaul Mackerras if (!(valid == want_valid && (first_pass || dirty))) 1329a2932923SPaul Mackerras ok = 0; 1330a2932923SPaul Mackerras } 1331a2932923SPaul Mackerras hpte[0] = v; 1332a2932923SPaul Mackerras hpte[1] = r; 1333a2932923SPaul Mackerras return ok; 1334a2932923SPaul Mackerras } 1335a2932923SPaul Mackerras 1336a2932923SPaul Mackerras static ssize_t kvm_htab_read(struct file *file, char __user *buf, 1337a2932923SPaul Mackerras size_t count, loff_t *ppos) 1338a2932923SPaul Mackerras { 1339a2932923SPaul Mackerras struct kvm_htab_ctx *ctx = file->private_data; 1340a2932923SPaul Mackerras struct kvm *kvm = ctx->kvm; 1341a2932923SPaul Mackerras struct kvm_get_htab_header hdr; 1342a2932923SPaul Mackerras unsigned long *hptp; 1343a2932923SPaul Mackerras struct revmap_entry *revp; 1344a2932923SPaul Mackerras unsigned long i, nb, nw; 1345a2932923SPaul Mackerras unsigned long __user *lbuf; 1346a2932923SPaul Mackerras struct kvm_get_htab_header __user *hptr; 1347a2932923SPaul Mackerras unsigned long flags; 1348a2932923SPaul Mackerras int first_pass; 1349a2932923SPaul Mackerras unsigned long hpte[2]; 1350a2932923SPaul Mackerras 1351a2932923SPaul Mackerras if (!access_ok(VERIFY_WRITE, buf, count)) 1352a2932923SPaul Mackerras return -EFAULT; 1353a2932923SPaul Mackerras 1354a2932923SPaul Mackerras first_pass = ctx->first_pass; 1355a2932923SPaul Mackerras flags = ctx->flags; 1356a2932923SPaul Mackerras 1357a2932923SPaul Mackerras i = ctx->index; 1358a2932923SPaul Mackerras hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); 1359a2932923SPaul Mackerras revp = kvm->arch.revmap + i; 1360a2932923SPaul Mackerras lbuf = (unsigned long __user *)buf; 1361a2932923SPaul Mackerras 1362a2932923SPaul Mackerras nb = 0; 1363a2932923SPaul Mackerras while (nb + sizeof(hdr) + HPTE_SIZE < count) { 1364a2932923SPaul Mackerras /* Initialize header */ 1365a2932923SPaul Mackerras hptr = (struct kvm_get_htab_header __user *)buf; 1366a2932923SPaul Mackerras hdr.n_valid = 0; 1367a2932923SPaul Mackerras hdr.n_invalid = 0; 1368a2932923SPaul Mackerras nw = nb; 1369a2932923SPaul Mackerras nb += sizeof(hdr); 1370a2932923SPaul Mackerras lbuf = (unsigned long __user *)(buf + sizeof(hdr)); 1371a2932923SPaul Mackerras 1372a2932923SPaul Mackerras /* Skip uninteresting entries, i.e. clean on not-first pass */ 1373a2932923SPaul Mackerras if (!first_pass) { 1374a2932923SPaul Mackerras while (i < kvm->arch.hpt_npte && 1375a1b4a0f6SPaul Mackerras !hpte_dirty(revp, hptp)) { 1376a2932923SPaul Mackerras ++i; 1377a2932923SPaul Mackerras hptp += 2; 1378a2932923SPaul Mackerras ++revp; 1379a2932923SPaul Mackerras } 1380a2932923SPaul Mackerras } 138105dd85f7SPaul Mackerras hdr.index = i; 1382a2932923SPaul Mackerras 1383a2932923SPaul Mackerras /* Grab a series of valid entries */ 1384a2932923SPaul Mackerras while (i < kvm->arch.hpt_npte && 1385a2932923SPaul Mackerras hdr.n_valid < 0xffff && 1386a2932923SPaul Mackerras nb + HPTE_SIZE < count && 1387a2932923SPaul Mackerras record_hpte(flags, hptp, hpte, revp, 1, first_pass)) { 1388a2932923SPaul Mackerras /* valid entry, write it out */ 1389a2932923SPaul Mackerras ++hdr.n_valid; 1390a2932923SPaul Mackerras if (__put_user(hpte[0], lbuf) || 1391a2932923SPaul Mackerras __put_user(hpte[1], lbuf + 1)) 1392a2932923SPaul Mackerras return -EFAULT; 1393a2932923SPaul Mackerras nb += HPTE_SIZE; 1394a2932923SPaul Mackerras lbuf += 2; 1395a2932923SPaul Mackerras ++i; 1396a2932923SPaul Mackerras hptp += 2; 1397a2932923SPaul Mackerras ++revp; 1398a2932923SPaul Mackerras } 1399a2932923SPaul Mackerras /* Now skip invalid entries while we can */ 1400a2932923SPaul Mackerras while (i < kvm->arch.hpt_npte && 1401a2932923SPaul Mackerras hdr.n_invalid < 0xffff && 1402a2932923SPaul Mackerras record_hpte(flags, hptp, hpte, revp, 0, first_pass)) { 1403a2932923SPaul Mackerras /* found an invalid entry */ 1404a2932923SPaul Mackerras ++hdr.n_invalid; 1405a2932923SPaul Mackerras ++i; 1406a2932923SPaul Mackerras hptp += 2; 1407a2932923SPaul Mackerras ++revp; 1408a2932923SPaul Mackerras } 1409a2932923SPaul Mackerras 1410a2932923SPaul Mackerras if (hdr.n_valid || hdr.n_invalid) { 1411a2932923SPaul Mackerras /* write back the header */ 1412a2932923SPaul Mackerras if (__copy_to_user(hptr, &hdr, sizeof(hdr))) 1413a2932923SPaul Mackerras return -EFAULT; 1414a2932923SPaul Mackerras nw = nb; 1415a2932923SPaul Mackerras buf = (char __user *)lbuf; 1416a2932923SPaul Mackerras } else { 1417a2932923SPaul Mackerras nb = nw; 1418a2932923SPaul Mackerras } 1419a2932923SPaul Mackerras 1420a2932923SPaul Mackerras /* Check if we've wrapped around the hash table */ 1421a2932923SPaul Mackerras if (i >= kvm->arch.hpt_npte) { 1422a2932923SPaul Mackerras i = 0; 1423a2932923SPaul Mackerras ctx->first_pass = 0; 1424a2932923SPaul Mackerras break; 1425a2932923SPaul Mackerras } 1426a2932923SPaul Mackerras } 1427a2932923SPaul Mackerras 1428a2932923SPaul Mackerras ctx->index = i; 1429a2932923SPaul Mackerras 1430a2932923SPaul Mackerras return nb; 1431a2932923SPaul Mackerras } 1432a2932923SPaul Mackerras 1433a2932923SPaul Mackerras static ssize_t kvm_htab_write(struct file *file, const char __user *buf, 1434a2932923SPaul Mackerras size_t count, loff_t *ppos) 1435a2932923SPaul Mackerras { 1436a2932923SPaul Mackerras struct kvm_htab_ctx *ctx = file->private_data; 1437a2932923SPaul Mackerras struct kvm *kvm = ctx->kvm; 1438a2932923SPaul Mackerras struct kvm_get_htab_header hdr; 1439a2932923SPaul Mackerras unsigned long i, j; 1440a2932923SPaul Mackerras unsigned long v, r; 1441a2932923SPaul Mackerras unsigned long __user *lbuf; 1442a2932923SPaul Mackerras unsigned long *hptp; 1443a2932923SPaul Mackerras unsigned long tmp[2]; 1444a2932923SPaul Mackerras ssize_t nb; 1445a2932923SPaul Mackerras long int err, ret; 1446a2932923SPaul Mackerras int rma_setup; 1447a2932923SPaul Mackerras 1448a2932923SPaul Mackerras if (!access_ok(VERIFY_READ, buf, count)) 1449a2932923SPaul Mackerras return -EFAULT; 1450a2932923SPaul Mackerras 1451a2932923SPaul Mackerras /* lock out vcpus from running while we're doing this */ 1452a2932923SPaul Mackerras mutex_lock(&kvm->lock); 1453a2932923SPaul Mackerras rma_setup = kvm->arch.rma_setup_done; 1454a2932923SPaul Mackerras if (rma_setup) { 1455a2932923SPaul Mackerras kvm->arch.rma_setup_done = 0; /* temporarily */ 1456a2932923SPaul Mackerras /* order rma_setup_done vs. vcpus_running */ 1457a2932923SPaul Mackerras smp_mb(); 1458a2932923SPaul Mackerras if (atomic_read(&kvm->arch.vcpus_running)) { 1459a2932923SPaul Mackerras kvm->arch.rma_setup_done = 1; 1460a2932923SPaul Mackerras mutex_unlock(&kvm->lock); 1461a2932923SPaul Mackerras return -EBUSY; 1462a2932923SPaul Mackerras } 1463a2932923SPaul Mackerras } 1464a2932923SPaul Mackerras 1465a2932923SPaul Mackerras err = 0; 1466a2932923SPaul Mackerras for (nb = 0; nb + sizeof(hdr) <= count; ) { 1467a2932923SPaul Mackerras err = -EFAULT; 1468a2932923SPaul Mackerras if (__copy_from_user(&hdr, buf, sizeof(hdr))) 1469a2932923SPaul Mackerras break; 1470a2932923SPaul Mackerras 1471a2932923SPaul Mackerras err = 0; 1472a2932923SPaul Mackerras if (nb + hdr.n_valid * HPTE_SIZE > count) 1473a2932923SPaul Mackerras break; 1474a2932923SPaul Mackerras 1475a2932923SPaul Mackerras nb += sizeof(hdr); 1476a2932923SPaul Mackerras buf += sizeof(hdr); 1477a2932923SPaul Mackerras 1478a2932923SPaul Mackerras err = -EINVAL; 1479a2932923SPaul Mackerras i = hdr.index; 1480a2932923SPaul Mackerras if (i >= kvm->arch.hpt_npte || 1481a2932923SPaul Mackerras i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte) 1482a2932923SPaul Mackerras break; 1483a2932923SPaul Mackerras 1484a2932923SPaul Mackerras hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); 1485a2932923SPaul Mackerras lbuf = (unsigned long __user *)buf; 1486a2932923SPaul Mackerras for (j = 0; j < hdr.n_valid; ++j) { 1487a2932923SPaul Mackerras err = -EFAULT; 1488a2932923SPaul Mackerras if (__get_user(v, lbuf) || __get_user(r, lbuf + 1)) 1489a2932923SPaul Mackerras goto out; 1490a2932923SPaul Mackerras err = -EINVAL; 1491a2932923SPaul Mackerras if (!(v & HPTE_V_VALID)) 1492a2932923SPaul Mackerras goto out; 1493a2932923SPaul Mackerras lbuf += 2; 1494a2932923SPaul Mackerras nb += HPTE_SIZE; 1495a2932923SPaul Mackerras 1496a2932923SPaul Mackerras if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) 1497a2932923SPaul Mackerras kvmppc_do_h_remove(kvm, 0, i, 0, tmp); 1498a2932923SPaul Mackerras err = -EIO; 1499a2932923SPaul Mackerras ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r, 1500a2932923SPaul Mackerras tmp); 1501a2932923SPaul Mackerras if (ret != H_SUCCESS) { 1502a2932923SPaul Mackerras pr_err("kvm_htab_write ret %ld i=%ld v=%lx " 1503a2932923SPaul Mackerras "r=%lx\n", ret, i, v, r); 1504a2932923SPaul Mackerras goto out; 1505a2932923SPaul Mackerras } 1506a2932923SPaul Mackerras if (!rma_setup && is_vrma_hpte(v)) { 1507a2932923SPaul Mackerras unsigned long psize = hpte_page_size(v, r); 1508a2932923SPaul Mackerras unsigned long senc = slb_pgsize_encoding(psize); 1509a2932923SPaul Mackerras unsigned long lpcr; 1510a2932923SPaul Mackerras 1511a2932923SPaul Mackerras kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | 1512a2932923SPaul Mackerras (VRMA_VSID << SLB_VSID_SHIFT_1T); 1513a2932923SPaul Mackerras lpcr = kvm->arch.lpcr & ~LPCR_VRMASD; 1514a2932923SPaul Mackerras lpcr |= senc << (LPCR_VRMASD_SH - 4); 1515a2932923SPaul Mackerras kvm->arch.lpcr = lpcr; 1516a2932923SPaul Mackerras rma_setup = 1; 1517a2932923SPaul Mackerras } 1518a2932923SPaul Mackerras ++i; 1519a2932923SPaul Mackerras hptp += 2; 1520a2932923SPaul Mackerras } 1521a2932923SPaul Mackerras 1522a2932923SPaul Mackerras for (j = 0; j < hdr.n_invalid; ++j) { 1523a2932923SPaul Mackerras if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) 1524a2932923SPaul Mackerras kvmppc_do_h_remove(kvm, 0, i, 0, tmp); 1525a2932923SPaul Mackerras ++i; 1526a2932923SPaul Mackerras hptp += 2; 1527a2932923SPaul Mackerras } 1528a2932923SPaul Mackerras err = 0; 1529a2932923SPaul Mackerras } 1530a2932923SPaul Mackerras 1531a2932923SPaul Mackerras out: 1532a2932923SPaul Mackerras /* Order HPTE updates vs. rma_setup_done */ 1533a2932923SPaul Mackerras smp_wmb(); 1534a2932923SPaul Mackerras kvm->arch.rma_setup_done = rma_setup; 1535a2932923SPaul Mackerras mutex_unlock(&kvm->lock); 1536a2932923SPaul Mackerras 1537a2932923SPaul Mackerras if (err) 1538a2932923SPaul Mackerras return err; 1539a2932923SPaul Mackerras return nb; 1540a2932923SPaul Mackerras } 1541a2932923SPaul Mackerras 1542a2932923SPaul Mackerras static int kvm_htab_release(struct inode *inode, struct file *filp) 1543a2932923SPaul Mackerras { 1544a2932923SPaul Mackerras struct kvm_htab_ctx *ctx = filp->private_data; 1545a2932923SPaul Mackerras 1546a2932923SPaul Mackerras filp->private_data = NULL; 1547a2932923SPaul Mackerras if (!(ctx->flags & KVM_GET_HTAB_WRITE)) 1548a2932923SPaul Mackerras atomic_dec(&ctx->kvm->arch.hpte_mod_interest); 1549a2932923SPaul Mackerras kvm_put_kvm(ctx->kvm); 1550a2932923SPaul Mackerras kfree(ctx); 1551a2932923SPaul Mackerras return 0; 1552a2932923SPaul Mackerras } 1553a2932923SPaul Mackerras 155475ef9de1SAl Viro static const struct file_operations kvm_htab_fops = { 1555a2932923SPaul Mackerras .read = kvm_htab_read, 1556a2932923SPaul Mackerras .write = kvm_htab_write, 1557a2932923SPaul Mackerras .llseek = default_llseek, 1558a2932923SPaul Mackerras .release = kvm_htab_release, 1559a2932923SPaul Mackerras }; 1560a2932923SPaul Mackerras 1561a2932923SPaul Mackerras int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf) 1562a2932923SPaul Mackerras { 1563a2932923SPaul Mackerras int ret; 1564a2932923SPaul Mackerras struct kvm_htab_ctx *ctx; 1565a2932923SPaul Mackerras int rwflag; 1566a2932923SPaul Mackerras 1567a2932923SPaul Mackerras /* reject flags we don't recognize */ 1568a2932923SPaul Mackerras if (ghf->flags & ~(KVM_GET_HTAB_BOLTED_ONLY | KVM_GET_HTAB_WRITE)) 1569a2932923SPaul Mackerras return -EINVAL; 1570a2932923SPaul Mackerras ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 1571a2932923SPaul Mackerras if (!ctx) 1572a2932923SPaul Mackerras return -ENOMEM; 1573a2932923SPaul Mackerras kvm_get_kvm(kvm); 1574a2932923SPaul Mackerras ctx->kvm = kvm; 1575a2932923SPaul Mackerras ctx->index = ghf->start_index; 1576a2932923SPaul Mackerras ctx->flags = ghf->flags; 1577a2932923SPaul Mackerras ctx->first_pass = 1; 1578a2932923SPaul Mackerras 1579a2932923SPaul Mackerras rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY; 1580a2932923SPaul Mackerras ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag); 1581a2932923SPaul Mackerras if (ret < 0) { 1582a2932923SPaul Mackerras kvm_put_kvm(kvm); 1583a2932923SPaul Mackerras return ret; 1584a2932923SPaul Mackerras } 1585a2932923SPaul Mackerras 1586a2932923SPaul Mackerras if (rwflag == O_RDONLY) { 1587a2932923SPaul Mackerras mutex_lock(&kvm->slots_lock); 1588a2932923SPaul Mackerras atomic_inc(&kvm->arch.hpte_mod_interest); 1589a2932923SPaul Mackerras /* make sure kvmppc_do_h_enter etc. see the increment */ 1590a2932923SPaul Mackerras synchronize_srcu_expedited(&kvm->srcu); 1591a2932923SPaul Mackerras mutex_unlock(&kvm->slots_lock); 1592a2932923SPaul Mackerras } 1593a2932923SPaul Mackerras 1594a2932923SPaul Mackerras return ret; 1595a2932923SPaul Mackerras } 1596a2932923SPaul Mackerras 1597de56a948SPaul Mackerras void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) 1598de56a948SPaul Mackerras { 1599de56a948SPaul Mackerras struct kvmppc_mmu *mmu = &vcpu->arch.mmu; 1600de56a948SPaul Mackerras 16019e368f29SPaul Mackerras if (cpu_has_feature(CPU_FTR_ARCH_206)) 16029e368f29SPaul Mackerras vcpu->arch.slb_nr = 32; /* POWER7 */ 16039e368f29SPaul Mackerras else 16049e368f29SPaul Mackerras vcpu->arch.slb_nr = 64; 1605de56a948SPaul Mackerras 1606de56a948SPaul Mackerras mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate; 1607de56a948SPaul Mackerras mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr; 1608de56a948SPaul Mackerras 1609de56a948SPaul Mackerras vcpu->arch.hflags |= BOOK3S_HFLAG_SLB; 1610de56a948SPaul Mackerras } 1611