1de56a948SPaul Mackerras /* 2de56a948SPaul Mackerras * This program is free software; you can redistribute it and/or modify 3de56a948SPaul Mackerras * it under the terms of the GNU General Public License, version 2, as 4de56a948SPaul Mackerras * published by the Free Software Foundation. 5de56a948SPaul Mackerras * 6de56a948SPaul Mackerras * This program is distributed in the hope that it will be useful, 7de56a948SPaul Mackerras * but WITHOUT ANY WARRANTY; without even the implied warranty of 8de56a948SPaul Mackerras * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9de56a948SPaul Mackerras * GNU General Public License for more details. 10de56a948SPaul Mackerras * 11de56a948SPaul Mackerras * You should have received a copy of the GNU General Public License 12de56a948SPaul Mackerras * along with this program; if not, write to the Free Software 13de56a948SPaul Mackerras * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14de56a948SPaul Mackerras * 15de56a948SPaul Mackerras * Copyright 2010 Paul Mackerras, IBM Corp. 
 * <paulus@au1.ibm.com>
 */

#include <linux/types.h>
#include <linux/string.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/highmem.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/vmalloc.h>
#include <linux/srcu.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>

#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu-hash64.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
#include <asm/cputable.h>

/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
#define MAX_LPID_970	63

/* Power architecture requires HPT is at least 256kB */
#define PPC_MIN_HPT_ORDER	18

static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
				long pte_index, unsigned long pteh,
				unsigned long ptel, unsigned long *pte_idx_ret);
static void kvmppc_rmap_reset(struct kvm *kvm);

/*
 * Allocate the guest's hashed page table (HPT) and its reverse-map array.
 *
 * The allocation strategy is, in order:
 *   1. if the caller asked for a non-default order, try the kernel page
 *      allocator at exactly that order;
 *   2. try the preallocated linear-allocation pool (kvm_alloc_hpt());
 *   3. fall back to the page allocator at successively smaller orders,
 *      down to PPC_MIN_HPT_ORDER.
 *
 * On success, fills in kvm->arch.hpt_* and kvm->arch.sdr1, writes the
 * order actually used back through *htab_orderp (if non-NULL), and
 * returns 0.  Returns -ENOMEM on failure.
 */
long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
{
	unsigned long hpt;
	struct revmap_entry *rev;
	struct kvmppc_linear_info *li;
	long order = kvm_hpt_order;

	if (htab_orderp) {
		order = *htab_orderp;
		if (order < PPC_MIN_HPT_ORDER)
			order = PPC_MIN_HPT_ORDER;
	}

	/*
	 * If the user wants a different size from default,
	 * try first to allocate it from the kernel page allocator.
	 */
	hpt = 0;
	if (order != kvm_hpt_order) {
		hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
				       __GFP_NOWARN, order - PAGE_SHIFT);
		if (!hpt)
			--order;
	}

	/* Next try to allocate from the preallocated pool */
	if (!hpt) {
		li = kvm_alloc_hpt();
		if (li) {
			hpt = (ulong)li->base_virt;
			kvm->arch.hpt_li = li;
			order = kvm_hpt_order;
		}
	}

	/* Lastly try successively smaller sizes from the page allocator */
	while (!hpt && order > PPC_MIN_HPT_ORDER) {
		hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
				       __GFP_NOWARN, order - PAGE_SHIFT);
		if (!hpt)
			--order;
	}

	if (!hpt)
		return -ENOMEM;

	kvm->arch.hpt_virt = hpt;
	kvm->arch.hpt_order = order;
	/* HPTEs are 2**4 bytes long */
	kvm->arch.hpt_npte = 1ul << (order - 4);
	/* 128 (2**7) bytes in each HPTEG */
	kvm->arch.hpt_mask = (1ul << (order - 7)) - 1;

	/* Allocate reverse map array */
	rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte);
	if (!rev) {
		pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n");
		goto out_freehpt;
	}
	kvm->arch.revmap = rev;
	kvm->arch.sdr1 = __pa(hpt) | (order - 18);

	pr_info("KVM guest htab at %lx (order %ld), LPID %x\n",
		hpt, order, kvm->arch.lpid);

	if (htab_orderp)
		*htab_orderp = order;
	return 0;

 out_freehpt:
	/* Give the HPT back to whichever allocator it came from */
	if (kvm->arch.hpt_li)
		kvm_release_hpt(kvm->arch.hpt_li);
	else
		free_pages(hpt, order - PAGE_SHIFT);
	return -ENOMEM;
}

/*
 * Clear the existing HPT (invalidating every HPTE) so the guest can be
 * restarted, or allocate one if none exists yet.  Fails with -EBUSY if
 * any vcpu is currently running.  The order used is returned through
 * *htab_orderp.
 */
long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
{
	long err = -EBUSY;
	long order;

	mutex_lock(&kvm->lock);
	if (kvm->arch.rma_setup_done) {
		kvm->arch.rma_setup_done = 0;
		/* order rma_setup_done vs. vcpus_running */
		smp_mb();
		if (atomic_read(&kvm->arch.vcpus_running)) {
			/* a vcpu raced in; restore the flag and give up */
			kvm->arch.rma_setup_done = 1;
			goto out;
		}
	}
	if (kvm->arch.hpt_virt) {
		order = kvm->arch.hpt_order;
		/* Set the entire HPT to 0, i.e. invalid HPTEs */
		memset((void *)kvm->arch.hpt_virt, 0, 1ul << order);
		/*
		 * Reset all the reverse-mapping chains for all memslots
		 */
		kvmppc_rmap_reset(kvm);
		/*
		 * Set the whole last_vcpu array to an invalid vcpu number.
		 * This ensures that each vcpu will flush its TLB on next entry.
		 */
		memset(kvm->arch.last_vcpu, 0xff, sizeof(kvm->arch.last_vcpu));
		*htab_orderp = order;
		err = 0;
	} else {
		err = kvmppc_alloc_hpt(kvm, htab_orderp);
		order = *htab_orderp;
	}
 out:
	mutex_unlock(&kvm->lock);
	return err;
}

/*
 * Free the HPT, the reverse-map array, and release the guest's LPID.
 * Counterpart of kvmppc_alloc_hpt().
 */
void kvmppc_free_hpt(struct kvm *kvm)
{
	kvmppc_free_lpid(kvm->arch.lpid);
	vfree(kvm->arch.revmap);
	if (kvm->arch.hpt_li)
		kvm_release_hpt(kvm->arch.hpt_li);
	else
		free_pages(kvm->arch.hpt_virt,
			   kvm->arch.hpt_order - PAGE_SHIFT);
}

/* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize)
{
	return (pgsize > 0x1000) ? HPTE_V_LARGE : 0;
}

/* Bits in second HPTE dword for pagesize 4k, 64k or 16M */
static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize)
{
	return (pgsize == 0x10000) ? 0x1000 : 0;
}

/*
 * Populate the HPT with bolted entries mapping the VRMA (virtual real
 * mode area) for the given memslot at page-size order @porder.  One HPTE
 * is created per large page, always in slot 7 of its HPTEG (the table is
 * assumed empty and unused at this point).
 */
void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
		     unsigned long porder)
{
	unsigned long i;
	unsigned long npages;
	unsigned long hp_v, hp_r;
	unsigned long addr, hash;
	unsigned long psize;
	unsigned long hp0, hp1;
	unsigned long idx_ret;
	long ret;
	struct kvm *kvm = vcpu->kvm;

	psize = 1ul << porder;
	npages = memslot->npages >> (porder - PAGE_SHIFT);

	/* VRMA can't be > 1TB */
	if (npages > 1ul << (40 - porder))
		npages = 1ul << (40 - porder);
	/* Can't use more than 1 HPTE per HPTEG */
	if (npages > kvm->arch.hpt_mask + 1)
		npages = kvm->arch.hpt_mask + 1;

	/* Template first/second dwords shared by every VRMA HPTE */
	hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
		HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
	hp1 = hpte1_pgsize_encoding(psize) |
		HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;

	for (i = 0; i < npages; ++i) {
		addr = i << porder;
		/* can't use hpt_hash since va > 64 bits */
		hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & kvm->arch.hpt_mask;
		/*
		 * We assume that the hash table is empty and no
		 * vcpus are using it at this stage.  Since we create
		 * at most one HPTE per HPTEG, we just assume entry 7
		 * is available and use it.
		 */
		hash = (hash << 3) + 7;
		hp_v = hp0 | ((addr >> 16) & ~0x7fUL);
		hp_r = hp1 | addr;
		ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, hash, hp_v, hp_r,
						 &idx_ret);
		if (ret != H_SUCCESS) {
			pr_err("KVM: map_vrma at %lx failed, ret=%ld\n",
			       addr, ret);
			break;
		}
	}
}

/*
 * One-time module init: requires hypervisor mode, then sets up the LPID
 * allocator and claims the host LPID plus the reserved LPID used during
 * partition switching.
 */
int kvmppc_mmu_hv_init(void)
{
	unsigned long host_lpid, rsvd_lpid;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;

	/* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */
	if (cpu_has_feature(CPU_FTR_ARCH_206)) {
		host_lpid = mfspr(SPRN_LPID);	/* POWER7 */
		rsvd_lpid = LPID_RSVD;
	} else {
		host_lpid = 0;			/* PPC970 */
		rsvd_lpid = MAX_LPID_970;
	}

	kvmppc_init_lpid(rsvd_lpid + 1);

	kvmppc_claim_lpid(host_lpid);
	/* rsvd_lpid is reserved for use in partition switching */
	kvmppc_claim_lpid(rsvd_lpid);

	return 0;
}

/* Nothing to tear down per-vcpu for the HV MMU */
void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
{
}

/* Put the vcpu back into 64-bit, machine-check-enabled real mode */
static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
{
	kvmppc_set_msr(vcpu, MSR_SF | MSR_ME);
}

/*
 * This is called to get a reference to a guest page if there isn't
 * one already in the memslot->arch.slot_phys[] array.
 */
static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
				struct kvm_memory_slot *memslot,
				unsigned long psize)
{
	unsigned long start;
	long np, err;
	struct page *page, *hpage, *pages[1];
	unsigned long s, pgsize;
	unsigned long *physp;
	unsigned int is_io, got, pgorder;
	struct vm_area_struct *vma;
	unsigned long pfn, i, npages;

	physp = memslot->arch.slot_phys;
	if (!physp)
		return -EINVAL;
	/* already have this page pinned? */
	if (physp[gfn - memslot->base_gfn])
		return 0;

	is_io = 0;
	got = 0;
	page = NULL;
	pgsize = psize;
	err = -EINVAL;
	start = gfn_to_hva_memslot(memslot, gfn);

	/* Instantiate and get the page we want access to */
	np = get_user_pages_fast(start, 1, 1, pages);
	if (np != 1) {
		/*
		 * Not a normal pinnable page; look up the vma and accept it
		 * only if it is a VM_PFNMAP mapping that covers the whole
		 * requested range.
		 */
		down_read(&current->mm->mmap_sem);
		vma = find_vma(current->mm, start);
		if (!vma || vma->vm_start > start ||
		    start + psize > vma->vm_end ||
		    !(vma->vm_flags & VM_PFNMAP))
			goto up_err;
		is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
		pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
		/* check alignment of pfn vs. requested page size */
		if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1)))
			goto up_err;
		up_read(&current->mm->mmap_sem);

	} else {
		page = pages[0];
		got = KVMPPC_GOT_PAGE;

		/* See if this is a large page */
		s = PAGE_SIZE;
		if (PageHuge(page)) {
			hpage = compound_head(page);
			s <<= compound_order(hpage);
			/* Get the whole large page if slot alignment is ok */
			if (s > psize && slot_is_aligned(memslot, s) &&
			    !(memslot->userspace_addr & (s - 1))) {
				start &= ~(s - 1);
				pgsize = s;
				/* swap the reference from the tail page to the head */
				get_page(hpage);
				put_page(page);
				page = hpage;
			}
		}
		/* page is smaller than what the HPTE needs: give up */
		if (s < psize)
			goto out;
		pfn = page_to_pfn(page);
	}

	/* Record the pfn (plus flags) for every 4k sub-page of the region */
	npages = pgsize >> PAGE_SHIFT;
	pgorder = __ilog2(npages);
	physp += (gfn - memslot->base_gfn) & ~(npages - 1);
	spin_lock(&kvm->arch.slot_phys_lock);
	for (i = 0; i < npages; ++i) {
		if (!physp[i]) {
			physp[i] = ((pfn + i) << PAGE_SHIFT) +
				got + is_io + pgorder;
			/* only the first entry keeps the page reference */
			got = 0;
		}
	}
	spin_unlock(&kvm->arch.slot_phys_lock);
	err = 0;

 out:
	/* drop our reference if it wasn't recorded in slot_phys[] */
	if (got)
		put_page(page);
	return err;

 up_err:
	up_read(&current->mm->mmap_sem);
	return err;
}

/*
 * Insert an HPTE on behalf of virtual-mode code (host context).
 * Without MMU notifiers, the backing guest page must be pinned first
 * via kvmppc_get_guest_page().  The index of the inserted HPTE is
 * returned through *pte_idx_ret; the return value is an H_* hcall
 * status.
 */
long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
				long pte_index, unsigned long pteh,
				unsigned long ptel, unsigned long *pte_idx_ret)
{
	unsigned long psize, gpa, gfn;
	struct kvm_memory_slot *memslot;
	long ret;

	if (kvm->arch.using_mmu_notifiers)
		goto do_insert;

	psize = hpte_page_size(pteh, ptel);
	if (!psize)
		return H_PARAMETER;

	pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);

	/* Find the memslot (if any) for this address */
	gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
	gfn = gpa >> PAGE_SHIFT;
	memslot = gfn_to_memslot(kvm, gfn);
	if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) {
		if (!slot_is_aligned(memslot, psize))
			return H_PARAMETER;
		if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0)
			return H_PARAMETER;
	}

 do_insert:
	/* Protect linux PTE lookup from page table destruction */
	rcu_read_lock_sched();	/* this disables preemption too */
	ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
				current->mm->pgd, false, pte_idx_ret);
	rcu_read_unlock_sched();
	if (ret == H_TOO_HARD) {
		/* this can't happen */
		pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n");
		ret = H_RESOURCE;	/* or something */
	}
	return ret;

}

/*
 * We come here on a H_ENTER call from the guest when we are not
 * using mmu notifiers and we don't have the requested page pinned
 * already.
 */
long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
			     long pte_index, unsigned long pteh,
			     unsigned long ptel)
{
	/* the inserted index goes to the guest in gpr4 */
	return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index,
					  pteh, ptel, &vcpu->arch.gpr[4]);
}

/*
 * Find the guest SLB entry, if any, whose (1T or 256M) segment covers
 * the effective address @eaddr.  Returns NULL if no valid entry matches.
 */
static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
							 gva_t eaddr)
{
	u64 mask;
	int i;

	for (i = 0; i < vcpu->arch.slb_nr; i++) {
		if (!(vcpu->arch.slb[i].orige & SLB_ESID_V))
			continue;

		if (vcpu->arch.slb[i].origv & SLB_VSID_B_1T)
			mask = ESID_MASK_1T;
		else
			mask = ESID_MASK;

		if (((vcpu->arch.slb[i].orige ^ eaddr) & mask) == 0)
			return &vcpu->arch.slb[i];
	}
	return NULL;
}

/*
 * Combine the real page number from an HPTE (dwords @v/@r) with the
 * byte offset of @ea within the page to form the full real address.
 */
static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r,
			unsigned long ea)
{
	unsigned long ra_mask;

	ra_mask = hpte_page_size(v, r) - 1;
	return (r & HPTE_R_RPN & ~ra_mask) | (ea & ra_mask);
}

/*
 * Translate guest effective address @eaddr into a guest real address,
 * filling in *gpte (real address, vpage, and may_read/may_write/
 * may_execute permissions).  @data selects data vs. instruction
 * relocation.  Returns 0 on success, -EINVAL if no SLB entry covers
 * the address, or -ENOENT if no HPTE maps it.
 */
static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
			struct kvmppc_pte *gpte, bool data)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvmppc_slb *slbe;
	unsigned long slb_v;
	unsigned long pp, key;
	unsigned long v, gr;
	unsigned long *hptep;
	int index;
	int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR);

	/* Get SLB entry */
	if (virtmode) {
		slbe = kvmppc_mmu_book3s_hv_find_slbe(vcpu, eaddr);
		if (!slbe)
			return -EINVAL;
		slb_v = slbe->origv;
	} else {
		/* real mode access */
		slb_v = vcpu->kvm->arch.vrma_slb_v;
	}

	/* Find the HPTE in the hash table */
	index = kvmppc_hv_find_lock_hpte(kvm, eaddr, slb_v,
					 HPTE_V_VALID | HPTE_V_ABSENT);
	if (index < 0)
		return -ENOENT;
	hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
	v = hptep[0] & ~HPTE_V_HVLOCK;
	gr = kvm->arch.revmap[index].guest_rpte;

	/* Unlock the HPTE: the lwsync orders our reads before the unlock */
	asm volatile("lwsync" : : : "memory");
	hptep[0] = v;

	gpte->eaddr = eaddr;
	gpte->vpage = ((v & HPTE_V_AVPN) << 4) | ((eaddr >> 12) & 0xfff);

	/* Get PP bits and key for permission check */
	pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
	key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
	key &= slb_v;

	/* Calculate permissions */
	gpte->may_read = hpte_read_permission(pp, key);
	gpte->may_write = hpte_write_permission(pp, key);
	gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G));

	/* Storage key permission check for POWER7 */
	if (data && virtmode && cpu_has_feature(CPU_FTR_ARCH_206)) {
		int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr);
		if (amrfield & 1)
			gpte->may_read = 0;
		if (amrfield & 2)
			gpte->may_write = 0;
	}

	/* Get the guest physical address */
	gpte->raddr = kvmppc_mmu_get_real_addr(v, gr, eaddr);
	return 0;
}

/*
 * Quick test for whether an instruction is a load or a store.
 * If the instruction is a load or a store, then this will indicate
 * which it is, at least on server processors.  (Embedded processors
 * have some external PID instructions that don't follow the rule
 * embodied here.)  If the instruction isn't a load or store, then
 * this doesn't return anything useful.
 */
static int instruction_is_store(unsigned int instr)
{
	unsigned int mask;

	/* for most major opcodes, bit 3 of the opcode distinguishes stores */
	mask = 0x10000000;
	if ((instr & 0xfc000000) == 0x7c000000)
		mask = 0x100;		/* major opcode 31 */
	return (instr & mask) != 0;
}

/*
 * Handle a guest access to an emulated MMIO region: fetch the faulting
 * instruction (if not already cached in vcpu->arch.last_inst), sanity-
 * check its direction against the fault, and hand off to the generic
 * MMIO emulation.  Returns RESUME_GUEST to retry, or the result of
 * kvmppc_emulate_mmio().
 */
static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
				unsigned long gpa, gva_t ea, int is_store)
{
	int ret;
	u32 last_inst;
	unsigned long srr0 = kvmppc_get_pc(vcpu);

	/* We try to load the last instruction.  We don't let
	 * emulate_instruction do it as it doesn't check what
	 * kvmppc_ld returns.
	 * If we fail, we just return to the guest and try executing it again.
	 */
	if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) {
		ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
		if (ret != EMULATE_DONE || last_inst == KVM_INST_FETCH_FAILED)
			return RESUME_GUEST;
		vcpu->arch.last_inst = last_inst;
	}

	/*
	 * WARNING: We do not know for sure whether the instruction we just
	 * read from memory is the same that caused the fault in the first
	 * place.  If the instruction we read is neither an load or a store,
	 * then it can't access memory, so we don't need to worry about
	 * enforcing access permissions.  So, assuming it is a load or
	 * store, we just check that its direction (load or store) is
	 * consistent with the original fault, since that's what we
	 * checked the access permissions against.  If there is a mismatch
	 * we just return and retry the instruction.
	 */

	if (instruction_is_store(vcpu->arch.last_inst) != !!is_store)
		return RESUME_GUEST;

	/*
	 * Emulated accesses are emulated by looking at the hash for
	 * translation once, then performing the access later. The
	 * translation could be invalidated in the meantime in which
	 * point performing the subsequent memory access on the old
	 * physical address could possibly be a security hole for the
	 * guest (but not the host).
	 *
	 * This is less of an issue for MMIO stores since they aren't
	 * globally visible. It could be an issue for MMIO loads to
	 * a certain extent but we'll ignore it for now.
	 */

	vcpu->arch.paddr_accessed = gpa;
	vcpu->arch.vaddr_accessed = ea;
	return kvmppc_emulate_mmio(run, vcpu);
}

int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
				unsigned long ea, unsigned long dsisr)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long *hptep, hpte[3], r;
	unsigned long mmu_seq, psize, pte_size;
	unsigned long gpa, gfn, hva, pfn;
	struct kvm_memory_slot *memslot;
	unsigned long *rmap;
	struct revmap_entry *rev;
	struct page *page, *pages[1];
	long index, ret, npages;
	unsigned long is_io;
	unsigned int writing, write_ok;
	struct vm_area_struct *vma;
	unsigned long rcbits;

	/*
	 * Real-mode code has already searched the HPT and found the
	 * entry we're interested in.  Lock the entry and check that
	 * it hasn't changed.  If it has, just return and re-execute the
	 * instruction.
	 */
	if (ea != vcpu->arch.pgfault_addr)
		return RESUME_GUEST;
	index = vcpu->arch.pgfault_index;
	hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
	rev = &kvm->arch.revmap[index];
	preempt_disable();
	while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
		cpu_relax();
	hpte[0] = hptep[0] & ~HPTE_V_HVLOCK;
	hpte[1] = hptep[1];
	hpte[2] = r = rev->guest_rpte;
	/* lwsync orders the snapshot reads before the unlocking store */
	asm volatile("lwsync" : : : "memory");
	hptep[0] = hpte[0];
	preempt_enable();

	if (hpte[0] != vcpu->arch.pgfault_hpte[0] ||
	    hpte[1] != vcpu->arch.pgfault_hpte[1])
		return RESUME_GUEST;

	/* Translate the logical address and get the page */
	psize = hpte_page_size(hpte[0], r);
	gpa = (r & HPTE_R_RPN & ~(psize - 1)) | (ea & (psize - 1));
	gfn = gpa >> PAGE_SHIFT;
	memslot = gfn_to_memslot(kvm, gfn);

	/* No memslot means it's an emulated MMIO region */
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
		return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
					      dsisr & DSISR_ISSTORE);

	if (!kvm->arch.using_mmu_notifiers)
		return -EFAULT;		/* should never get here */

	/* used to check for invalidations in progress */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	is_io = 0;
	pfn = 0;
	page = NULL;
	pte_size = PAGE_SIZE;
	writing = (dsisr & DSISR_ISSTORE) != 0;
	/* If writing != 0, then the HPTE must allow writing, if we get here */
	write_ok = writing;
	hva = gfn_to_hva_memslot(memslot, gfn);
	npages = get_user_pages_fast(hva, 1, writing, pages);
	if (npages < 1) {
		/* Check if it's an I/O mapping */
		down_read(&current->mm->mmap_sem);
		vma = find_vma(current->mm, hva);
		if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
		    (vma->vm_flags & VM_PFNMAP)) {
			pfn = vma->vm_pgoff +
				((hva - vma->vm_start) >> PAGE_SHIFT);
			pte_size = psize;
			is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
			write_ok = vma->vm_flags & VM_WRITE;
		}
		up_read(&current->mm->mmap_sem);
		if (!pfn)
			return -EFAULT;
	} else {
		page = pages[0];
		if (PageHuge(page)) {
			page = compound_head(page);
			pte_size <<= compound_order(page);
		}
		/* if the guest wants write access, see if that is OK */
		if (!writing && hpte_is_writable(r)) {
			pte_t *ptep, pte;
6834cf302bcSPaul Mackerras /* 6844cf302bcSPaul Mackerras * We need to protect against page table destruction 6854cf302bcSPaul Mackerras * while looking up and updating the pte. 6864cf302bcSPaul Mackerras */ 6874cf302bcSPaul Mackerras rcu_read_lock_sched(); 6884cf302bcSPaul Mackerras ptep = find_linux_pte_or_hugepte(current->mm->pgd, 6894cf302bcSPaul Mackerras hva, NULL); 6904cf302bcSPaul Mackerras if (ptep && pte_present(*ptep)) { 6914cf302bcSPaul Mackerras pte = kvmppc_read_update_linux_pte(ptep, 1); 6924cf302bcSPaul Mackerras if (pte_write(pte)) 6934cf302bcSPaul Mackerras write_ok = 1; 6944cf302bcSPaul Mackerras } 6954cf302bcSPaul Mackerras rcu_read_unlock_sched(); 6964cf302bcSPaul Mackerras } 697342d3db7SPaul Mackerras pfn = page_to_pfn(page); 698342d3db7SPaul Mackerras } 699342d3db7SPaul Mackerras 700342d3db7SPaul Mackerras ret = -EFAULT; 701342d3db7SPaul Mackerras if (psize > pte_size) 702342d3db7SPaul Mackerras goto out_put; 703342d3db7SPaul Mackerras 704342d3db7SPaul Mackerras /* Check WIMG vs. the actual page we're accessing */ 705342d3db7SPaul Mackerras if (!hpte_cache_flags_ok(r, is_io)) { 706342d3db7SPaul Mackerras if (is_io) 707342d3db7SPaul Mackerras return -EFAULT; 708342d3db7SPaul Mackerras /* 709342d3db7SPaul Mackerras * Allow guest to map emulated device memory as 710342d3db7SPaul Mackerras * uncacheable, but actually make it cacheable. 
711342d3db7SPaul Mackerras */ 712342d3db7SPaul Mackerras r = (r & ~(HPTE_R_W|HPTE_R_I|HPTE_R_G)) | HPTE_R_M; 713342d3db7SPaul Mackerras } 714342d3db7SPaul Mackerras 715342d3db7SPaul Mackerras /* Set the HPTE to point to pfn */ 716342d3db7SPaul Mackerras r = (r & ~(HPTE_R_PP0 - pte_size)) | (pfn << PAGE_SHIFT); 7174cf302bcSPaul Mackerras if (hpte_is_writable(r) && !write_ok) 7184cf302bcSPaul Mackerras r = hpte_make_readonly(r); 719342d3db7SPaul Mackerras ret = RESUME_GUEST; 720342d3db7SPaul Mackerras preempt_disable(); 721342d3db7SPaul Mackerras while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) 722342d3db7SPaul Mackerras cpu_relax(); 723342d3db7SPaul Mackerras if ((hptep[0] & ~HPTE_V_HVLOCK) != hpte[0] || hptep[1] != hpte[1] || 724342d3db7SPaul Mackerras rev->guest_rpte != hpte[2]) 725342d3db7SPaul Mackerras /* HPTE has been changed under us; let the guest retry */ 726342d3db7SPaul Mackerras goto out_unlock; 727342d3db7SPaul Mackerras hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; 728342d3db7SPaul Mackerras 729d89cc617STakuya Yoshikawa rmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; 730342d3db7SPaul Mackerras lock_rmap(rmap); 731342d3db7SPaul Mackerras 732342d3db7SPaul Mackerras /* Check if we might have been invalidated; let the guest retry if so */ 733342d3db7SPaul Mackerras ret = RESUME_GUEST; 7348ca40a70SChristoffer Dall if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) { 735342d3db7SPaul Mackerras unlock_rmap(rmap); 736342d3db7SPaul Mackerras goto out_unlock; 737342d3db7SPaul Mackerras } 7384cf302bcSPaul Mackerras 739bad3b507SPaul Mackerras /* Only set R/C in real HPTE if set in both *rmap and guest_rpte */ 740bad3b507SPaul Mackerras rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT; 741bad3b507SPaul Mackerras r &= rcbits | ~(HPTE_R_R | HPTE_R_C); 742bad3b507SPaul Mackerras 7434cf302bcSPaul Mackerras if (hptep[0] & HPTE_V_VALID) { 7444cf302bcSPaul Mackerras /* HPTE was previously valid, so we need to invalidate it */ 7454cf302bcSPaul Mackerras unlock_rmap(rmap); 
7464cf302bcSPaul Mackerras hptep[0] |= HPTE_V_ABSENT; 7474cf302bcSPaul Mackerras kvmppc_invalidate_hpte(kvm, hptep, index); 748bad3b507SPaul Mackerras /* don't lose previous R and C bits */ 749bad3b507SPaul Mackerras r |= hptep[1] & (HPTE_R_R | HPTE_R_C); 7504cf302bcSPaul Mackerras } else { 751342d3db7SPaul Mackerras kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0); 7524cf302bcSPaul Mackerras } 753342d3db7SPaul Mackerras 754342d3db7SPaul Mackerras hptep[1] = r; 755342d3db7SPaul Mackerras eieio(); 756342d3db7SPaul Mackerras hptep[0] = hpte[0]; 757342d3db7SPaul Mackerras asm volatile("ptesync" : : : "memory"); 758342d3db7SPaul Mackerras preempt_enable(); 7594cf302bcSPaul Mackerras if (page && hpte_is_writable(r)) 760342d3db7SPaul Mackerras SetPageDirty(page); 761342d3db7SPaul Mackerras 762342d3db7SPaul Mackerras out_put: 763de6c0b02SDavid Gibson if (page) { 764de6c0b02SDavid Gibson /* 765de6c0b02SDavid Gibson * We drop pages[0] here, not page because page might 766de6c0b02SDavid Gibson * have been set to the head page of a compound, but 767de6c0b02SDavid Gibson * we have to drop the reference on the correct tail 768de6c0b02SDavid Gibson * page to match the get inside gup() 769de6c0b02SDavid Gibson */ 770de6c0b02SDavid Gibson put_page(pages[0]); 771de6c0b02SDavid Gibson } 772342d3db7SPaul Mackerras return ret; 773342d3db7SPaul Mackerras 774342d3db7SPaul Mackerras out_unlock: 775342d3db7SPaul Mackerras hptep[0] &= ~HPTE_V_HVLOCK; 776342d3db7SPaul Mackerras preempt_enable(); 777342d3db7SPaul Mackerras goto out_put; 778342d3db7SPaul Mackerras } 779342d3db7SPaul Mackerras 780a64fd707SPaul Mackerras static void kvmppc_rmap_reset(struct kvm *kvm) 781a64fd707SPaul Mackerras { 782a64fd707SPaul Mackerras struct kvm_memslots *slots; 783a64fd707SPaul Mackerras struct kvm_memory_slot *memslot; 784a64fd707SPaul Mackerras int srcu_idx; 785a64fd707SPaul Mackerras 786a64fd707SPaul Mackerras srcu_idx = srcu_read_lock(&kvm->srcu); 787a64fd707SPaul Mackerras slots = kvm->memslots; 
	kvm_for_each_memslot(memslot, slots) {
		/*
		 * This assumes it is acceptable to lose reference and
		 * change bits across a reset.
		 */
		memset(memslot->arch.rmap, 0,
		       memslot->npages * sizeof(*memslot->arch.rmap));
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
}

/*
 * Walk every memslot that intersects the host-virtual-address range
 * [start, end) and call @handler on the rmap entry for each guest
 * page in the intersection.  Returns the OR of all handler return
 * values.
 */
static int kvm_handle_hva_range(struct kvm *kvm,
				unsigned long start,
				unsigned long end,
				int (*handler)(struct kvm *kvm,
					       unsigned long *rmapp,
					       unsigned long gfn))
{
	int ret;
	int retval = 0;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots) {
		unsigned long hva_start, hva_end;
		gfn_t gfn, gfn_end;

		/* Clip the request to this memslot's HVA range */
		hva_start = max(start, memslot->userspace_addr);
		hva_end = min(end, memslot->userspace_addr +
					(memslot->npages << PAGE_SHIFT));
		if (hva_start >= hva_end)
			continue;
		/*
		 * {gfn(page) | page intersects with [hva_start, hva_end)} =
		 * {gfn, gfn+1, ..., gfn_end-1}.
		 */
		gfn = hva_to_gfn_memslot(hva_start, memslot);
		gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);

		for (; gfn < gfn_end; ++gfn) {
			gfn_t gfn_offset = gfn - memslot->base_gfn;

			ret = handler(kvm, &memslot->arch.rmap[gfn_offset], gfn);
			retval |= ret;
		}
	}

	return retval;
}

/* Single-page convenience wrapper: handle just the page containing @hva. */
static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
					 unsigned long gfn))
{
	return kvm_handle_hva_range(kvm, hva, hva + 1, handler);
}

/*
 * Remove every HPTE on the rmap chain for @gfn: each entry is unlinked
 * from the chain, invalidated (made absent when MMU notifiers are in
 * use), and its R/C (referenced/changed) bits are harvested into both
 * *rmapp and the guest view in the revmap.  Always returns 0.
 */
static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
			   unsigned long gfn)
{
	struct revmap_entry *rev = kvm->arch.revmap;
	unsigned long h, i, j;
	unsigned long *hptep;
	unsigned long ptel, psize, rcbits;

	for (;;) {
		lock_rmap(rmapp);
		if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
			unlock_rmap(rmapp);
			break;
		}

		/*
		 * To avoid an ABBA deadlock with the HPTE lock bit,
		 * we can't spin on the HPTE lock while holding the
		 * rmap chain lock.
		 */
		i = *rmapp & KVMPPC_RMAP_INDEX;
		hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
			/* unlock rmap before spinning on the HPTE lock */
			unlock_rmap(rmapp);
			while (hptep[0] & HPTE_V_HVLOCK)
				cpu_relax();
			/* retry from the (possibly changed) chain head */
			continue;
		}
		j = rev[i].forw;
		if (j == i) {
			/* chain is now empty */
			*rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
		} else {
			/* remove i from chain */
			h = rev[i].back;
			rev[h].forw = j;
			rev[j].back = h;
			rev[i].forw = rev[i].back = i;
			*rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j;
		}

		/* Now check and modify the HPTE */
		ptel = rev[i].guest_rpte;
		psize = hpte_page_size(hptep[0], ptel);
		if ((hptep[0] & HPTE_V_VALID) &&
		    hpte_rpn(ptel, psize) == gfn) {
			if (kvm->arch.using_mmu_notifiers)
				hptep[0] |= HPTE_V_ABSENT;
			kvmppc_invalidate_hpte(kvm, hptep, i);
			/* Harvest R and C */
			rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
			*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
			rev[i].guest_rpte = ptel | rcbits;
		}
		unlock_rmap(rmapp);
		hptep[0] &= ~HPTE_V_HVLOCK;
	}
	return 0;
}

/* MMU-notifier hook: unmap a single host virtual address. */
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
{
	if (kvm->arch.using_mmu_notifiers)
		kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
	return 0;
}

/* MMU-notifier hook: unmap a range of host virtual addresses. */
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
{
	if (kvm->arch.using_mmu_notifiers)
		kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
	return 0;
}

/* Tear down all HPTEs referencing pages in @memslot. */
void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
	unsigned long *rmapp;
	unsigned long gfn;
	unsigned long n;

	rmapp = memslot->arch.rmap;
	gfn = memslot->base_gfn;
	for (n = memslot->npages; n; --n) {
		/*
		 * Testing the present bit without locking is OK because
		 * the memslot has been marked invalid already, and hence
		 * no new HPTEs referencing this page can be created,
		 * thus the present bit can't go from 0 to 1.
		 */
		if (*rmapp & KVMPPC_RMAP_PRESENT)
			kvm_unmap_rmapp(kvm, rmapp, gfn);
		++rmapp;
		++gfn;
	}
}

/*
 * Test-and-clear the "referenced" state for @gfn: clears the
 * REFERENCED flag in *rmapp and the R bit in every HPTE on the chain
 * (mirroring R into the guest view first).  Returns 1 if the page had
 * been referenced, 0 otherwise.
 */
static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
			 unsigned long gfn)
{
	struct revmap_entry *rev = kvm->arch.revmap;
	unsigned long head, i, j;
	unsigned long *hptep;
	int ret = 0;

 retry:
	lock_rmap(rmapp);
	if (*rmapp & KVMPPC_RMAP_REFERENCED) {
		*rmapp &= ~KVMPPC_RMAP_REFERENCED;
		ret = 1;
	}
	if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
		unlock_rmap(rmapp);
		return ret;
	}

	i = head = *rmapp & KVMPPC_RMAP_INDEX;
	do {
		hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
		j = rev[i].forw;

		/* If this HPTE isn't referenced, ignore it */
		if (!(hptep[1] & HPTE_R_R))
			continue;

		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
			/* unlock rmap before spinning on the HPTE lock */
			unlock_rmap(rmapp);
			while (hptep[0] & HPTE_V_HVLOCK)
				cpu_relax();
			goto retry;
		}

		/* Now check and modify the HPTE */
		if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) {
			kvmppc_clear_ref_hpte(kvm, hptep, i);
			rev[i].guest_rpte |= HPTE_R_R;
			ret = 1;
		}
		hptep[0] &= ~HPTE_V_HVLOCK;
	} while ((i = j) != head);

	unlock_rmap(rmapp);
	return ret;
}

/* MMU-notifier hook: has the page at @hva been referenced? (clearing) */
int kvm_age_hva(struct kvm *kvm, unsigned long hva)
{
	if (!kvm->arch.using_mmu_notifiers)
		return 0;
	return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
}

/*
 * Non-destructive referenced test for @gfn: returns 1 if the
 * REFERENCED flag or any chained HPTE's R bit is set, without
 * clearing anything.
 */
static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
			      unsigned long gfn)
{
	struct revmap_entry *rev = kvm->arch.revmap;
	unsigned long head, i, j;
	unsigned long *hp;
	int ret = 1;

	/* cheap unlocked check first */
	if (*rmapp & KVMPPC_RMAP_REFERENCED)
		return 1;

	lock_rmap(rmapp);
	if (*rmapp & KVMPPC_RMAP_REFERENCED)
		goto out;

	if (*rmapp & KVMPPC_RMAP_PRESENT) {
		i = head = *rmapp & KVMPPC_RMAP_INDEX;
		do {
			hp = (unsigned long *)(kvm->arch.hpt_virt + (i << 4));
			j = rev[i].forw;
			if (hp[1] & HPTE_R_R)
				goto out;
		} while ((i = j) != head);
	}
	ret = 0;

 out:
	unlock_rmap(rmapp);
	return ret;
}

/* MMU-notifier hook: non-clearing referenced test for @hva. */
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
{
	if (!kvm->arch.using_mmu_notifiers)
		return 0;
	return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
}

/*
 * MMU-notifier hook for a changed host PTE: simply unmap the page so
 * the next guest access faults and picks up the new translation.
 */
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
	if (!kvm->arch.using_mmu_notifiers)
		return;
	kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
}

/*
 * Test-and-clear the "changed" (dirty) state for one rmap entry:
 * clears the CHANGED flag in *rmapp and the C bit in every HPTE on
 * the chain (recording C in the guest view).  Returns 1 if the page
 * was dirty, 0 otherwise.
 */
static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
{
	struct revmap_entry *rev = kvm->arch.revmap;
	unsigned long head, i, j;
	unsigned long *hptep;
	int ret = 0;

 retry:
	lock_rmap(rmapp);
	if (*rmapp & KVMPPC_RMAP_CHANGED) {
		*rmapp &= ~KVMPPC_RMAP_CHANGED;
		ret = 1;
	}
	if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
		unlock_rmap(rmapp);
		return ret;
	}

	i = head = *rmapp & KVMPPC_RMAP_INDEX;
	do {
		hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
		j = rev[i].forw;

		if (!(hptep[1] & HPTE_R_C))
			continue;

		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
			/* unlock rmap before spinning on the HPTE lock */
			unlock_rmap(rmapp);
			while (hptep[0] & HPTE_V_HVLOCK)
				cpu_relax();
			goto retry;
		}

		/* Now check and modify the HPTE */
		if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_C)) {
			/* need to make it temporarily absent to clear C */
			hptep[0] |= HPTE_V_ABSENT;
			kvmppc_invalidate_hpte(kvm, hptep, i);
			hptep[1] &= ~HPTE_R_C;
			eieio();
			hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
			rev[i].guest_rpte |= HPTE_R_C;
			ret = 1;
		}
		hptep[0] &= ~HPTE_V_HVLOCK;
	} while ((i = j) != head);

	unlock_rmap(rmapp);
	return ret;
}

/*
 * Build the dirty-page bitmap for @memslot: for each page,
 * test-and-clear its changed state and, when @map is non-NULL, set
 * the corresponding bit.  Always returns 0.
 */
long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
			     unsigned long *map)
{
	unsigned long i;
	unsigned long
*rmapp;

	preempt_disable();
	rmapp = memslot->arch.rmap;
	for (i = 0; i < memslot->npages; ++i) {
		if (kvm_test_clear_dirty(kvm, rmapp) && map)
			__set_bit_le(i, map);
		++rmapp;
	}
	preempt_enable();
	return 0;
}

/*
 * Translate guest physical address @gpa to a host kernel address and
 * pin the backing page.  On success returns the kernel virtual
 * address corresponding to @gpa and, if @nb_ret is non-NULL, stores
 * there the number of bytes available up to the end of the backing
 * (possibly huge) page; returns NULL on failure.  The caller must
 * release the page with kvmppc_unpin_guest_page().
 */
void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
			    unsigned long *nb_ret)
{
	struct kvm_memory_slot *memslot;
	unsigned long gfn = gpa >> PAGE_SHIFT;
	struct page *page, *pages[1];
	int npages;
	unsigned long hva, psize, offset;
	unsigned long pa;
	unsigned long *physp;
	int srcu_idx;

	/* SRCU protects the memslot lookup against concurrent updates */
	srcu_idx = srcu_read_lock(&kvm->srcu);
	memslot = gfn_to_memslot(kvm, gfn);
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
		goto err;
	if (!kvm->arch.using_mmu_notifiers) {
		/* PPC970-style: pages come from the preallocated phys array */
		physp = memslot->arch.slot_phys;
		if (!physp)
			goto err;
		physp += gfn - memslot->base_gfn;
		pa = *physp;
		if (!pa) {
			if (kvmppc_get_guest_page(kvm, gfn, memslot,
						  PAGE_SIZE) < 0)
				goto err;
			pa = *physp;
		}
		page = pfn_to_page(pa >> PAGE_SHIFT);
		get_page(page);
	} else {
		/* POWER7-style: pin the page via get_user_pages_fast */
		hva = gfn_to_hva_memslot(memslot, gfn);
		npages = get_user_pages_fast(hva, 1, 1, pages);
		if (npages < 1)
			goto err;
		page = pages[0];
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);

	psize = PAGE_SIZE;
	if (PageHuge(page)) {
		page = compound_head(page);
		psize <<= compound_order(page);
	}
	offset = gpa & (psize - 1);
	if (nb_ret)
		*nb_ret = psize - offset;
	return page_address(page) + offset;

 err:
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	return NULL;
}

/* Release a page previously pinned by kvmppc_pin_guest_page(). */
void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
{
	struct page *page = virt_to_page(va);

	put_page(page);
}

/*
 * Functions for reading and writing the hash table via reads and
 * writes on a file descriptor.
 *
 * Reads return the guest view of the hash table, which has to be
 * pieced together from the real hash table and the guest_rpte
 * values in the revmap array.
 *
 * On writes, each HPTE written is considered in turn, and if it
 * is valid, it is written to the HPT as if an H_ENTER with the
 * exact flag set was done.  When the invalid count is non-zero
 * in the header written to the stream, the kernel will make
 * sure that that many HPTEs are invalid, and invalidate them
 * if not.
 */

/* Per-open state of an HPT file descriptor. */
struct kvm_htab_ctx {
	unsigned long index;	/* next HPT index to process */
	unsigned long flags;	/* KVM_GET_HTAB_* flags */
	struct kvm *kvm;
	int first_pass;		/* nonzero until the first full sweep completes */
};

/* On-disk/stream size of one HPTE: the V and R doublewords. */
#define HPTE_SIZE	(2 * sizeof(unsigned long))

/*
 * Snapshot one HPTE for the read interface.  Returns 1 and fills
 * hpte[0]/hpte[1] with the guest view of the entry when it matches
 * @want_valid and is interesting on this pass (every entry on the
 * first pass, only modified ones afterwards); returns 0 otherwise.
 * Clears HPTE_GR_MODIFIED in the revmap for entries it reports.
 */
static long record_hpte(unsigned long flags, unsigned long *hptp,
			unsigned long *hpte, struct revmap_entry *revp,
			int want_valid, int first_pass)
{
	unsigned long v, r;
	int ok = 1;
	int valid, dirty;

	/* Unmodified entries are uninteresting except on the first pass */
	dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
	if (!first_pass && !dirty)
		return 0;

	valid = 0;
	if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) {
		valid = 1;
		if ((flags & KVM_GET_HTAB_BOLTED_ONLY) &&
		    !(hptp[0] & HPTE_V_BOLTED))
			valid = 0;
	}
	if (valid != want_valid)
		return 0;

	v = r = 0;
	if (valid || dirty) {
		/* lock the HPTE so it's stable and read it */
		preempt_disable();
		while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
			cpu_relax();
		v = hptp[0];
		/* guest view: an absent entry reads back as valid */
		if (v & HPTE_V_ABSENT) {
			v &= ~HPTE_V_ABSENT;
			v |= HPTE_V_VALID;
		}
		/* re-evaluate valid and dirty from synchronized HPTE value */
		valid = !!(v & HPTE_V_VALID);
		if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED))
			valid = 0;
		r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C));
		dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
		/* only clear modified if this is the right sort of entry */
		if (valid == want_valid && dirty) {
			r &= ~HPTE_GR_MODIFIED;
			revp->guest_rpte = r;
		}
		asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
		hptp[0] &= ~HPTE_V_HVLOCK;
		preempt_enable();
		if (!(valid == want_valid && (first_pass || dirty)))
			ok = 0;
	}
	hpte[0] = v;
	hpte[1] = r;
	return ok;
}

static ssize_t
kvm_htab_read(struct file *file, char __user *buf, 1256a2932923SPaul Mackerras size_t count, loff_t *ppos) 1257a2932923SPaul Mackerras { 1258a2932923SPaul Mackerras struct kvm_htab_ctx *ctx = file->private_data; 1259a2932923SPaul Mackerras struct kvm *kvm = ctx->kvm; 1260a2932923SPaul Mackerras struct kvm_get_htab_header hdr; 1261a2932923SPaul Mackerras unsigned long *hptp; 1262a2932923SPaul Mackerras struct revmap_entry *revp; 1263a2932923SPaul Mackerras unsigned long i, nb, nw; 1264a2932923SPaul Mackerras unsigned long __user *lbuf; 1265a2932923SPaul Mackerras struct kvm_get_htab_header __user *hptr; 1266a2932923SPaul Mackerras unsigned long flags; 1267a2932923SPaul Mackerras int first_pass; 1268a2932923SPaul Mackerras unsigned long hpte[2]; 1269a2932923SPaul Mackerras 1270a2932923SPaul Mackerras if (!access_ok(VERIFY_WRITE, buf, count)) 1271a2932923SPaul Mackerras return -EFAULT; 1272a2932923SPaul Mackerras 1273a2932923SPaul Mackerras first_pass = ctx->first_pass; 1274a2932923SPaul Mackerras flags = ctx->flags; 1275a2932923SPaul Mackerras 1276a2932923SPaul Mackerras i = ctx->index; 1277a2932923SPaul Mackerras hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); 1278a2932923SPaul Mackerras revp = kvm->arch.revmap + i; 1279a2932923SPaul Mackerras lbuf = (unsigned long __user *)buf; 1280a2932923SPaul Mackerras 1281a2932923SPaul Mackerras nb = 0; 1282a2932923SPaul Mackerras while (nb + sizeof(hdr) + HPTE_SIZE < count) { 1283a2932923SPaul Mackerras /* Initialize header */ 1284a2932923SPaul Mackerras hptr = (struct kvm_get_htab_header __user *)buf; 1285a2932923SPaul Mackerras hdr.n_valid = 0; 1286a2932923SPaul Mackerras hdr.n_invalid = 0; 1287a2932923SPaul Mackerras nw = nb; 1288a2932923SPaul Mackerras nb += sizeof(hdr); 1289a2932923SPaul Mackerras lbuf = (unsigned long __user *)(buf + sizeof(hdr)); 1290a2932923SPaul Mackerras 1291a2932923SPaul Mackerras /* Skip uninteresting entries, i.e. 
clean on not-first pass */ 1292a2932923SPaul Mackerras if (!first_pass) { 1293a2932923SPaul Mackerras while (i < kvm->arch.hpt_npte && 1294a2932923SPaul Mackerras !(revp->guest_rpte & HPTE_GR_MODIFIED)) { 1295a2932923SPaul Mackerras ++i; 1296a2932923SPaul Mackerras hptp += 2; 1297a2932923SPaul Mackerras ++revp; 1298a2932923SPaul Mackerras } 1299a2932923SPaul Mackerras } 130005dd85f7SPaul Mackerras hdr.index = i; 1301a2932923SPaul Mackerras 1302a2932923SPaul Mackerras /* Grab a series of valid entries */ 1303a2932923SPaul Mackerras while (i < kvm->arch.hpt_npte && 1304a2932923SPaul Mackerras hdr.n_valid < 0xffff && 1305a2932923SPaul Mackerras nb + HPTE_SIZE < count && 1306a2932923SPaul Mackerras record_hpte(flags, hptp, hpte, revp, 1, first_pass)) { 1307a2932923SPaul Mackerras /* valid entry, write it out */ 1308a2932923SPaul Mackerras ++hdr.n_valid; 1309a2932923SPaul Mackerras if (__put_user(hpte[0], lbuf) || 1310a2932923SPaul Mackerras __put_user(hpte[1], lbuf + 1)) 1311a2932923SPaul Mackerras return -EFAULT; 1312a2932923SPaul Mackerras nb += HPTE_SIZE; 1313a2932923SPaul Mackerras lbuf += 2; 1314a2932923SPaul Mackerras ++i; 1315a2932923SPaul Mackerras hptp += 2; 1316a2932923SPaul Mackerras ++revp; 1317a2932923SPaul Mackerras } 1318a2932923SPaul Mackerras /* Now skip invalid entries while we can */ 1319a2932923SPaul Mackerras while (i < kvm->arch.hpt_npte && 1320a2932923SPaul Mackerras hdr.n_invalid < 0xffff && 1321a2932923SPaul Mackerras record_hpte(flags, hptp, hpte, revp, 0, first_pass)) { 1322a2932923SPaul Mackerras /* found an invalid entry */ 1323a2932923SPaul Mackerras ++hdr.n_invalid; 1324a2932923SPaul Mackerras ++i; 1325a2932923SPaul Mackerras hptp += 2; 1326a2932923SPaul Mackerras ++revp; 1327a2932923SPaul Mackerras } 1328a2932923SPaul Mackerras 1329a2932923SPaul Mackerras if (hdr.n_valid || hdr.n_invalid) { 1330a2932923SPaul Mackerras /* write back the header */ 1331a2932923SPaul Mackerras if (__copy_to_user(hptr, &hdr, sizeof(hdr))) 1332a2932923SPaul 
Mackerras return -EFAULT; 1333a2932923SPaul Mackerras nw = nb; 1334a2932923SPaul Mackerras buf = (char __user *)lbuf; 1335a2932923SPaul Mackerras } else { 1336a2932923SPaul Mackerras nb = nw; 1337a2932923SPaul Mackerras } 1338a2932923SPaul Mackerras 1339a2932923SPaul Mackerras /* Check if we've wrapped around the hash table */ 1340a2932923SPaul Mackerras if (i >= kvm->arch.hpt_npte) { 1341a2932923SPaul Mackerras i = 0; 1342a2932923SPaul Mackerras ctx->first_pass = 0; 1343a2932923SPaul Mackerras break; 1344a2932923SPaul Mackerras } 1345a2932923SPaul Mackerras } 1346a2932923SPaul Mackerras 1347a2932923SPaul Mackerras ctx->index = i; 1348a2932923SPaul Mackerras 1349a2932923SPaul Mackerras return nb; 1350a2932923SPaul Mackerras } 1351a2932923SPaul Mackerras 1352a2932923SPaul Mackerras static ssize_t kvm_htab_write(struct file *file, const char __user *buf, 1353a2932923SPaul Mackerras size_t count, loff_t *ppos) 1354a2932923SPaul Mackerras { 1355a2932923SPaul Mackerras struct kvm_htab_ctx *ctx = file->private_data; 1356a2932923SPaul Mackerras struct kvm *kvm = ctx->kvm; 1357a2932923SPaul Mackerras struct kvm_get_htab_header hdr; 1358a2932923SPaul Mackerras unsigned long i, j; 1359a2932923SPaul Mackerras unsigned long v, r; 1360a2932923SPaul Mackerras unsigned long __user *lbuf; 1361a2932923SPaul Mackerras unsigned long *hptp; 1362a2932923SPaul Mackerras unsigned long tmp[2]; 1363a2932923SPaul Mackerras ssize_t nb; 1364a2932923SPaul Mackerras long int err, ret; 1365a2932923SPaul Mackerras int rma_setup; 1366a2932923SPaul Mackerras 1367a2932923SPaul Mackerras if (!access_ok(VERIFY_READ, buf, count)) 1368a2932923SPaul Mackerras return -EFAULT; 1369a2932923SPaul Mackerras 1370a2932923SPaul Mackerras /* lock out vcpus from running while we're doing this */ 1371a2932923SPaul Mackerras mutex_lock(&kvm->lock); 1372a2932923SPaul Mackerras rma_setup = kvm->arch.rma_setup_done; 1373a2932923SPaul Mackerras if (rma_setup) { 1374a2932923SPaul Mackerras kvm->arch.rma_setup_done = 0; 
/* temporarily */ 1375a2932923SPaul Mackerras /* order rma_setup_done vs. vcpus_running */ 1376a2932923SPaul Mackerras smp_mb(); 1377a2932923SPaul Mackerras if (atomic_read(&kvm->arch.vcpus_running)) { 1378a2932923SPaul Mackerras kvm->arch.rma_setup_done = 1; 1379a2932923SPaul Mackerras mutex_unlock(&kvm->lock); 1380a2932923SPaul Mackerras return -EBUSY; 1381a2932923SPaul Mackerras } 1382a2932923SPaul Mackerras } 1383a2932923SPaul Mackerras 1384a2932923SPaul Mackerras err = 0; 1385a2932923SPaul Mackerras for (nb = 0; nb + sizeof(hdr) <= count; ) { 1386a2932923SPaul Mackerras err = -EFAULT; 1387a2932923SPaul Mackerras if (__copy_from_user(&hdr, buf, sizeof(hdr))) 1388a2932923SPaul Mackerras break; 1389a2932923SPaul Mackerras 1390a2932923SPaul Mackerras err = 0; 1391a2932923SPaul Mackerras if (nb + hdr.n_valid * HPTE_SIZE > count) 1392a2932923SPaul Mackerras break; 1393a2932923SPaul Mackerras 1394a2932923SPaul Mackerras nb += sizeof(hdr); 1395a2932923SPaul Mackerras buf += sizeof(hdr); 1396a2932923SPaul Mackerras 1397a2932923SPaul Mackerras err = -EINVAL; 1398a2932923SPaul Mackerras i = hdr.index; 1399a2932923SPaul Mackerras if (i >= kvm->arch.hpt_npte || 1400a2932923SPaul Mackerras i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte) 1401a2932923SPaul Mackerras break; 1402a2932923SPaul Mackerras 1403a2932923SPaul Mackerras hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); 1404a2932923SPaul Mackerras lbuf = (unsigned long __user *)buf; 1405a2932923SPaul Mackerras for (j = 0; j < hdr.n_valid; ++j) { 1406a2932923SPaul Mackerras err = -EFAULT; 1407a2932923SPaul Mackerras if (__get_user(v, lbuf) || __get_user(r, lbuf + 1)) 1408a2932923SPaul Mackerras goto out; 1409a2932923SPaul Mackerras err = -EINVAL; 1410a2932923SPaul Mackerras if (!(v & HPTE_V_VALID)) 1411a2932923SPaul Mackerras goto out; 1412a2932923SPaul Mackerras lbuf += 2; 1413a2932923SPaul Mackerras nb += HPTE_SIZE; 1414a2932923SPaul Mackerras 1415a2932923SPaul Mackerras if (hptp[0] & (HPTE_V_VALID 
| HPTE_V_ABSENT)) 1416a2932923SPaul Mackerras kvmppc_do_h_remove(kvm, 0, i, 0, tmp); 1417a2932923SPaul Mackerras err = -EIO; 1418a2932923SPaul Mackerras ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r, 1419a2932923SPaul Mackerras tmp); 1420a2932923SPaul Mackerras if (ret != H_SUCCESS) { 1421a2932923SPaul Mackerras pr_err("kvm_htab_write ret %ld i=%ld v=%lx " 1422a2932923SPaul Mackerras "r=%lx\n", ret, i, v, r); 1423a2932923SPaul Mackerras goto out; 1424a2932923SPaul Mackerras } 1425a2932923SPaul Mackerras if (!rma_setup && is_vrma_hpte(v)) { 1426a2932923SPaul Mackerras unsigned long psize = hpte_page_size(v, r); 1427a2932923SPaul Mackerras unsigned long senc = slb_pgsize_encoding(psize); 1428a2932923SPaul Mackerras unsigned long lpcr; 1429a2932923SPaul Mackerras 1430a2932923SPaul Mackerras kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | 1431a2932923SPaul Mackerras (VRMA_VSID << SLB_VSID_SHIFT_1T); 1432a2932923SPaul Mackerras lpcr = kvm->arch.lpcr & ~LPCR_VRMASD; 1433a2932923SPaul Mackerras lpcr |= senc << (LPCR_VRMASD_SH - 4); 1434a2932923SPaul Mackerras kvm->arch.lpcr = lpcr; 1435a2932923SPaul Mackerras rma_setup = 1; 1436a2932923SPaul Mackerras } 1437a2932923SPaul Mackerras ++i; 1438a2932923SPaul Mackerras hptp += 2; 1439a2932923SPaul Mackerras } 1440a2932923SPaul Mackerras 1441a2932923SPaul Mackerras for (j = 0; j < hdr.n_invalid; ++j) { 1442a2932923SPaul Mackerras if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) 1443a2932923SPaul Mackerras kvmppc_do_h_remove(kvm, 0, i, 0, tmp); 1444a2932923SPaul Mackerras ++i; 1445a2932923SPaul Mackerras hptp += 2; 1446a2932923SPaul Mackerras } 1447a2932923SPaul Mackerras err = 0; 1448a2932923SPaul Mackerras } 1449a2932923SPaul Mackerras 1450a2932923SPaul Mackerras out: 1451a2932923SPaul Mackerras /* Order HPTE updates vs. 
rma_setup_done */ 1452a2932923SPaul Mackerras smp_wmb(); 1453a2932923SPaul Mackerras kvm->arch.rma_setup_done = rma_setup; 1454a2932923SPaul Mackerras mutex_unlock(&kvm->lock); 1455a2932923SPaul Mackerras 1456a2932923SPaul Mackerras if (err) 1457a2932923SPaul Mackerras return err; 1458a2932923SPaul Mackerras return nb; 1459a2932923SPaul Mackerras } 1460a2932923SPaul Mackerras 1461a2932923SPaul Mackerras static int kvm_htab_release(struct inode *inode, struct file *filp) 1462a2932923SPaul Mackerras { 1463a2932923SPaul Mackerras struct kvm_htab_ctx *ctx = filp->private_data; 1464a2932923SPaul Mackerras 1465a2932923SPaul Mackerras filp->private_data = NULL; 1466a2932923SPaul Mackerras if (!(ctx->flags & KVM_GET_HTAB_WRITE)) 1467a2932923SPaul Mackerras atomic_dec(&ctx->kvm->arch.hpte_mod_interest); 1468a2932923SPaul Mackerras kvm_put_kvm(ctx->kvm); 1469a2932923SPaul Mackerras kfree(ctx); 1470a2932923SPaul Mackerras return 0; 1471a2932923SPaul Mackerras } 1472a2932923SPaul Mackerras 1473a2932923SPaul Mackerras static struct file_operations kvm_htab_fops = { 1474a2932923SPaul Mackerras .read = kvm_htab_read, 1475a2932923SPaul Mackerras .write = kvm_htab_write, 1476a2932923SPaul Mackerras .llseek = default_llseek, 1477a2932923SPaul Mackerras .release = kvm_htab_release, 1478a2932923SPaul Mackerras }; 1479a2932923SPaul Mackerras 1480a2932923SPaul Mackerras int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf) 1481a2932923SPaul Mackerras { 1482a2932923SPaul Mackerras int ret; 1483a2932923SPaul Mackerras struct kvm_htab_ctx *ctx; 1484a2932923SPaul Mackerras int rwflag; 1485a2932923SPaul Mackerras 1486a2932923SPaul Mackerras /* reject flags we don't recognize */ 1487a2932923SPaul Mackerras if (ghf->flags & ~(KVM_GET_HTAB_BOLTED_ONLY | KVM_GET_HTAB_WRITE)) 1488a2932923SPaul Mackerras return -EINVAL; 1489a2932923SPaul Mackerras ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 1490a2932923SPaul Mackerras if (!ctx) 1491a2932923SPaul Mackerras return -ENOMEM; 
1492a2932923SPaul Mackerras kvm_get_kvm(kvm); 1493a2932923SPaul Mackerras ctx->kvm = kvm; 1494a2932923SPaul Mackerras ctx->index = ghf->start_index; 1495a2932923SPaul Mackerras ctx->flags = ghf->flags; 1496a2932923SPaul Mackerras ctx->first_pass = 1; 1497a2932923SPaul Mackerras 1498a2932923SPaul Mackerras rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY; 1499a2932923SPaul Mackerras ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag); 1500a2932923SPaul Mackerras if (ret < 0) { 1501a2932923SPaul Mackerras kvm_put_kvm(kvm); 1502a2932923SPaul Mackerras return ret; 1503a2932923SPaul Mackerras } 1504a2932923SPaul Mackerras 1505a2932923SPaul Mackerras if (rwflag == O_RDONLY) { 1506a2932923SPaul Mackerras mutex_lock(&kvm->slots_lock); 1507a2932923SPaul Mackerras atomic_inc(&kvm->arch.hpte_mod_interest); 1508a2932923SPaul Mackerras /* make sure kvmppc_do_h_enter etc. see the increment */ 1509a2932923SPaul Mackerras synchronize_srcu_expedited(&kvm->srcu); 1510a2932923SPaul Mackerras mutex_unlock(&kvm->slots_lock); 1511a2932923SPaul Mackerras } 1512a2932923SPaul Mackerras 1513a2932923SPaul Mackerras return ret; 1514a2932923SPaul Mackerras } 1515a2932923SPaul Mackerras 1516de56a948SPaul Mackerras void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) 1517de56a948SPaul Mackerras { 1518de56a948SPaul Mackerras struct kvmppc_mmu *mmu = &vcpu->arch.mmu; 1519de56a948SPaul Mackerras 15209e368f29SPaul Mackerras if (cpu_has_feature(CPU_FTR_ARCH_206)) 15219e368f29SPaul Mackerras vcpu->arch.slb_nr = 32; /* POWER7 */ 15229e368f29SPaul Mackerras else 15239e368f29SPaul Mackerras vcpu->arch.slb_nr = 64; 1524de56a948SPaul Mackerras 1525de56a948SPaul Mackerras mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate; 1526de56a948SPaul Mackerras mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr; 1527de56a948SPaul Mackerras 1528de56a948SPaul Mackerras vcpu->arch.hflags |= BOOK3S_HFLAG_SLB; 1529de56a948SPaul Mackerras } 1530