/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * Copyright SUSE Linux Products GmbH 2010
 *
 * Authors: Alexander Graf <agraf@suse.de>
 */

#ifndef __ASM_KVM_BOOK3S_64_H__
#define __ASM_KVM_BOOK3S_64_H__

#include <asm/book3s/64/mmu-hash.h>

#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu)
{
	preempt_disable();
	return &get_paca()->shadow_vcpu;
}

static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
{
	preempt_enable();
}
#endif

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
#define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
#endif

#define VRMA_VSID	0x1ffffffUL	/* 1TB VSID reserved for VRMA */

/*
 * We use a lock bit in HPTE dword 0 to synchronize updates and
 * accesses to each HPTE, and another bit to indicate non-present
 * HPTEs.
 */
#define HPTE_V_HVLOCK	0x40UL
#define HPTE_V_ABSENT	0x20UL

/*
 * We use this bit in the guest_rpte field of the revmap entry
 * to indicate a modified HPTE.
 */
#define HPTE_GR_MODIFIED	(1ul << 62)

/* These bits are reserved in the guest view of the HPTE */
#define HPTE_GR_RESERVED	HPTE_GR_MODIFIED

static inline long try_lock_hpte(__be64 *hpte, unsigned long bits)
{
	unsigned long tmp, old;
	__be64 be_lockbit, be_bits;

	/*
	 * We load/store in native endian, but the HTAB is in big endian. If
	 * we byte swap all data we apply on the PTE we're implicitly correct
	 * again.
	 */
	be_lockbit = cpu_to_be64(HPTE_V_HVLOCK);
	be_bits = cpu_to_be64(bits);

	asm volatile("	ldarx	%0,0,%2\n"
		     "	and.	%1,%0,%3\n"
		     "	bne	2f\n"
		     "	or	%0,%0,%4\n"
		     "	stdcx.	%0,0,%2\n"
		     "	beq+	2f\n"
		     "	mr	%1,%3\n"
		     "2:	isync"
		     : "=&r" (tmp), "=&r" (old)
		     : "r" (hpte), "r" (be_bits), "r" (be_lockbit)
		     : "cc", "memory");
	return old == 0;
}

static inline void unlock_hpte(__be64 *hpte, unsigned long hpte_v)
{
	hpte_v &= ~HPTE_V_HVLOCK;
	asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
	hpte[0] = cpu_to_be64(hpte_v);
}

/* Without barrier */
static inline void __unlock_hpte(__be64 *hpte, unsigned long hpte_v)
{
	hpte_v &= ~HPTE_V_HVLOCK;
	hpte[0] = cpu_to_be64(hpte_v);
}
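/*
 * Illustrative sketch only, not part of this header's API: how the lock
 * helpers above are typically paired.  The function name
 * kvmppc_example_lock_modify_hpte() and its "update the HPTE" body are
 * assumptions made purely for illustration; real callers in the HV MMU
 * code perform their own HPTE updates under the lock.
 */
static inline void kvmppc_example_lock_modify_hpte(__be64 *hptep)
{
	unsigned long v;

	/* Spin until we own HPTE_V_HVLOCK; try_lock_hpte() fails on contention */
	while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
		cpu_relax();

	/* Read HPTE dword 0 with the lock bit stripped, then modify as needed */
	v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;

	/* ... update the HPTE here ... */

	/*
	 * unlock_hpte() issues a release barrier and stores dword 0 with
	 * HPTE_V_HVLOCK cleared, dropping the lock.
	 */
	unlock_hpte(hptep, v);
}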
static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
					     unsigned long pte_index)
{
	int i, b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K;
	unsigned int penc;
	unsigned long rb = 0, va_low, sllp;
	unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1);

	if (v & HPTE_V_LARGE) {
		i = hpte_page_sizes[lp];
		b_psize = i & 0xf;
		a_psize = i >> 4;
	}

	/*
	 * Ignore the top 14 bits of the VA.
	 * The top two bits of v cover the segment size, so shift left by
	 * 16 bits and also clear the lower HPTE_V_AVPN_SHIFT (7) bits.
	 * The AVA field in v also has the lower 23 bits of the VA ignored.
	 * For a 4K base page size we need VA bits 14..65 (so we have to
	 * collect an extra 11 bits); for other sizes we need 14..14+i.
	 */
	/* This covers VA bits 14..54 */
	rb = (v & ~0x7fUL) << 16;		/* AVA field */

	/*
	 * The AVA in v has the lower 23 bits cleared; we need to derive
	 * them from the PTEG index.
	 */
	va_low = pte_index >> 3;
	if (v & HPTE_V_SECONDARY)
		va_low = ~va_low;
	/*
	 * Get the VPN bits from va_low by reversing the hash.
	 * In v we have the VA with 23 bits dropped and then shifted left
	 * by HPTE_V_AVPN_SHIFT (7) bits, so to find the VSID we shift it
	 * right by (SID_SHIFT - (23 - 7)).
	 */
	if (!(v & HPTE_V_1TB_SEG))
		va_low ^= v >> (SID_SHIFT - 16);
	else
		va_low ^= v >> (SID_SHIFT_1T - 16);
	va_low &= 0x7ff;

	switch (b_psize) {
	case MMU_PAGE_4K:
		sllp = get_sllp_encoding(a_psize);
		rb |= sllp << 5;		/* AP field */
		rb |= (va_low & 0x7ff) << 12;	/* remaining 11 bits of AVA */
		break;
	default:
	{
		int aval_shift;
		/*
		 * Remaining bits of the AVA/LP fields; these also
		 * contain the rr bits of LP.
		 */
		rb |= (va_low << mmu_psize_defs[b_psize].shift) & 0x7ff000;
		/*
		 * Now clear the LP bits that are not needed, based on
		 * the actual page size.
		 */
		rb &= ~((1ul << mmu_psize_defs[a_psize].shift) - 1);
		/*
		 * The AVAL field holds VA bits 58..(77 - base_page_shift);
		 * we only have space for bits 58..64, and the missing bits
		 * should be zero-filled.  The +1 takes care of the L bit
		 * shift.
		 */
		aval_shift = 64 - (77 - mmu_psize_defs[b_psize].shift) + 1;
		rb |= ((va_low << aval_shift) & 0xfe);

		rb |= 1;		/* L field */
		penc = mmu_psize_defs[b_psize].penc[a_psize];
		rb |= penc << 12;	/* LP field */
		break;
	}
	}
	rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8;	/* B field */
	return rb;
}

static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize)
{
	return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
}

static inline int hpte_is_writable(unsigned long ptel)
{
	unsigned long pp = ptel & (HPTE_R_PP0 | HPTE_R_PP);

	return pp != PP_RXRX && pp != PP_RXXX;
}

static inline unsigned long hpte_make_readonly(unsigned long ptel)
{
	if ((ptel & HPTE_R_PP0) || (ptel & HPTE_R_PP) == PP_RWXX)
		ptel = (ptel & ~HPTE_R_PP) | PP_RXXX;
	else
		ptel |= PP_RXRX;
	return ptel;
}

static inline bool hpte_cache_flags_ok(unsigned long hptel, bool is_ci)
{
	unsigned int wimg = hptel & HPTE_R_WIMG;

	/* Handle SAO */
	if (wimg == (HPTE_R_W | HPTE_R_I | HPTE_R_M) &&
	    cpu_has_feature(CPU_FTR_ARCH_206))
		wimg = HPTE_R_M;

	if (!is_ci)
		return wimg == HPTE_R_M;
	/*
	 * If the host mapping is cache-inhibited, make sure hptel is
	 * also cache-inhibited.
	 */
	if (wimg & HPTE_R_W)	/* FIXME!! is this OK for all guests? */
		return false;
	return !!(wimg & HPTE_R_I);
}
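/*
 * Illustrative sketch only, not code from this header: how the PP helpers
 * above compose.  kvmppc_example_write_protect() is an assumed name used
 * purely for illustration; it downgrades the protection bits in an HPTE
 * second dword, after which hpte_is_writable() returns false because the
 * PP bits then encode PP_RXRX or PP_RXXX.
 */
static inline unsigned long kvmppc_example_write_protect(unsigned long ptel)
{
	/* hpte_is_writable() keys off the same PP0/PP bits set below */
	if (hpte_is_writable(ptel))
		ptel = hpte_make_readonly(ptel);
	return ptel;
}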
/*
 * If it's present and writable, atomically set the dirty and referenced
 * bits and return the PTE, otherwise return 0.
 */
static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing)
{
	pte_t old_pte, new_pte = __pte(0);

	while (1) {
		/*
		 * Make sure we don't reload from ptep
		 */
		old_pte = READ_ONCE(*ptep);
		/*
		 * Wait until H_PAGE_BUSY is clear, then update the
		 * referenced and dirty bits atomically below.
		 */
		if (unlikely(pte_val(old_pte) & H_PAGE_BUSY)) {
			cpu_relax();
			continue;
		}
		/* If the PTE is not present, return an empty PTE */
		if (unlikely(!(pte_val(old_pte) & _PAGE_PRESENT)))
			return __pte(0);

		new_pte = pte_mkyoung(old_pte);
		if (writing && pte_write(old_pte))
			new_pte = pte_mkdirty(new_pte);

		if (pte_xchg(ptep, old_pte, new_pte))
			break;
	}
	return new_pte;
}

static inline bool hpte_read_permission(unsigned long pp, unsigned long key)
{
	if (key)
		return PP_RWRX <= pp && pp <= PP_RXRX;
	return true;
}

static inline bool hpte_write_permission(unsigned long pp, unsigned long key)
{
	if (key)
		return pp == PP_RWRW;
	return pp <= PP_RWRW;
}

static inline int hpte_get_skey_perm(unsigned long hpte_r, unsigned long amr)
{
	unsigned long skey;

	skey = ((hpte_r & HPTE_R_KEY_HI) >> 57) |
		((hpte_r & HPTE_R_KEY_LO) >> 9);
	return (amr >> (62 - 2 * skey)) & 3;
}

static inline void lock_rmap(unsigned long *rmap)
{
	do {
		while (test_bit(KVMPPC_RMAP_LOCK_BIT, rmap))
			cpu_relax();
	} while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmap));
}

static inline void unlock_rmap(unsigned long *rmap)
{
	__clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmap);
}

static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
				   unsigned long pagesize)
{
	unsigned long mask = (pagesize >> PAGE_SHIFT) - 1;

	if (pagesize <= PAGE_SIZE)
		return true;
	return !(memslot->base_gfn & mask) && !(memslot->npages & mask);
}

/*
 * This works for 4k, 64k and 16M pages on POWER7,
 * and 4k and 16M pages on PPC970.
 */
static inline unsigned long slb_pgsize_encoding(unsigned long psize)
{
	unsigned long senc = 0;

	if (psize > 0x1000) {
		senc = SLB_VSID_L;
		if (psize == 0x10000)
			senc |= SLB_VSID_LP_01;
	}
	return senc;
}

static inline int is_vrma_hpte(unsigned long hpte_v)
{
	return (hpte_v & ~0xffffffUL) ==
		(HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)));
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
 * Note modification of an HPTE; set the HPTE modified bit
 * if anyone is interested.
 */
static inline void note_hpte_modification(struct kvm *kvm,
					  struct revmap_entry *rev)
{
	if (atomic_read(&kvm->arch.hpte_mod_interest))
		rev->guest_rpte |= HPTE_GR_MODIFIED;
}

/*
 * Like kvm_memslots(), but for use in real mode when we can't do
 * any RCU stuff (since the secondary threads are offline from the
 * kernel's point of view), and we can't print anything.
 * Thus we use rcu_dereference_raw() rather than rcu_dereference_check().
 */
static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
{
	return rcu_dereference_raw_notrace(kvm->memslots[0]);
}

extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);

extern void kvmhv_rm_send_ipi(int cpu);

#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

#endif /* __ASM_KVM_BOOK3S_64_H__ */
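/*
 * Illustrative usage sketch only, not code from this header: callers
 * serialize access to a reverse-map chain with the rmap helpers defined
 * above, e.g.
 *
 *	lock_rmap(rmapp);
 *	... walk or modify the rmap chain ...
 *	unlock_rmap(rmapp);
 *
 * lock_rmap() spins on KVMPPC_RMAP_LOCK_BIT with cpu_relax() and then
 * acquires it with test_and_set_bit_lock(), so unlock_rmap() only needs
 * the release-ordered __clear_bit_unlock().
 */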