// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables kernel and guest-mode vCPU access to guest physical
 * memory with suitable invalidation mechanisms.
 *
 * Copyright © 2021 Amazon.com, Inc. or its affiliates.
 *
 * Authors:
 *   David Woodhouse <dwmw2@infradead.org>
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/errno.h>

#include "kvm_mm.h"

/*
 * MMU notifier 'invalidate_range_start' hook.
 */
void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
				       unsigned long end, bool may_block)
{
	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
	struct gfn_to_pfn_cache *gpc;
	bool wake_vcpus = false;

	spin_lock(&kvm->gpc_lock);
	list_for_each_entry(gpc, &kvm->gpc_list, list) {
		write_lock_irq(&gpc->lock);

		/* Only a single page so no need to care about length */
		if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) &&
		    gpc->uhva >= start && gpc->uhva < end) {
			gpc->valid = false;

			/*
			 * If a guest vCPU could be using the physical address,
			 * it needs to be woken.
			 */
			if (gpc->guest_uses_pa) {
				if (!wake_vcpus) {
					wake_vcpus = true;
					bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
				}
				__set_bit(gpc->vcpu->vcpu_idx, vcpu_bitmap);
			}

			/*
			 * We cannot call mark_page_dirty() from here because
			 * this physical CPU might not have an active vCPU
			 * with which to do the KVM dirty tracking.
			 *
			 * Neither is there any point in telling the kernel MM
			 * that the underlying page is dirty. A vCPU in guest
			 * mode might still be writing to it up to the point
			 * where we wake them a few lines further down anyway.
			 *
			 * So all the dirty marking happens on the unmap.
			 */
		}
		write_unlock_irq(&gpc->lock);
	}
	spin_unlock(&kvm->gpc_lock);

	if (wake_vcpus) {
		unsigned int req = KVM_REQ_GPC_INVALIDATE;
		bool called;

		/*
		 * If the OOM reaper is active, then all vCPUs should have
		 * been stopped already, so perform the request without
		 * KVM_REQUEST_WAIT and be sad if any needed to be woken.
		 */
		if (!may_block)
			req &= ~KVM_REQUEST_WAIT;

		called = kvm_make_vcpus_request_mask(kvm, req, vcpu_bitmap);

		WARN_ON_ONCE(called && !may_block);
	}
}
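/*
 * For orientation, a minimal sketch of the expected call site for the hook
 * above. The actual plumbing lives in the core MMU notifier handler in
 * kvm_main.c; the exact shape of the handler here is illustrative, not a
 * copy of that code:
 *
 *	static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 *					const struct mmu_notifier_range *range)
 *	{
 *		struct kvm *kvm = mmu_notifier_to_kvm(mn);
 *
 *		...
 *		gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end,
 *						  mmu_notifier_range_blockable(range));
 *		...
 *	}
 */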
bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
				gpa_t gpa, unsigned long len)
{
	struct kvm_memslots *slots = kvm_memslots(kvm);

	if ((gpa & ~PAGE_MASK) + len > PAGE_SIZE)
		return false;

	if (gpc->gpa != gpa || gpc->generation != slots->generation ||
	    kvm_is_error_hva(gpc->uhva))
		return false;

	if (!gpc->valid)
		return false;

	return true;
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_check);

static void __release_gpc(struct kvm *kvm, kvm_pfn_t pfn, void *khva,
			  gpa_t gpa, bool dirty)
{
	/* Unmap the old page if it was mapped before, and release it */
	if (!is_error_noslot_pfn(pfn)) {
		if (khva) {
			if (pfn_valid(pfn))
				kunmap(pfn_to_page(pfn));
#ifdef CONFIG_HAS_IOMEM
			else
				memunmap(khva);
#endif
		}

		kvm_release_pfn(pfn, dirty);
		if (dirty)
			mark_page_dirty(kvm, gpa);
	}
}

static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, unsigned long uhva)
{
	unsigned long mmu_seq;
	kvm_pfn_t new_pfn;
	int retry;

	do {
		mmu_seq = kvm->mmu_notifier_seq;
		smp_rmb();

		/* We always request a writeable mapping */
		new_pfn = hva_to_pfn(uhva, false, NULL, true, NULL);
		if (is_error_noslot_pfn(new_pfn))
			break;

		KVM_MMU_READ_LOCK(kvm);
		retry = mmu_notifier_retry_hva(kvm, mmu_seq, uhva);
		KVM_MMU_READ_UNLOCK(kvm);
		if (!retry)
			break;

		cond_resched();
	} while (1);

	return new_pfn;
}

int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
				 gpa_t gpa, unsigned long len, bool dirty)
{
	struct kvm_memslots *slots = kvm_memslots(kvm);
	unsigned long page_offset = gpa & ~PAGE_MASK;
	kvm_pfn_t old_pfn, new_pfn;
	unsigned long old_uhva;
	gpa_t old_gpa;
	void *old_khva;
	bool old_valid, old_dirty;
	int ret = 0;

	/*
	 * The request must fit within a single page. The 'len' argument
	 * exists only to enforce that.
	 */
	if (page_offset + len > PAGE_SIZE)
		return -EINVAL;

	write_lock_irq(&gpc->lock);

	old_gpa = gpc->gpa;
	old_pfn = gpc->pfn;
	old_khva = gpc->khva - offset_in_page(gpc->khva);
	old_uhva = gpc->uhva;
	old_valid = gpc->valid;
	old_dirty = gpc->dirty;

	/* If the userspace HVA is invalid, refresh that first */
	if (gpc->gpa != gpa || gpc->generation != slots->generation ||
	    kvm_is_error_hva(gpc->uhva)) {
		gfn_t gfn = gpa_to_gfn(gpa);

		gpc->dirty = false;
		gpc->gpa = gpa;
		gpc->generation = slots->generation;
		gpc->memslot = __gfn_to_memslot(slots, gfn);
		gpc->uhva = gfn_to_hva_memslot(gpc->memslot, gfn);

		if (kvm_is_error_hva(gpc->uhva)) {
			ret = -EFAULT;
			goto out;
		}

		gpc->uhva += page_offset;
	}

	/*
	 * If the userspace HVA changed or the PFN was already invalid,
	 * drop the lock and do the HVA to PFN lookup again.
	 */
	if (!old_valid || old_uhva != gpc->uhva) {
		unsigned long uhva = gpc->uhva;
		void *new_khva = NULL;

		/* Placeholders for "hva is valid but not yet mapped" */
		gpc->pfn = KVM_PFN_ERR_FAULT;
		gpc->khva = NULL;
		gpc->valid = true;

		write_unlock_irq(&gpc->lock);

		new_pfn = hva_to_pfn_retry(kvm, uhva);
		if (is_error_noslot_pfn(new_pfn)) {
			ret = -EFAULT;
			goto map_done;
		}

		if (gpc->kernel_map) {
			if (new_pfn == old_pfn) {
				new_khva = old_khva;
				old_pfn = KVM_PFN_ERR_FAULT;
				old_khva = NULL;
			} else if (pfn_valid(new_pfn)) {
				new_khva = kmap(pfn_to_page(new_pfn));
#ifdef CONFIG_HAS_IOMEM
			} else {
				new_khva = memremap(pfn_to_hpa(new_pfn), PAGE_SIZE, MEMREMAP_WB);
#endif
			}
			if (new_khva)
				new_khva += page_offset;
			else
				ret = -EFAULT;
		}

	map_done:
		write_lock_irq(&gpc->lock);
		if (ret) {
			gpc->valid = false;
			gpc->pfn = KVM_PFN_ERR_FAULT;
			gpc->khva = NULL;
		} else {
			/* At this point, gpc->valid may already have been cleared */
			gpc->pfn = new_pfn;
			gpc->khva = new_khva;
		}
	} else {
		/* If the HVA→PFN mapping was already valid, don't unmap it. */
		old_pfn = KVM_PFN_ERR_FAULT;
		old_khva = NULL;
	}

 out:
	if (ret)
		gpc->dirty = false;
	else
		gpc->dirty = dirty;

	write_unlock_irq(&gpc->lock);

	__release_gpc(kvm, old_pfn, old_khva, old_gpa, old_dirty);

	return ret;
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_refresh);
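/*
 * Typical consumer pattern for the check/refresh pair above (a sketch, not
 * a call site in this file; the GPA, length and error label are
 * illustrative): hold gpc->lock for read across the access, revalidate
 * with kvm_gfn_to_pfn_cache_check(), and drop the lock around the refresh
 * since it may sleep and takes the lock for write itself.
 *
 *	read_lock_irq(&gpc->lock);
 *	while (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE)) {
 *		read_unlock_irq(&gpc->lock);
 *
 *		if (kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpc->gpa,
 *						 PAGE_SIZE, false))
 *			goto err;	(hypothetical error path)
 *
 *		read_lock_irq(&gpc->lock);
 *	}
 *
 *	... access the page via gpc->khva ...
 *
 *	read_unlock_irq(&gpc->lock);
 */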
void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
{
	void *old_khva;
	kvm_pfn_t old_pfn;
	bool old_dirty;
	gpa_t old_gpa;

	write_lock_irq(&gpc->lock);

	gpc->valid = false;

	old_khva = gpc->khva - offset_in_page(gpc->khva);
	old_dirty = gpc->dirty;
	old_gpa = gpc->gpa;
	old_pfn = gpc->pfn;

	/*
	 * We can leave the GPA → uHVA map cache intact but the PFN
	 * lookup will need to be redone even for the same page.
	 */
	gpc->khva = NULL;
	gpc->pfn = KVM_PFN_ERR_FAULT;

	write_unlock_irq(&gpc->lock);

	__release_gpc(kvm, old_pfn, old_khva, old_gpa, old_dirty);
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_unmap);

int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
			      struct kvm_vcpu *vcpu, bool guest_uses_pa,
			      bool kernel_map, gpa_t gpa, unsigned long len,
			      bool dirty)
{
	if (!gpc->active) {
		rwlock_init(&gpc->lock);

		gpc->khva = NULL;
		gpc->pfn = KVM_PFN_ERR_FAULT;
		gpc->uhva = KVM_HVA_ERR_BAD;
		gpc->vcpu = vcpu;
		gpc->kernel_map = kernel_map;
		gpc->guest_uses_pa = guest_uses_pa;
		gpc->valid = false;
		gpc->active = true;

		spin_lock(&kvm->gpc_lock);
		list_add(&gpc->list, &kvm->gpc_list);
		spin_unlock(&kvm->gpc_lock);
	}
	return kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpa, len, dirty);
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_init);

void kvm_gfn_to_pfn_cache_destroy(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
{
	if (gpc->active) {
		spin_lock(&kvm->gpc_lock);
		list_del(&gpc->list);
		spin_unlock(&kvm->gpc_lock);

		kvm_gfn_to_pfn_cache_unmap(kvm, gpc);
		gpc->active = false;
	}
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_destroy);
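/*
 * End-to-end lifecycle sketch (illustrative; 'kvm', 'gpc' and 'gpa' are
 * assumed to be provided by the caller): a cache is activated once with
 * kvm_gfn_to_pfn_cache_init(), revalidated via the check/refresh pattern
 * shown earlier, and torn down with kvm_gfn_to_pfn_cache_destroy(). For a
 * VM-wide cache with guest_uses_pa=false (no vCPU dereferences the
 * physical address in guest mode, so no vCPU and no wakeups are needed)
 * and kernel_map=true (the kernel accesses the page via gpc->khva):
 *
 *	int ret = kvm_gfn_to_pfn_cache_init(kvm, gpc, NULL, false, true,
 *					    gpa, PAGE_SIZE, false);
 *	if (ret)
 *		return ret;
 *
 *	... use the cache via check/refresh ...
 *
 *	kvm_gfn_to_pfn_cache_destroy(kvm, gpc);
 */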