// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables kernel and guest-mode vCPU access to guest physical
 * memory with suitable invalidation mechanisms.
 *
 * Copyright © 2021 Amazon.com, Inc. or its affiliates.
 *
 * Authors:
 *   David Woodhouse <dwmw2@infradead.org>
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/errno.h>

#include "kvm_mm.h"

/*
 * MMU notifier 'invalidate_range_start' hook.
 */
void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
				       unsigned long end, bool may_block)
{
	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
	struct gfn_to_pfn_cache *gpc;
	bool wake_vcpus = false;

	spin_lock(&kvm->gpc_lock);
	list_for_each_entry(gpc, &kvm->gpc_list, list) {
		write_lock_irq(&gpc->lock);

		/* Only a single page so no need to care about length */
		if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) &&
		    gpc->uhva >= start && gpc->uhva < end) {
			gpc->valid = false;

			/*
			 * If a guest vCPU could be using the physical address,
			 * it needs to be woken.
			 */
			if (gpc->guest_uses_pa) {
				if (!wake_vcpus) {
					wake_vcpus = true;
					bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
				}
				__set_bit(gpc->vcpu->vcpu_idx, vcpu_bitmap);
			}

			/*
			 * We cannot call mark_page_dirty() from here because
			 * this physical CPU might not have an active vCPU
			 * with which to do the KVM dirty tracking.
			 *
			 * Neither is there any point in telling the kernel MM
			 * that the underlying page is dirty. A vCPU in guest
			 * mode might still be writing to it up to the point
			 * where we wake them a few lines further down anyway.
			 *
			 * So all the dirty marking happens on the unmap.
			 */
		}
		write_unlock_irq(&gpc->lock);
	}
	spin_unlock(&kvm->gpc_lock);

	if (wake_vcpus) {
		unsigned int req = KVM_REQ_GPC_INVALIDATE;
		bool called;

		/*
		 * If the OOM reaper is active, then all vCPUs should have
		 * been stopped already, so perform the request without
		 * KVM_REQUEST_WAIT and be sad if any needed to be woken.
		 */
		if (!may_block)
			req &= ~KVM_REQUEST_WAIT;

		called = kvm_make_vcpus_request_mask(kvm, req, vcpu_bitmap);

		WARN_ON_ONCE(called && !may_block);
	}
}

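/*
 * Illustrative usage sketch for kvm_gfn_to_pfn_cache_check() together with
 * kvm_gfn_to_pfn_cache_refresh(): hold gpc->lock for read around the check
 * and the access, and drop it before refreshing. The _irqsave locking
 * flavour, the 'dirty' value and the error label are assumptions chosen for
 * the example, not requirements of the API:
 *
 *	read_lock_irqsave(&gpc->lock, flags);
 *	while (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpa, len)) {
 *		read_unlock_irqrestore(&gpc->lock, flags);
 *		if (kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpa, len, dirty))
 *			goto out_err;
 *		read_lock_irqsave(&gpc->lock, flags);
 *	}
 *	... access the page through gpc->khva ...
 *	read_unlock_irqrestore(&gpc->lock, flags);
 */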
bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
				gpa_t gpa, unsigned long len)
{
	struct kvm_memslots *slots = kvm_memslots(kvm);

	if ((gpa & ~PAGE_MASK) + len > PAGE_SIZE)
		return false;

	if (gpc->gpa != gpa || gpc->generation != slots->generation ||
	    kvm_is_error_hva(gpc->uhva))
		return false;

	if (!gpc->valid)
		return false;

	return true;
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_check);

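/*
 * Common teardown helper: unmap the kernel mapping (if any), release the
 * PFN reference and, if the cache was dirty, mark the page dirty in the
 * KVM dirty log.
 */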
static void __release_gpc(struct kvm *kvm, kvm_pfn_t pfn, void *khva,
			  gpa_t gpa, bool dirty)
{
	/* Unmap the old page if it was mapped before, and release it */
	if (!is_error_noslot_pfn(pfn)) {
		if (khva) {
			if (pfn_valid(pfn))
				kunmap(pfn_to_page(pfn));
#ifdef CONFIG_HAS_IOMEM
			else
				memunmap(khva);
#endif
		}

		kvm_release_pfn(pfn, dirty);
		if (dirty)
			mark_page_dirty(kvm, gpa);
	}
}

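/*
 * Translate a userspace HVA to a PFN, retrying until the result is not
 * stale: mmu_notifier_seq is snapshotted before the lookup and re-checked
 * under the MMU lock afterwards, so a concurrent invalidation forces
 * another pass.
 */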
static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, unsigned long uhva)
{
	unsigned long mmu_seq;
	kvm_pfn_t new_pfn;
	int retry;

	do {
		mmu_seq = kvm->mmu_notifier_seq;
		smp_rmb();

		/* We always request a writeable mapping */
		new_pfn = hva_to_pfn(uhva, false, NULL, true, NULL);
		if (is_error_noslot_pfn(new_pfn))
			break;

		KVM_MMU_READ_LOCK(kvm);
		retry = mmu_notifier_retry_hva(kvm, mmu_seq, uhva);
		KVM_MMU_READ_UNLOCK(kvm);
		if (!retry)
			break;

		cond_resched();
	} while (1);

	return new_pfn;
}

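/*
 * (Re)populate the cache for @gpa. The GPA to HVA translation is refreshed
 * if the GPA or the memslot generation changed; a new PFN (and, if
 * requested, kernel mapping) is then looked up with gpc->lock dropped, and
 * any previous mapping is released afterwards, with dirty marking done at
 * that point.
 */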
int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
				 gpa_t gpa, unsigned long len, bool dirty)
{
	struct kvm_memslots *slots = kvm_memslots(kvm);
	unsigned long page_offset = gpa & ~PAGE_MASK;
	kvm_pfn_t old_pfn, new_pfn;
	unsigned long old_uhva;
	gpa_t old_gpa;
	void *old_khva;
	bool old_valid, old_dirty;
	int ret = 0;

	/*
	 * It must fit within a single page. The 'len' argument is
	 * only to enforce that.
	 */
	if (page_offset + len > PAGE_SIZE)
		return -EINVAL;

	write_lock_irq(&gpc->lock);

	old_gpa = gpc->gpa;
	old_pfn = gpc->pfn;
	old_khva = gpc->khva - offset_in_page(gpc->khva);
	old_uhva = gpc->uhva;
	old_valid = gpc->valid;
	old_dirty = gpc->dirty;

	/* If the userspace HVA is invalid, refresh that first */
	if (gpc->gpa != gpa || gpc->generation != slots->generation ||
	    kvm_is_error_hva(gpc->uhva)) {
		gfn_t gfn = gpa_to_gfn(gpa);

		gpc->dirty = false;
		gpc->gpa = gpa;
		gpc->generation = slots->generation;
		gpc->memslot = __gfn_to_memslot(slots, gfn);
		gpc->uhva = gfn_to_hva_memslot(gpc->memslot, gfn);

		if (kvm_is_error_hva(gpc->uhva)) {
			ret = -EFAULT;
			goto out;
		}

		gpc->uhva += page_offset;
	}

	/*
	 * If the userspace HVA changed or the PFN was already invalid,
	 * drop the lock and do the HVA to PFN lookup again.
	 */
	if (!old_valid || old_uhva != gpc->uhva) {
		unsigned long uhva = gpc->uhva;
		void *new_khva = NULL;

		/* Placeholders for "hva is valid but not yet mapped" */
		gpc->pfn = KVM_PFN_ERR_FAULT;
		gpc->khva = NULL;
		gpc->valid = true;

		write_unlock_irq(&gpc->lock);

		new_pfn = hva_to_pfn_retry(kvm, uhva);
		if (is_error_noslot_pfn(new_pfn)) {
			ret = -EFAULT;
			goto map_done;
		}

		if (gpc->kernel_map) {
			if (new_pfn == old_pfn) {
				new_khva = old_khva;
				old_pfn = KVM_PFN_ERR_FAULT;
				old_khva = NULL;
			} else if (pfn_valid(new_pfn)) {
				new_khva = kmap(pfn_to_page(new_pfn));
#ifdef CONFIG_HAS_IOMEM
			} else {
				new_khva = memremap(pfn_to_hpa(new_pfn), PAGE_SIZE, MEMREMAP_WB);
#endif
			}
			if (new_khva)
				new_khva += page_offset;
			else
				ret = -EFAULT;
		}

	map_done:
		write_lock_irq(&gpc->lock);
		if (ret) {
			gpc->valid = false;
			gpc->pfn = KVM_PFN_ERR_FAULT;
			gpc->khva = NULL;
		} else {
			/* At this point, gpc->valid may already have been cleared */
			gpc->pfn = new_pfn;
			gpc->khva = new_khva;
		}
	} else {
		/* If the HVA→PFN mapping was already valid, don't unmap it. */
		old_pfn = KVM_PFN_ERR_FAULT;
		old_khva = NULL;
	}

 out:
	if (ret)
		gpc->dirty = false;
	else
		gpc->dirty = dirty;

	write_unlock_irq(&gpc->lock);

	__release_gpc(kvm, old_pfn, old_khva, old_gpa, old_dirty);

	return ret;
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_refresh);

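/*
 * Tear down the cached PFN mapping (marking the page dirty if the cache was
 * dirty) while leaving the GPA to HVA translation in place for a later
 * refresh.
 */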
void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
{
	void *old_khva;
	kvm_pfn_t old_pfn;
	bool old_dirty;
	gpa_t old_gpa;

	write_lock_irq(&gpc->lock);

	gpc->valid = false;

	old_khva = gpc->khva - offset_in_page(gpc->khva);
	old_dirty = gpc->dirty;
	old_gpa = gpc->gpa;
	old_pfn = gpc->pfn;

	/*
	 * We can leave the GPA → uHVA map cache intact but the PFN
	 * lookup will need to be redone even for the same page.
	 */
	gpc->khva = NULL;
	gpc->pfn = KVM_PFN_ERR_FAULT;

	write_unlock_irq(&gpc->lock);

	__release_gpc(kvm, old_pfn, old_khva, old_gpa, old_dirty);
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_unmap);

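/*
 * Activate the cache: initialize its lock and state, add it to the VM's
 * gpc_list so the invalidation hook at the top of this file can see it,
 * then perform an initial refresh for @gpa.
 */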
int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
			      struct kvm_vcpu *vcpu, bool guest_uses_pa,
			      bool kernel_map, gpa_t gpa, unsigned long len,
			      bool dirty)
{
	if (!gpc->active) {
		rwlock_init(&gpc->lock);

		gpc->khva = NULL;
		gpc->pfn = KVM_PFN_ERR_FAULT;
		gpc->uhva = KVM_HVA_ERR_BAD;
		gpc->vcpu = vcpu;
		gpc->kernel_map = kernel_map;
		gpc->guest_uses_pa = guest_uses_pa;
		gpc->valid = false;
		gpc->active = true;

		spin_lock(&kvm->gpc_lock);
		list_add(&gpc->list, &kvm->gpc_list);
		spin_unlock(&kvm->gpc_lock);
	}
	return kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpa, len, dirty);
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_init);

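/*
 * Illustrative lifecycle sketch tying the calls above together. The caller
 * context, the 'gpa' value and the zero-initialized cache structure are
 * assumptions made purely for the example:
 *
 *	struct gfn_to_pfn_cache cache = {};
 *
 *	// Map one page at 'gpa' for both kernel and guest use
 *	kvm_gfn_to_pfn_cache_init(kvm, &cache, vcpu, true, true,
 *				  gpa, PAGE_SIZE, false);
 *	// ... check/refresh and access cache.khva as sketched earlier ...
 *	kvm_gfn_to_pfn_cache_destroy(kvm, &cache);
 */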
void kvm_gfn_to_pfn_cache_destroy(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
{
	if (gpc->active) {
		spin_lock(&kvm->gpc_lock);
		list_del(&gpc->list);
		spin_unlock(&kvm->gpc_lock);

		kvm_gfn_to_pfn_cache_unmap(kvm, gpc);
		gpc->active = false;
	}
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_destroy);