xref: /openbmc/linux/arch/x86/kvm/mmu/page_track.c (revision 9aa2cba7a275b2c0b10c95ea60aced015a5535e1)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Support KVM guest page tracking
4  *
5  * This feature allows us to track page access in guest. Currently, only
6  * write access is tracked.
7  *
8  * Copyright(C) 2015 Intel Corporation.
9  *
10  * Author:
11  *   Xiao Guangrong <guangrong.xiao@linux.intel.com>
12  */
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14 
15 #include <linux/lockdep.h>
16 #include <linux/kvm_host.h>
17 #include <linux/rculist.h>
18 
19 #include "mmu.h"
20 #include "mmu_internal.h"
21 #include "page_track.h"
22 
23 bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
24 {
25 	return IS_ENABLED(CONFIG_KVM_EXTERNAL_WRITE_TRACKING) ||
26 	       !tdp_enabled || kvm_shadow_root_allocated(kvm);
27 }
28 
29 void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
30 {
31 	kvfree(slot->arch.gfn_write_track);
32 	slot->arch.gfn_write_track = NULL;
33 }
34 
35 static int __kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot,
36 						 unsigned long npages)
37 {
38 	const size_t size = sizeof(*slot->arch.gfn_write_track);
39 
40 	if (!slot->arch.gfn_write_track)
41 		slot->arch.gfn_write_track = __vcalloc(npages, size,
42 						       GFP_KERNEL_ACCOUNT);
43 
44 	return slot->arch.gfn_write_track ? 0 : -ENOMEM;
45 }
46 
/*
 * Set up write tracking for a memslot that is being created.
 *
 * The tracking array is only allocated when write tracking is enabled for
 * @kvm; otherwise this is a no-op that returns 0.  When an allocation is
 * attempted, its result (0 or -ENOMEM) is returned.
 */
int kvm_page_track_create_memslot(struct kvm *kvm,
				  struct kvm_memory_slot *slot,
				  unsigned long npages)
{
	if (kvm_page_track_write_tracking_enabled(kvm))
		return __kvm_page_track_write_tracking_alloc(slot, npages);

	return 0;
}
56 
57 int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot)
58 {
59 	return __kvm_page_track_write_tracking_alloc(slot, slot->npages);
60 }
61 
62 static void update_gfn_write_track(struct kvm_memory_slot *slot, gfn_t gfn,
63 				   short count)
64 {
65 	int index, val;
66 
67 	index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
68 
69 	val = slot->arch.gfn_write_track[index];
70 
71 	if (WARN_ON_ONCE(val + count < 0 || val + count > USHRT_MAX))
72 		return;
73 
74 	slot->arch.gfn_write_track[index] += count;
75 }
76 
77 void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
78 			       gfn_t gfn)
79 {
80 	lockdep_assert_held_write(&kvm->mmu_lock);
81 
82 	lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
83 			    srcu_read_lock_held(&kvm->srcu));
84 
85 	if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
86 		return;
87 
88 	update_gfn_write_track(slot, gfn, 1);
89 
90 	/*
91 	 * new track stops large page mapping for the
92 	 * tracked page.
93 	 */
94 	kvm_mmu_gfn_disallow_lpage(slot, gfn);
95 
96 	if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K))
97 		kvm_flush_remote_tlbs(kvm);
98 }
99 
100 void __kvm_write_track_remove_gfn(struct kvm *kvm,
101 				  struct kvm_memory_slot *slot, gfn_t gfn)
102 {
103 	lockdep_assert_held_write(&kvm->mmu_lock);
104 
105 	lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
106 			    srcu_read_lock_held(&kvm->srcu));
107 
108 	if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
109 		return;
110 
111 	update_gfn_write_track(slot, gfn, -1);
112 
113 	/*
114 	 * allow large page mapping for the tracked page
115 	 * after the tracker is gone.
116 	 */
117 	kvm_mmu_gfn_allow_lpage(slot, gfn);
118 }
119 
120 /*
121  * check if the corresponding access on the specified guest page is tracked.
122  */
123 bool kvm_gfn_is_write_tracked(struct kvm *kvm,
124 			      const struct kvm_memory_slot *slot, gfn_t gfn)
125 {
126 	int index;
127 
128 	if (!slot)
129 		return false;
130 
131 	if (!kvm_page_track_write_tracking_enabled(kvm))
132 		return false;
133 
134 	index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
135 	return !!READ_ONCE(slot->arch.gfn_write_track[index]);
136 }
137 
138 #ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
139 void kvm_page_track_cleanup(struct kvm *kvm)
140 {
141 	struct kvm_page_track_notifier_head *head;
142 
143 	head = &kvm->arch.track_notifier_head;
144 	cleanup_srcu_struct(&head->track_srcu);
145 }
146 
147 int kvm_page_track_init(struct kvm *kvm)
148 {
149 	struct kvm_page_track_notifier_head *head;
150 
151 	head = &kvm->arch.track_notifier_head;
152 	INIT_HLIST_HEAD(&head->track_notifier_list);
153 	return init_srcu_struct(&head->track_srcu);
154 }
155 
156 /*
157  * register the notifier so that event interception for the tracked guest
158  * pages can be received.
159  */
160 int kvm_page_track_register_notifier(struct kvm *kvm,
161 				     struct kvm_page_track_notifier_node *n)
162 {
163 	struct kvm_page_track_notifier_head *head;
164 
165 	if (!kvm || kvm->mm != current->mm)
166 		return -ESRCH;
167 
168 	kvm_get_kvm(kvm);
169 
170 	head = &kvm->arch.track_notifier_head;
171 
172 	write_lock(&kvm->mmu_lock);
173 	hlist_add_head_rcu(&n->node, &head->track_notifier_list);
174 	write_unlock(&kvm->mmu_lock);
175 	return 0;
176 }
177 EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
178 
179 /*
180  * stop receiving the event interception. It is the opposed operation of
181  * kvm_page_track_register_notifier().
182  */
183 void kvm_page_track_unregister_notifier(struct kvm *kvm,
184 					struct kvm_page_track_notifier_node *n)
185 {
186 	struct kvm_page_track_notifier_head *head;
187 
188 	head = &kvm->arch.track_notifier_head;
189 
190 	write_lock(&kvm->mmu_lock);
191 	hlist_del_rcu(&n->node);
192 	write_unlock(&kvm->mmu_lock);
193 	synchronize_srcu(&head->track_srcu);
194 
195 	kvm_put_kvm(kvm);
196 }
197 EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
198 
199 /*
200  * Notify the node that write access is intercepted and write emulation is
201  * finished at this time.
202  *
203  * The node should figure out if the written page is the one that node is
204  * interested in by itself.
205  */
206 void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes)
207 {
208 	struct kvm_page_track_notifier_head *head;
209 	struct kvm_page_track_notifier_node *n;
210 	int idx;
211 
212 	head = &kvm->arch.track_notifier_head;
213 
214 	if (hlist_empty(&head->track_notifier_list))
215 		return;
216 
217 	idx = srcu_read_lock(&head->track_srcu);
218 	hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
219 				  srcu_read_lock_held(&head->track_srcu))
220 		if (n->track_write)
221 			n->track_write(gpa, new, bytes, n);
222 	srcu_read_unlock(&head->track_srcu, idx);
223 }
224 
225 /*
226  * Notify external page track nodes that a memory region is being removed from
227  * the VM, e.g. so that users can free any associated metadata.
228  */
229 void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
230 {
231 	struct kvm_page_track_notifier_head *head;
232 	struct kvm_page_track_notifier_node *n;
233 	int idx;
234 
235 	head = &kvm->arch.track_notifier_head;
236 
237 	if (hlist_empty(&head->track_notifier_list))
238 		return;
239 
240 	idx = srcu_read_lock(&head->track_srcu);
241 	hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
242 				  srcu_read_lock_held(&head->track_srcu))
243 		if (n->track_remove_region)
244 			n->track_remove_region(slot->base_gfn, slot->npages, n);
245 	srcu_read_unlock(&head->track_srcu, idx);
246 }
247 
248 /*
249  * add guest page to the tracking pool so that corresponding access on that
250  * page will be intercepted.
251  *
252  * @kvm: the guest instance we are interested in.
253  * @gfn: the guest page.
254  */
255 int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn)
256 {
257 	struct kvm_memory_slot *slot;
258 	int idx;
259 
260 	idx = srcu_read_lock(&kvm->srcu);
261 
262 	slot = gfn_to_memslot(kvm, gfn);
263 	if (!slot) {
264 		srcu_read_unlock(&kvm->srcu, idx);
265 		return -EINVAL;
266 	}
267 
268 	write_lock(&kvm->mmu_lock);
269 	__kvm_write_track_add_gfn(kvm, slot, gfn);
270 	write_unlock(&kvm->mmu_lock);
271 
272 	srcu_read_unlock(&kvm->srcu, idx);
273 
274 	return 0;
275 }
276 EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn);
277 
278 /*
279  * remove the guest page from the tracking pool which stops the interception
280  * of corresponding access on that page.
281  *
282  * @kvm: the guest instance we are interested in.
283  * @gfn: the guest page.
284  */
285 int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn)
286 {
287 	struct kvm_memory_slot *slot;
288 	int idx;
289 
290 	idx = srcu_read_lock(&kvm->srcu);
291 
292 	slot = gfn_to_memslot(kvm, gfn);
293 	if (!slot) {
294 		srcu_read_unlock(&kvm->srcu, idx);
295 		return -EINVAL;
296 	}
297 
298 	write_lock(&kvm->mmu_lock);
299 	__kvm_write_track_remove_gfn(kvm, slot, gfn);
300 	write_unlock(&kvm->mmu_lock);
301 
302 	srcu_read_unlock(&kvm->srcu, idx);
303 
304 	return 0;
305 }
306 EXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn);
307 #endif
308