xref: /openbmc/linux/virt/kvm/async_pf.c (revision a75afe48)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * kvm asynchronous fault support
 *
 * Copyright 2010 Red Hat, Inc.
 *
 * Author:
 *      Gleb Natapov <gleb@redhat.com>
 */

#include <linux/kvm_host.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>

#include "async_pf.h"
#include <trace/events/kvm.h>

static struct kmem_cache *async_pf_cache;

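/*
 * Module-level init: create the slab cache used to allocate
 * struct kvm_async_pf work items.
 */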
int kvm_async_pf_init(void)
{
	async_pf_cache = KMEM_CACHE(kvm_async_pf, 0);

	if (!async_pf_cache)
		return -ENOMEM;

	return 0;
}

void kvm_async_pf_deinit(void)
{
	kmem_cache_destroy(async_pf_cache);
	async_pf_cache = NULL;
}

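/*
 * Per-vCPU init: set up the list of outstanding work items ("queue"), the
 * list of completed items ("done"), and the lock protecting them.
 */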
void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
{
	INIT_LIST_HEAD(&vcpu->async_pf.done);
	INIT_LIST_HEAD(&vcpu->async_pf.queue);
	spin_lock_init(&vcpu->async_pf.lock);
}

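/*
 * Work queue callback, run from a kworker: fault the page in on behalf of
 * the guest, move the work item to the vCPU's "done" list, and wake the
 * vCPU so it can notice the completion.
 */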
static void async_pf_execute(struct work_struct *work)
{
	struct kvm_async_pf *apf =
		container_of(work, struct kvm_async_pf, work);
	struct mm_struct *mm = apf->mm;
	struct kvm_vcpu *vcpu = apf->vcpu;
	unsigned long addr = apf->addr;
	gpa_t cr2_or_gpa = apf->cr2_or_gpa;
	int locked = 1;
	bool first;

	might_sleep();

	/*
	 * This work runs asynchronously to the task that owns the mm and
	 * may execute in a different context, so the page must be faulted
	 * in via the remote GUP interface.
	 */
	mmap_read_lock(mm);
	get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
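	/*
	 * get_user_pages_remote() may drop mmap_lock while faulting the page
	 * in, in which case it clears "locked"; only unlock if the lock is
	 * still held here.
	 */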
	if (locked)
		mmap_read_unlock(mm);

	if (IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC))
		kvm_arch_async_page_present(vcpu, apf);

	spin_lock(&vcpu->async_pf.lock);
	first = list_empty(&vcpu->async_pf.done);
	list_add_tail(&apf->link, &vcpu->async_pf.done);
	apf->vcpu = NULL;
	spin_unlock(&vcpu->async_pf.lock);

	if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
		kvm_arch_async_page_present_queued(vcpu);

	/*
	 * apf may be freed by kvm_check_async_pf_completion() after
	 * this point
	 */

	trace_kvm_async_pf_completed(addr, cr2_or_gpa);

	__kvm_vcpu_wake_up(vcpu);

	mmput(mm);
}

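/* Reclaim a work item that has been taken off the "done" list. */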
static void kvm_flush_and_free_async_pf_work(struct kvm_async_pf *work)
{
	/*
	 * The async #PF is "done", but KVM must wait for the work item itself,
	 * i.e. async_pf_execute(), to run to completion.  If KVM is a module,
	 * KVM must ensure *no* code owned by KVM (the module) can be run after
	 * the last call to module_put().  Note, flushing the work item is
	 * always required when the item is taken off the completion queue.
	 * E.g. even if the vCPU handles the item in the "normal" path, the VM
	 * could be terminated before async_pf_execute() completes.
	 *
	 * Wake-all events skip the queue and go straight to the "done" list,
	 * i.e. they don't need to be flushed (but sanity check that the work
	 * was never queued).
	 */
	if (work->wakeup_all)
		WARN_ON_ONCE(work->work.func);
	else
		flush_work(&work->work);
	kmem_cache_free(async_pf_cache, work);
}

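/*
 * Cancel and reclaim all async #PF work for a vCPU, e.g. when the vCPU is
 * being destroyed or its async #PF state is reset.  Outstanding items are
 * cancelled (or flushed when CONFIG_KVM_ASYNC_PF_SYNC=y) and completed
 * items are freed.
 */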
void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
{
	spin_lock(&vcpu->async_pf.lock);

	/* Cancel any outstanding work queue items. */
	while (!list_empty(&vcpu->async_pf.queue)) {
		struct kvm_async_pf *work =
			list_first_entry(&vcpu->async_pf.queue,
					 typeof(*work), queue);
		list_del(&work->queue);

		/*
		 * The work already completed: it is on vcpu->async_pf.done
		 * and will be reclaimed by the loop below, so do nothing
		 * here.
		 */
		if (!work->vcpu)
			continue;

		spin_unlock(&vcpu->async_pf.lock);
#ifdef CONFIG_KVM_ASYNC_PF_SYNC
		flush_work(&work->work);
#else
		if (cancel_work_sync(&work->work)) {
			mmput(work->mm);
			kmem_cache_free(async_pf_cache, work);
		}
#endif
		spin_lock(&vcpu->async_pf.lock);
	}

	while (!list_empty(&vcpu->async_pf.done)) {
		struct kvm_async_pf *work =
			list_first_entry(&vcpu->async_pf.done,
					 typeof(*work), link);
		list_del(&work->link);

		spin_unlock(&vcpu->async_pf.lock);
		kvm_flush_and_free_async_pf_work(work);
		spin_lock(&vcpu->async_pf.lock);
	}
	spin_unlock(&vcpu->async_pf.lock);

	vcpu->async_pf.queued = 0;
}

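/*
 * Called from the vCPU run loop: drain the "done" list and let the arch
 * code inject "page ready" notifications for each completed async #PF.
 */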
void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
{
	struct kvm_async_pf *work;

	while (!list_empty_careful(&vcpu->async_pf.done) &&
	      kvm_arch_can_dequeue_async_page_present(vcpu)) {
		spin_lock(&vcpu->async_pf.lock);
		work = list_first_entry(&vcpu->async_pf.done, typeof(*work),
					      link);
		list_del(&work->link);
		spin_unlock(&vcpu->async_pf.lock);

		kvm_arch_async_page_ready(vcpu, work);
		if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC))
			kvm_arch_async_page_present(vcpu, work);

		list_del(&work->queue);
		vcpu->async_pf.queued--;
		kvm_flush_and_free_async_pf_work(work);
	}
}

/*
 * Try to schedule a job to handle the page fault asynchronously. Returns
 * 'true' on success, 'false' on failure (the page fault has to be handled
 * synchronously).
 */
bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
			unsigned long hva, struct kvm_arch_async_pf *arch)
{
	struct kvm_async_pf *work;

	if (vcpu->async_pf.queued >= ASYNC_PF_PER_VCPU)
		return false;

	/* Arch-specific code should not do async PF in this case. */
	if (unlikely(kvm_is_error_hva(hva)))
		return false;

	/*
	 * Do the allocation with GFP_NOWAIT: if the allocation would have to
	 * sleep, we may as well sleep faulting the page in synchronously
	 * instead.
	 */
	work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT | __GFP_NOWARN);
	if (!work)
		return false;

	work->wakeup_all = false;
	work->vcpu = vcpu;
	work->cr2_or_gpa = cr2_or_gpa;
	work->addr = hva;
	work->arch = *arch;
	work->mm = current->mm;
	mmget(work->mm);

	INIT_WORK(&work->work, async_pf_execute);

	list_add_tail(&work->queue, &vcpu->async_pf.queue);
	vcpu->async_pf.queued++;
	work->notpresent_injected = kvm_arch_async_page_not_present(vcpu, work);

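	/*
	 * Schedule the work only after the "page not present" event has been
	 * injected and work->notpresent_injected recorded, so the completion
	 * path cannot observe the work item in a half-initialized state.
	 */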
	schedule_work(&work->work);

	return true;
}

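/*
 * Queue a special "wakeup all" event on the "done" list: when delivered, it
 * tells the guest to wake every task waiting on an async page fault, rather
 * than completing one specific fault.
 */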
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
{
	struct kvm_async_pf *work;
	bool first;

	if (!list_empty_careful(&vcpu->async_pf.done))
		return 0;

	work = kmem_cache_zalloc(async_pf_cache, GFP_ATOMIC);
	if (!work)
		return -ENOMEM;

	work->wakeup_all = true;
	INIT_LIST_HEAD(&work->queue); /* for list_del to work */

	spin_lock(&vcpu->async_pf.lock);
	first = list_empty(&vcpu->async_pf.done);
	list_add_tail(&work->link, &vcpu->async_pf.done);
	spin_unlock(&vcpu->async_pf.lock);

	if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
		kvm_arch_async_page_present_queued(vcpu);

	vcpu->async_pf.queued++;
	return 0;
}