// SPDX-License-Identifier: GPL-2.0-only
/*
 * kvm asynchronous fault support
 *
 * Copyright 2010 Red Hat, Inc.
 *
 * Author:
 *      Gleb Natapov <gleb@redhat.com>
 */

#include <linux/kvm_host.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>

#include "async_pf.h"
#include <trace/events/kvm.h>

/* Slab cache for struct kvm_async_pf work items; created in kvm_async_pf_init(). */
static struct kmem_cache *async_pf_cache;

/*
 * Create the slab cache backing all async page fault work items.
 * Returns 0 on success, -ENOMEM if the cache cannot be created.
 */
int kvm_async_pf_init(void)
{
	async_pf_cache = KMEM_CACHE(kvm_async_pf, 0);

	if (!async_pf_cache)
		return -ENOMEM;

	return 0;
}

/* Tear down the work-item cache created by kvm_async_pf_init(). */
void kvm_async_pf_deinit(void)
{
	kmem_cache_destroy(async_pf_cache);
	async_pf_cache = NULL;
}

/* Initialize the per-vCPU async_pf lists and their protecting spinlock. */
void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
{
	INIT_LIST_HEAD(&vcpu->async_pf.done);
	INIT_LIST_HEAD(&vcpu->async_pf.queue);
	spin_lock_init(&vcpu->async_pf.lock);
}

/*
 * Workqueue handler scheduled by kvm_setup_async_pf(): fault the page in on
 * behalf of the guest, move the work item onto the vCPU's "done" list and
 * wake the vCPU.  Drops the mm and kvm references taken when the work was
 * queued.
 */
static void async_pf_execute(struct work_struct *work)
{
	struct kvm_async_pf *apf =
		container_of(work, struct kvm_async_pf, work);
	struct mm_struct *mm = apf->mm;
	struct kvm_vcpu *vcpu = apf->vcpu;
	unsigned long addr = apf->addr;
	gpa_t cr2_or_gpa = apf->cr2_or_gpa;
	int locked = 1;

	might_sleep();

	/*
	 * This work is run asynchronously to the task which owns
	 * mm and might be done in another context, so we must
	 * access remotely.
	 *
	 * get_user_pages_remote() may drop mmap_sem and clear 'locked';
	 * only unlock if it is still held.
	 */
	down_read(&mm->mmap_sem);
	get_user_pages_remote(NULL, mm, addr, 1, FOLL_WRITE, NULL, NULL,
			&locked);
	if (locked)
		up_read(&mm->mmap_sem);

	/*
	 * In sync mode the "page present" event is delivered here, from the
	 * workqueue; otherwise kvm_check_async_pf_completion() delivers it.
	 */
	if (IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC))
		kvm_arch_async_page_present(vcpu, apf);

	spin_lock(&vcpu->async_pf.lock);
	list_add_tail(&apf->link, &vcpu->async_pf.done);
	/*
	 * A NULL vcpu tells kvm_clear_async_pf_completion_queue() that this
	 * item already sits on the done list and must not be cancelled.
	 */
	apf->vcpu = NULL;
	spin_unlock(&vcpu->async_pf.lock);

	/*
	 * apf may be freed by kvm_check_async_pf_completion() after
	 * this point
	 */

	trace_kvm_async_pf_completed(addr, cr2_or_gpa);

	/*
	 * vcpu remains valid here because the kvm reference taken in
	 * kvm_setup_async_pf() is not released until kvm_put_kvm() below.
	 */
	rcuwait_wake_up(&vcpu->wait);

	mmput(mm);
	kvm_put_kvm(vcpu->kvm);
}

/*
 * Cancel or flush every outstanding async page fault for @vcpu and free all
 * completed work items.  Resets the queued counter to zero.
 */
void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
{
	spin_lock(&vcpu->async_pf.lock);

	/* cancel outstanding work queue item */
	while (!list_empty(&vcpu->async_pf.queue)) {
		struct kvm_async_pf *work =
			list_first_entry(&vcpu->async_pf.queue,
					 typeof(*work), queue);
		list_del(&work->queue);

		/*
		 * We know it's present in vcpu->async_pf.done, do
		 * nothing here.
		 */
		if (!work->vcpu)
			continue;

		/*
		 * Drop the lock while waiting on the workqueue item;
		 * flush_work()/cancel_work_sync() may sleep.
		 */
		spin_unlock(&vcpu->async_pf.lock);
#ifdef CONFIG_KVM_ASYNC_PF_SYNC
		flush_work(&work->work);
#else
		/*
		 * If cancel_work_sync() returns true the work never ran, so
		 * the references and the work item it would have released in
		 * async_pf_execute() must be dropped here instead.
		 */
		if (cancel_work_sync(&work->work)) {
			mmput(work->mm);
			kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
			kmem_cache_free(async_pf_cache, work);
		}
#endif
		spin_lock(&vcpu->async_pf.lock);
	}

	/* Free everything that completed but was never consumed. */
	while (!list_empty(&vcpu->async_pf.done)) {
		struct kvm_async_pf *work =
			list_first_entry(&vcpu->async_pf.done,
					 typeof(*work), link);
		list_del(&work->link);
		kmem_cache_free(async_pf_cache, work);
	}
	spin_unlock(&vcpu->async_pf.lock);

	vcpu->async_pf.queued = 0;
}

/*
 * Drain the "done" list: notify the arch code that each faulted-in page is
 * ready (and, outside sync mode, deliver the "page present" event), then
 * free the work item.  Stops early if the arch cannot accept another
 * page-present notification right now.
 */
void
kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
{
	struct kvm_async_pf *work;

	while (!list_empty_careful(&vcpu->async_pf.done) &&
	      kvm_arch_can_dequeue_async_page_present(vcpu)) {
		spin_lock(&vcpu->async_pf.lock);
		work = list_first_entry(&vcpu->async_pf.done, typeof(*work),
					      link);
		list_del(&work->link);
		spin_unlock(&vcpu->async_pf.lock);

		kvm_arch_async_page_ready(vcpu, work);
		if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC))
			kvm_arch_async_page_present(vcpu, work);

		/* Also unlink from the pending queue (safe: see wakeup_all). */
		list_del(&work->queue);
		vcpu->async_pf.queued--;
		kmem_cache_free(async_pf_cache, work);
	}
}

/*
 * Queue an asynchronous fault for @cr2_or_gpa / @hva on @vcpu.
 *
 * Takes a reference on current->mm and on the VM; both are released either
 * by async_pf_execute() when the work runs, by the cancel path in
 * kvm_clear_async_pf_completion_queue(), or on the retry_sync error path
 * below.
 *
 * Returns 1 if the work was queued and the "page not present" event was
 * injected, 0 if the fault must be handled synchronously instead (queue
 * full, allocation failure, bad hva, or the work could not be scheduled).
 */
int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
		       unsigned long hva, struct kvm_arch_async_pf *arch)
{
	struct kvm_async_pf *work;

	if (vcpu->async_pf.queued >= ASYNC_PF_PER_VCPU)
		return 0;

	/* setup delayed work */

	/*
	 * do alloc nowait since if we are going to sleep anyway we
	 * may as well sleep faulting in page
	 */
	work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT | __GFP_NOWARN);
	if (!work)
		return 0;

	work->wakeup_all = false;
	work->vcpu = vcpu;
	work->cr2_or_gpa = cr2_or_gpa;
	work->addr = hva;
	work->arch = *arch;
	work->mm = current->mm;
	mmget(work->mm);
	kvm_get_kvm(work->vcpu->kvm);

	/* this can't really happen otherwise gfn_to_pfn_async
	   would succeed */
	if (unlikely(kvm_is_error_hva(work->addr)))
		goto retry_sync;

	INIT_WORK(&work->work, async_pf_execute);
	if (!schedule_work(&work->work))
		goto retry_sync;

	list_add_tail(&work->queue, &vcpu->async_pf.queue);
	vcpu->async_pf.queued++;
	kvm_arch_async_page_not_present(vcpu, work);
	return 1;
retry_sync:
	/* Undo the references taken above; caller falls back to sync fault. */
	kvm_put_kvm(work->vcpu->kvm);
	mmput(work->mm);
	kmem_cache_free(async_pf_cache, work);
	return 0;
}

/*
 * Inject a fake "wakeup all" completion so a sleeping guest vCPU is woken
 * even though no real fault finished.  No-op (returns 0) if completions are
 * already pending.  Returns -ENOMEM if the work item cannot be allocated.
 */
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
{
	struct kvm_async_pf *work;

	if (!list_empty_careful(&vcpu->async_pf.done))
		return 0;

	work = kmem_cache_zalloc(async_pf_cache, GFP_ATOMIC);
	if (!work)
		return -ENOMEM;

	work->wakeup_all = true;
	INIT_LIST_HEAD(&work->queue); /* for list_del to work */

	spin_lock(&vcpu->async_pf.lock);
	list_add_tail(&work->link, &vcpu->async_pf.done);
	spin_unlock(&vcpu->async_pf.lock);

	vcpu->async_pf.queued++;
	return 0;
}