--- xen.c (1cfc9c4b9d4606a1e90e7dbc50058b9f0c1d43a6)
+++ xen.c (14243b387137a4afbe1df5d9dc15182d6657bb79)
 // SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
  * Copyright © 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
  *
  * KVM Xen emulation
  */
 
 #include "x86.h"
 #include "xen.h"
 #include "hyperv.h"
 
 #include <linux/kvm_host.h>
 #include <linux/sched/stat.h>
 
 #include <trace/events/kvm.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/vcpu.h>
+#include <xen/interface/event_channel.h>
 
 #include "trace.h"
 
 DEFINE_STATIC_KEY_DEFERRED_FALSE(kvm_xen_enabled, HZ);
 
 static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
 {
         struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;

--- 163 unchanged lines hidden (view full) ---

         if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
                                           &state_entry_time, offset,
                                           sizeof(state_entry_time)))
                 return;
 }
 
 int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
 {
+        unsigned long evtchn_pending_sel = READ_ONCE(v->arch.xen.evtchn_pending_sel);
+        bool atomic = in_atomic() || !task_is_running(current);
         int err;
         u8 rc = 0;
 
         /*
          * If the global upcall vector (HVMIRQ_callback_vector) is set and
          * the vCPU's evtchn_upcall_pending flag is set, the IRQ is pending.
          */
         struct gfn_to_hva_cache *ghc = &v->arch.xen.vcpu_info_cache;
         struct kvm_memslots *slots = kvm_memslots(v->kvm);
+        bool ghc_valid = slots->generation == ghc->generation &&
+                !kvm_is_error_hva(ghc->hva) && ghc->memslot;
+
         unsigned int offset = offsetof(struct vcpu_info, evtchn_upcall_pending);
 
         /* No need for compat handling here */
         BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) !=
                      offsetof(struct compat_vcpu_info, evtchn_upcall_pending));
         BUILD_BUG_ON(sizeof(rc) !=
                      sizeof_field(struct vcpu_info, evtchn_upcall_pending));
         BUILD_BUG_ON(sizeof(rc) !=
                      sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending));
 
         /*
          * For efficiency, this mirrors the checks for using the valid
          * cache in kvm_read_guest_offset_cached(), but just uses
          * __get_user() instead. And falls back to the slow path.
          */
-        if (likely(slots->generation == ghc->generation &&
-                   !kvm_is_error_hva(ghc->hva) && ghc->memslot)) {
+        if (!evtchn_pending_sel && ghc_valid) {
                 /* Fast path */
                 pagefault_disable();
                 err = __get_user(rc, (u8 __user *)ghc->hva + offset);
                 pagefault_enable();
                 if (!err)
                         return rc;
         }
 
         /* Slow path */
 
         /*
          * This function gets called from kvm_vcpu_block() after setting the
          * task to TASK_INTERRUPTIBLE, to see if it needs to wake immediately
          * from a HLT. So we really mustn't sleep. If the page ended up absent
          * at that point, just return 1 in order to trigger an immediate wake,
          * and we'll end up getting called again from a context where we *can*
          * fault in the page and wait for it.
          */
-        if (in_atomic() || !task_is_running(current))
+        if (atomic)
                 return 1;
 
-        kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset,
-                                     sizeof(rc));
+        if (!ghc_valid) {
+                err = kvm_gfn_to_hva_cache_init(v->kvm, ghc, ghc->gpa, ghc->len);
+                if (err || !ghc->memslot) {
+                        /*
+                         * If this failed, userspace has screwed up the
+                         * vcpu_info mapping. No interrupts for you.
+                         */
+                        return 0;
+                }
+        }
 
+        /*
+         * Now we have a valid (protected by srcu) userspace HVA in
+         * ghc->hva which points to the struct vcpu_info. If there
+         * are any bits in the in-kernel evtchn_pending_sel then
+         * we need to write those to the guest vcpu_info and set
+         * its evtchn_upcall_pending flag. If there aren't any bits
+         * to add, we only want to *check* evtchn_upcall_pending.
+         */
+        if (evtchn_pending_sel) {
+                bool long_mode = v->kvm->arch.xen.long_mode;
+
+                if (!user_access_begin((void __user *)ghc->hva, sizeof(struct vcpu_info)))
+                        return 0;
+
+                if (IS_ENABLED(CONFIG_64BIT) && long_mode) {
+                        struct vcpu_info __user *vi = (void __user *)ghc->hva;
+
+                        /* Attempt to set the evtchn_pending_sel bits in the
+                         * guest, and if that succeeds then clear the same
+                         * bits in the in-kernel version. */
+                        asm volatile("1:\t" LOCK_PREFIX "orq %0, %1\n"
+                                     "\tnotq %0\n"
+                                     "\t" LOCK_PREFIX "andq %0, %2\n"
+                                     "2:\n"
+                                     "\t.section .fixup,\"ax\"\n"
+                                     "3:\tjmp\t2b\n"
+                                     "\t.previous\n"
+                                     _ASM_EXTABLE_UA(1b, 3b)
+                                     : "=r" (evtchn_pending_sel),
+                                       "+m" (vi->evtchn_pending_sel),
+                                       "+m" (v->arch.xen.evtchn_pending_sel)
+                                     : "0" (evtchn_pending_sel));
+                } else {
+                        struct compat_vcpu_info __user *vi = (void __user *)ghc->hva;
+                        u32 evtchn_pending_sel32 = evtchn_pending_sel;
+
+                        /* Attempt to set the evtchn_pending_sel bits in the
+                         * guest, and if that succeeds then clear the same
+                         * bits in the in-kernel version. */
+                        asm volatile("1:\t" LOCK_PREFIX "orl %0, %1\n"
+                                     "\tnotl %0\n"
+                                     "\t" LOCK_PREFIX "andl %0, %2\n"
+                                     "2:\n"
+                                     "\t.section .fixup,\"ax\"\n"
+                                     "3:\tjmp\t2b\n"
+                                     "\t.previous\n"
+                                     _ASM_EXTABLE_UA(1b, 3b)
+                                     : "=r" (evtchn_pending_sel32),
+                                       "+m" (vi->evtchn_pending_sel),
+                                       "+m" (v->arch.xen.evtchn_pending_sel)
+                                     : "0" (evtchn_pending_sel32));
+                }
+                rc = 1;
+                unsafe_put_user(rc, (u8 __user *)ghc->hva + offset, err);
+
+ err:
+                user_access_end();
+
+                mark_page_dirty_in_slot(v->kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
+        } else {
+                __get_user(rc, (u8 __user *)ghc->hva + offset);
+        }
+
         return rc;
 }
 
 int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 {
         int r = -ENOENT;
 
         mutex_lock(&kvm->lock);

--- 479 unchanged lines hidden (view full) ---

         vcpu->run->xen.u.hcall.params[4] = params[4];
         vcpu->run->xen.u.hcall.params[5] = params[5];
         vcpu->arch.xen.hypercall_rip = kvm_get_linear_rip(vcpu);
         vcpu->arch.complete_userspace_io =
                 kvm_xen_hypercall_complete_userspace;
 
         return 0;
 }
+
+static inline int max_evtchn_port(struct kvm *kvm)
+{
+        if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode)
+                return EVTCHN_2L_NR_CHANNELS;
+        else
+                return COMPAT_EVTCHN_2L_NR_CHANNELS;
+}
+
+/*
+ * This follows the kvm_set_irq() API, so it returns:
+ *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
+ *  = 0   Interrupt was coalesced (previous irq is still pending)
+ *  > 0   Number of CPUs interrupt was delivered to
+ */
+int kvm_xen_set_evtchn_fast(struct kvm_kernel_irq_routing_entry *e,
+                            struct kvm *kvm)
+{
+        struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
+        struct kvm_vcpu *vcpu;
+        unsigned long *pending_bits, *mask_bits;
+        unsigned long flags;
+        int port_word_bit;
+        bool kick_vcpu = false;
+        int idx;
+        int rc;
+
+        vcpu = kvm_get_vcpu_by_id(kvm, e->xen_evtchn.vcpu);
+        if (!vcpu)
+                return -1;
+
+        if (!vcpu->arch.xen.vcpu_info_set)
+                return -1;
+
+        if (e->xen_evtchn.port >= max_evtchn_port(kvm))
+                return -1;
+
+        rc = -EWOULDBLOCK;
+        read_lock_irqsave(&gpc->lock, flags);
+
+        idx = srcu_read_lock(&kvm->srcu);
+        if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE))
+                goto out_rcu;
+
+        if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
+                struct shared_info *shinfo = gpc->khva;
+                pending_bits = (unsigned long *)&shinfo->evtchn_pending;
+                mask_bits = (unsigned long *)&shinfo->evtchn_mask;
+                port_word_bit = e->xen_evtchn.port / 64;
+        } else {
+                struct compat_shared_info *shinfo = gpc->khva;
+                pending_bits = (unsigned long *)&shinfo->evtchn_pending;
+                mask_bits = (unsigned long *)&shinfo->evtchn_mask;
+                port_word_bit = e->xen_evtchn.port / 32;
+        }
+
+        /*
+         * If this port wasn't already set, and if it isn't masked, then
+         * we try to set the corresponding bit in the in-kernel shadow of
+         * evtchn_pending_sel for the target vCPU. And if *that* wasn't
+         * already set, then we kick the vCPU in question to write to the
+         * *real* evtchn_pending_sel in its own guest vcpu_info struct.
+         */
+        if (test_and_set_bit(e->xen_evtchn.port, pending_bits)) {
+                rc = 0; /* It was already raised */
+        } else if (test_bit(e->xen_evtchn.port, mask_bits)) {
+                rc = -1; /* Masked */
+        } else {
+                rc = 1; /* Delivered. But was the vCPU waking already? */
+                if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel))
+                        kick_vcpu = true;
+        }
+
+ out_rcu:
+        srcu_read_unlock(&kvm->srcu, idx);
+        read_unlock_irqrestore(&gpc->lock, flags);
+
+        if (kick_vcpu) {
+                kvm_make_request(KVM_REQ_EVENT, vcpu);
+                kvm_vcpu_kick(vcpu);
+        }
+
+        return rc;
+}
+
+/* This is the version called from kvm_set_irq() as the .set function */
+static int evtchn_set_fn(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
+                         int irq_source_id, int level, bool line_status)
+{
+        bool mm_borrowed = false;
+        int rc;
+
+        if (!level)
+                return -1;
+
+        rc = kvm_xen_set_evtchn_fast(e, kvm);
+        if (rc != -EWOULDBLOCK)
+                return rc;
+
+        if (current->mm != kvm->mm) {
+                /*
+                 * If not on a thread which already belongs to this KVM,
+                 * we'd better be in the irqfd workqueue.
+                 */
+                if (WARN_ON_ONCE(current->mm))
+                        return -EINVAL;
+
+                kthread_use_mm(kvm->mm);
+                mm_borrowed = true;
+        }
+
+        /*
+         * For the irqfd workqueue, using the main kvm->lock mutex is
+         * fine since this function is invoked from kvm_set_irq() with
+         * no other lock held, no srcu. In future if it will be called
+         * directly from a vCPU thread (e.g. on hypercall for an IPI)
+         * then it may need to switch to using a leaf-node mutex for
+         * serializing the shared_info mapping.
+         */
+        mutex_lock(&kvm->lock);
+
+        /*
+         * It is theoretically possible for the page to be unmapped
+         * and the MMU notifier to invalidate the shared_info before
+         * we even get to use it. In that case, this looks like an
+         * infinite loop. It was tempting to do it via the userspace
+         * HVA instead... but that just *hides* the fact that it's
+         * an infinite loop, because if a fault occurs and it waits
+         * for the page to come back, it can *still* immediately
+         * fault and have to wait again, repeatedly.
+         *
+         * Conversely, the page could also have been reinstated by
+         * another thread before we even obtain the mutex above, so
+         * check again *first* before remapping it.
+         */
+        do {
+                struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
+                int idx;
+
+                rc = kvm_xen_set_evtchn_fast(e, kvm);
+                if (rc != -EWOULDBLOCK)
+                        break;
+
+                idx = srcu_read_lock(&kvm->srcu);
+                rc = kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpc->gpa,
+                                                  PAGE_SIZE, false);
+                srcu_read_unlock(&kvm->srcu, idx);
+        } while(!rc);
+
+        mutex_unlock(&kvm->lock);
+
+        if (mm_borrowed)
+                kthread_unuse_mm(kvm->mm);
+
+        return rc;
+}
+
+int kvm_xen_setup_evtchn(struct kvm *kvm,
+                         struct kvm_kernel_irq_routing_entry *e,
+                         const struct kvm_irq_routing_entry *ue)
+
+{
+        if (ue->u.xen_evtchn.port >= max_evtchn_port(kvm))
+                return -EINVAL;
+
+        /* We only support 2 level event channels for now */
+        if (ue->u.xen_evtchn.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)
+                return -EINVAL;
+
+        e->xen_evtchn.port = ue->u.xen_evtchn.port;
+        e->xen_evtchn.vcpu = ue->u.xen_evtchn.vcpu;
+        e->xen_evtchn.priority = ue->u.xen_evtchn.priority;
+        e->set = evtchn_set_fn;
+
+        return 0;
+}
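A note on the new evtchn_pending_sel handling in __kvm_xen_has_interrupt() above: stripped of the user_access_begin()/exception-table machinery, the LOCK-prefixed orq/notq/andq sequence (orl/notl/andl for 32-bit guests) merges the in-kernel shadow bits into the guest's vcpu_info and then clears exactly the bits it transferred, so a bit set concurrently by kvm_xen_set_evtchn_fast() survives for the next pass; if the write to the guest faults, the fixup branches past the clear, so no shadow bits are lost. A minimal C sketch of that logic follows (illustrative only, not code from the patch; the helper name is made up):

/* Illustrative sketch (hypothetical helper, not in the patch): the effect
 * of the inline asm, ignoring usercopy and fault handling. */
static void propagate_pending_sel(unsigned long *guest_sel,
                                  unsigned long *kernel_sel)
{
        unsigned long bits = *kernel_sel;   /* the real code snapshots this with READ_ONCE() */

        *guest_sel |= bits;     /* LOCK or{q,l}: merge shadow bits into the guest's
                                 * vcpu_info->evtchn_pending_sel */
        *kernel_sel &= ~bits;   /* not{q,l} + LOCK and{q,l}: clear only the bits just
                                 * transferred; bits that raced in meanwhile are kept */
}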
|
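kvm_xen_setup_evtchn() is the kernel-side consumer of the new KVM_IRQ_ROUTING_XEN_EVTCHN GSI routing type: userspace binds a GSI to a (port, vcpu, priority) triple and then raises that GSI like any other, typically through an irqfd, which is what lands in evtchn_set_fn(). The port must be below max_evtchn_port(): EVTCHN_2L_NR_CHANNELS (4096 = 64 words of 64 bits of evtchn_pending) for 64-bit guests, or 1024 (32 x 32) for 32-bit ones. Below is a rough userspace sketch under those assumptions; it uses the uapi names this series adds (KVM_IRQ_ROUTING_XEN_EVTCHN, the port/vcpu/priority fields, KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL), omits all error handling, and assumes a VM fd with an in-kernel irqchip and the Xen shared_info/vcpu_info already configured:

/* Hypothetical userspace sketch, not from the patch: route GSI 0 to Xen
 * event channel port 3 on vCPU 0, attach an irqfd, and raise it once. */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int bind_and_raise_evtchn(int vm_fd)
{
        struct kvm_irq_routing *routing;
        struct kvm_irq_routing_entry *e;
        struct kvm_irqfd irqfd;
        uint64_t one = 1;
        int efd = eventfd(0, EFD_CLOEXEC);

        routing = calloc(1, sizeof(*routing) + sizeof(*e));
        routing->nr = 1;
        e = &routing->entries[0];
        e->gsi = 0;
        e->type = KVM_IRQ_ROUTING_XEN_EVTCHN;
        e->u.xen_evtchn.port = 3;       /* must be < max_evtchn_port() */
        e->u.xen_evtchn.vcpu = 0;       /* vCPU id as passed to KVM_CREATE_VCPU */
        e->u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
        ioctl(vm_fd, KVM_SET_GSI_ROUTING, routing);

        memset(&irqfd, 0, sizeof(irqfd));
        irqfd.fd = efd;
        irqfd.gsi = 0;
        ioctl(vm_fd, KVM_IRQFD, &irqfd);

        /* Signalling the eventfd raises the GSI; delivery goes through
         * evtchn_set_fn(), which tries kvm_xen_set_evtchn_fast() first. */
        write(efd, &one, sizeof(one));

        free(routing);
        return efd;
}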