// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
 * Copyright © 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * KVM Xen emulation
 */

#include "x86.h"
#include "xen.h"
#include "hyperv.h"

#include <linux/kvm_host.h>

#include <trace/events/kvm.h>
#include <xen/interface/xen.h>

#include "trace.h"

DEFINE_STATIC_KEY_DEFERRED_FALSE(kvm_xen_enabled, HZ);

static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
{
        gpa_t gpa = gfn_to_gpa(gfn);
        int wc_ofs, sec_hi_ofs;
        int ret;
        int idx = srcu_read_lock(&kvm->srcu);

        ret = kvm_gfn_to_hva_cache_init(kvm, &kvm->arch.xen.shinfo_cache,
                                        gpa, PAGE_SIZE);
        if (ret)
                goto out;

        kvm->arch.xen.shinfo_set = true;

        /* Paranoia checks on the 32-bit struct layout */
        BUILD_BUG_ON(offsetof(struct compat_shared_info, wc) != 0x900);
        BUILD_BUG_ON(offsetof(struct compat_shared_info, arch.wc_sec_hi) != 0x924);
        BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);

        /* 32-bit location by default */
        wc_ofs = offsetof(struct compat_shared_info, wc);
        sec_hi_ofs = offsetof(struct compat_shared_info, arch.wc_sec_hi);

#ifdef CONFIG_X86_64
        /* Paranoia checks on the 64-bit struct layout */
        BUILD_BUG_ON(offsetof(struct shared_info, wc) != 0xc00);
        BUILD_BUG_ON(offsetof(struct shared_info, wc_sec_hi) != 0xc0c);

        if (kvm->arch.xen.long_mode) {
                wc_ofs = offsetof(struct shared_info, wc);
                sec_hi_ofs = offsetof(struct shared_info, wc_sec_hi);
        }
#endif

        kvm_write_wall_clock(kvm, gpa + wc_ofs, sec_hi_ofs - wc_ofs);
        kvm_make_all_cpus_request(kvm, KVM_REQ_MASTERCLOCK_UPDATE);

out:
        srcu_read_unlock(&kvm->srcu, idx);
        return ret;
}

int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
{
        u8 rc = 0;

        /*
         * If the global upcall vector (HVMIRQ_callback_vector) is set and
         * the vCPU's evtchn_upcall_pending flag is set, the IRQ is pending.
         */
        struct gfn_to_hva_cache *ghc = &v->arch.xen.vcpu_info_cache;
        struct kvm_memslots *slots = kvm_memslots(v->kvm);
        unsigned int offset = offsetof(struct vcpu_info, evtchn_upcall_pending);

        /* No need for compat handling here */
        BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) !=
                     offsetof(struct compat_vcpu_info, evtchn_upcall_pending));
        BUILD_BUG_ON(sizeof(rc) !=
                     sizeof(((struct vcpu_info *)0)->evtchn_upcall_pending));
        BUILD_BUG_ON(sizeof(rc) !=
                     sizeof(((struct compat_vcpu_info *)0)->evtchn_upcall_pending));

        /*
         * For efficiency, this mirrors the checks for using the valid
         * cache in kvm_read_guest_offset_cached(), but just uses
         * __get_user() instead. And falls back to the slow path.
         */
        if (likely(slots->generation == ghc->generation &&
                   !kvm_is_error_hva(ghc->hva) && ghc->memslot)) {
                /* Fast path */
                __get_user(rc, (u8 __user *)ghc->hva + offset);
        } else {
                /* Slow path */
                kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset,
                                             sizeof(rc));
        }

        return rc;
}

int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
{
        int r = -ENOENT;

        mutex_lock(&kvm->lock);

        switch (data->type) {
        case KVM_XEN_ATTR_TYPE_LONG_MODE:
                if (!IS_ENABLED(CONFIG_64BIT) && data->u.long_mode) {
                        r = -EINVAL;
                } else {
                        kvm->arch.xen.long_mode = !!data->u.long_mode;
                        r = 0;
                }
                break;

        case KVM_XEN_ATTR_TYPE_SHARED_INFO:
                if (data->u.shared_info.gfn == GPA_INVALID) {
                        kvm->arch.xen.shinfo_set = false;
                        r = 0;
                        break;
                }
                r = kvm_xen_shared_info_init(kvm, data->u.shared_info.gfn);
                break;

        case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
                if (data->u.vector && data->u.vector < 0x10) {
                        r = -EINVAL;
                } else {
                        kvm->arch.xen.upcall_vector = data->u.vector;
                        r = 0;
                }
                break;

        default:
                break;
        }

        mutex_unlock(&kvm->lock);
        return r;
}

int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
{
        int r = -ENOENT;

        mutex_lock(&kvm->lock);

        switch (data->type) {
        case KVM_XEN_ATTR_TYPE_LONG_MODE:
                data->u.long_mode = kvm->arch.xen.long_mode;
                r = 0;
                break;

        case KVM_XEN_ATTR_TYPE_SHARED_INFO:
                if (kvm->arch.xen.shinfo_set)
                        data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_cache.gpa);
                else
                        data->u.shared_info.gfn = GPA_INVALID;
                r = 0;
                break;

        case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
                data->u.vector = kvm->arch.xen.upcall_vector;
                r = 0;
                break;

        default:
                break;
        }

        mutex_unlock(&kvm->lock);
        return r;
}

int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
{
        int idx, r = -ENOENT;

        mutex_lock(&vcpu->kvm->lock);
        idx = srcu_read_lock(&vcpu->kvm->srcu);

        switch (data->type) {
        case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
                /* No compat necessary here. */
                BUILD_BUG_ON(sizeof(struct vcpu_info) !=
                             sizeof(struct compat_vcpu_info));

                if (data->u.gpa == GPA_INVALID) {
                        vcpu->arch.xen.vcpu_info_set = false;
                        r = 0;
                        break;
                }

                r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
                                              &vcpu->arch.xen.vcpu_info_cache,
                                              data->u.gpa,
                                              sizeof(struct vcpu_info));
                if (!r) {
                        vcpu->arch.xen.vcpu_info_set = true;
                        kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
                }
                break;

        case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
                if (data->u.gpa == GPA_INVALID) {
                        vcpu->arch.xen.vcpu_time_info_set = false;
                        r = 0;
                        break;
                }

                r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
                                              &vcpu->arch.xen.vcpu_time_info_cache,
                                              data->u.gpa,
                                              sizeof(struct pvclock_vcpu_time_info));
                if (!r) {
                        vcpu->arch.xen.vcpu_time_info_set = true;
                        kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
                }
                break;

        default:
                break;
        }

        srcu_read_unlock(&vcpu->kvm->srcu, idx);
        mutex_unlock(&vcpu->kvm->lock);
        return r;
}

int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
{
        int r = -ENOENT;

        mutex_lock(&vcpu->kvm->lock);

        switch (data->type) {
        case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
                if (vcpu->arch.xen.vcpu_info_set)
                        data->u.gpa = vcpu->arch.xen.vcpu_info_cache.gpa;
                else
                        data->u.gpa = GPA_INVALID;
                r = 0;
                break;

        case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
                if (vcpu->arch.xen.vcpu_time_info_set)
                        data->u.gpa = vcpu->arch.xen.vcpu_time_info_cache.gpa;
                else
                        data->u.gpa = GPA_INVALID;
                r = 0;
                break;

        default:
                break;
        }

        mutex_unlock(&vcpu->kvm->lock);
        return r;
}

int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
{
        struct kvm *kvm = vcpu->kvm;
        u32 page_num = data & ~PAGE_MASK;
        u64 page_addr = data & PAGE_MASK;
        bool lm = is_long_mode(vcpu);

        /* Latch long_mode for shared_info pages etc. */
        vcpu->kvm->arch.xen.long_mode = lm;

        /*
         * If Xen hypercall intercept is enabled, fill the hypercall
         * page with VMCALL/VMMCALL instructions since that's what
         * we catch. Else the VMM has provided the hypercall pages
         * with instructions of its own choosing, so use those.
         */
        if (kvm_xen_hypercall_enabled(kvm)) {
                u8 instructions[32];
                int i;

                if (page_num)
                        return 1;

                /* mov imm32, %eax */
                instructions[0] = 0xb8;

                /* vmcall / vmmcall */
                kvm_x86_ops.patch_hypercall(vcpu, instructions + 5);

                /* ret */
                instructions[8] = 0xc3;

                /* int3 to pad */
                memset(instructions + 9, 0xcc, sizeof(instructions) - 9);

                for (i = 0; i < PAGE_SIZE / sizeof(instructions); i++) {
                        *(u32 *)&instructions[1] = i;
                        if (kvm_vcpu_write_guest(vcpu,
                                                 page_addr + (i * sizeof(instructions)),
                                                 instructions, sizeof(instructions)))
                                return 1;
                }
        } else {
                /*
                 * Note, truncation is a non-issue as 'lm' is guaranteed to be
                 * false for a 32-bit kernel, i.e. when hva_t is only 4 bytes.
                 */
                hva_t blob_addr = lm ? kvm->arch.xen_hvm_config.blob_addr_64
                                     : kvm->arch.xen_hvm_config.blob_addr_32;
                u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
                                  : kvm->arch.xen_hvm_config.blob_size_32;
                u8 *page;
                int ret;

                if (page_num >= blob_size)
                        return 1;

                blob_addr += page_num * PAGE_SIZE;

                page = memdup_user((u8 __user *)blob_addr, PAGE_SIZE);
                if (IS_ERR(page))
                        return PTR_ERR(page);

                ret = kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE);

                /* The local copy isn't needed once it's in guest memory */
                kfree(page);

                if (ret)
                        return 1;
        }
        return 0;
}

int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
{
        if (xhc->flags & ~KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL)
                return -EINVAL;

        /*
         * With hypercall interception the kernel generates its own
         * hypercall page so it must not be provided.
         */
        if ((xhc->flags & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL) &&
            (xhc->blob_addr_32 || xhc->blob_addr_64 ||
             xhc->blob_size_32 || xhc->blob_size_64))
                return -EINVAL;

        mutex_lock(&kvm->lock);

        if (xhc->msr && !kvm->arch.xen_hvm_config.msr)
                static_branch_inc(&kvm_xen_enabled.key);
        else if (!xhc->msr && kvm->arch.xen_hvm_config.msr)
                static_branch_slow_dec_deferred(&kvm_xen_enabled);

        memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc));

        mutex_unlock(&kvm->lock);
        return 0;
}

void kvm_xen_destroy_vm(struct kvm *kvm)
{
        if (kvm->arch.xen_hvm_config.msr)
                static_branch_slow_dec_deferred(&kvm_xen_enabled);
}

static int kvm_xen_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
{
        kvm_rax_write(vcpu, result);
        return kvm_skip_emulated_instruction(vcpu);
}

static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
{
        struct kvm_run *run = vcpu->run;

        if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.xen.hypercall_rip)))
                return 1;

        return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result);
}

int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
{
        bool longmode;
        u64 input, params[6];

        input = (u64)kvm_register_read(vcpu, VCPU_REGS_RAX);

        /* Hyper-V hypercalls get bit 31 set in EAX */
        if ((input & 0x80000000) &&
            kvm_hv_hypercall_enabled(vcpu))
                return kvm_hv_hypercall(vcpu);

        longmode = is_64_bit_mode(vcpu);
        if (!longmode) {
                params[0] = (u32)kvm_rbx_read(vcpu);
                params[1] = (u32)kvm_rcx_read(vcpu);
                params[2] = (u32)kvm_rdx_read(vcpu);
                params[3] = (u32)kvm_rsi_read(vcpu);
                params[4] = (u32)kvm_rdi_read(vcpu);
                params[5] = (u32)kvm_rbp_read(vcpu);
        }
#ifdef CONFIG_X86_64
        else {
                params[0] = (u64)kvm_rdi_read(vcpu);
                params[1] = (u64)kvm_rsi_read(vcpu);
                params[2] = (u64)kvm_rdx_read(vcpu);
                params[3] = (u64)kvm_r10_read(vcpu);
                params[4] = (u64)kvm_r8_read(vcpu);
                params[5] = (u64)kvm_r9_read(vcpu);
        }
#endif
        trace_kvm_xen_hypercall(input, params[0], params[1], params[2],
                                params[3], params[4], params[5]);

        vcpu->run->exit_reason = KVM_EXIT_XEN;
        vcpu->run->xen.type = KVM_EXIT_XEN_HCALL;
        vcpu->run->xen.u.hcall.longmode = longmode;
        vcpu->run->xen.u.hcall.cpl = kvm_x86_ops.get_cpl(vcpu);
        vcpu->run->xen.u.hcall.input = input;
        vcpu->run->xen.u.hcall.params[0] = params[0];
        vcpu->run->xen.u.hcall.params[1] = params[1];
        vcpu->run->xen.u.hcall.params[2] = params[2];
        vcpu->run->xen.u.hcall.params[3] = params[3];
        vcpu->run->xen.u.hcall.params[4] = params[4];
        vcpu->run->xen.u.hcall.params[5] = params[5];
        vcpu->arch.xen.hypercall_rip = kvm_get_linear_rip(vcpu);
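        /*
         * The hypercall is completed by userspace: when the vCPU is next
         * run, kvm_xen_hypercall_complete_userspace() copies the result
         * from kvm_run back into RAX, provided the guest RIP hasn't moved.
         */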
        vcpu->arch.complete_userspace_io =
                kvm_xen_hypercall_complete_userspace;

        return 0;
}