// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
 * Copyright © 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * KVM Xen emulation
 */

#include "x86.h"
#include "xen.h"
#include "hyperv.h"

#include <linux/kvm_host.h>
#include <linux/sched/stat.h>

#include <trace/events/kvm.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>

#include "trace.h"

DEFINE_STATIC_KEY_DEFERRED_FALSE(kvm_xen_enabled, HZ);

static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
{
	gpa_t gpa = gfn_to_gpa(gfn);
	int wc_ofs, sec_hi_ofs;
	int ret = 0;
	int idx = srcu_read_lock(&kvm->srcu);

	if (kvm_is_error_hva(gfn_to_hva(kvm, gfn))) {
		ret = -EFAULT;
		goto out;
	}
	kvm->arch.xen.shinfo_gfn = gfn;

	/* Paranoia checks on the 32-bit struct layout */
	BUILD_BUG_ON(offsetof(struct compat_shared_info, wc) != 0x900);
	BUILD_BUG_ON(offsetof(struct compat_shared_info, arch.wc_sec_hi) != 0x924);
	BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);

	/* 32-bit location by default */
	wc_ofs = offsetof(struct compat_shared_info, wc);
	sec_hi_ofs = offsetof(struct compat_shared_info, arch.wc_sec_hi);

#ifdef CONFIG_X86_64
	/* Paranoia checks on the 64-bit struct layout */
	BUILD_BUG_ON(offsetof(struct shared_info, wc) != 0xc00);
	BUILD_BUG_ON(offsetof(struct shared_info, wc_sec_hi) != 0xc0c);

	if (kvm->arch.xen.long_mode) {
		wc_ofs = offsetof(struct shared_info, wc);
		sec_hi_ofs = offsetof(struct shared_info, wc_sec_hi);
	}
#endif

	kvm_write_wall_clock(kvm, gpa + wc_ofs, sec_hi_ofs - wc_ofs);
	kvm_make_all_cpus_request(kvm, KVM_REQ_MASTERCLOCK_UPDATE);

out:
	srcu_read_unlock(&kvm->srcu, idx);
	return ret;
}

static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
{
	struct kvm_vcpu_xen *vx = &v->arch.xen;
	u64 now = get_kvmclock_ns(v->kvm);
	u64 delta_ns = now - vx->runstate_entry_time;
	u64 run_delay = current->sched_info.run_delay;

	if (unlikely(!vx->runstate_entry_time))
		vx->current_runstate = RUNSTATE_offline;

	/*
	 * Time waiting for the scheduler isn't "stolen" if the
	 * vCPU wasn't running anyway.
	 */
	if (vx->current_runstate == RUNSTATE_running) {
		u64 steal_ns = run_delay - vx->last_steal;

		delta_ns -= steal_ns;

		vx->runstate_times[RUNSTATE_runnable] += steal_ns;
	}
	vx->last_steal = run_delay;

	vx->runstate_times[vx->current_runstate] += delta_ns;
	vx->current_runstate = state;
	vx->runstate_entry_time = now;
}

void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
{
	struct kvm_vcpu_xen *vx = &v->arch.xen;
	uint64_t state_entry_time;
	unsigned int offset;

	kvm_xen_update_runstate(v, state);

	if (!vx->runstate_set)
		return;

	BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);

	offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time);
#ifdef CONFIG_X86_64
	/*
	 * The only difference is alignment of uint64_t in 32-bit.
	 * So the first field 'state' is accessed directly using
	 * offsetof() (where its offset happens to be zero), while the
	 * remaining fields, which are all uint64_t, start at 'offset',
	 * which for 64-bit guests is 4 bytes further into the struct.
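	 * (Concretely, 'state_entry_time' lives at offset 4 in the compat
	 * struct but at offset 8 in the 64-bit one, with the times[] array
	 * following immediately after it in both layouts, as the
	 * assertions below confirm.)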
	 */
	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
		     offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
		     offsetof(struct compat_vcpu_runstate_info, time) + 4);

	if (v->kvm->arch.xen.long_mode)
		offset = offsetof(struct vcpu_runstate_info, state_entry_time);
#endif
	/*
	 * First write the updated state_entry_time at the appropriate
	 * location determined by 'offset'.
	 */
	state_entry_time = vx->runstate_entry_time;
	state_entry_time |= XEN_RUNSTATE_UPDATE;

	BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state_entry_time) !=
		     sizeof(state_entry_time));
	BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state_entry_time) !=
		     sizeof(state_entry_time));

	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
					  &state_entry_time, offset,
					  sizeof(state_entry_time)))
		return;
	smp_wmb();

	/*
	 * Next, write the new runstate. This is in the *same* place
	 * for 32-bit and 64-bit guests, asserted here for paranoia.
	 */
	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
		     offsetof(struct compat_vcpu_runstate_info, state));
	BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
		     sizeof(vx->current_runstate));
	BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state) !=
		     sizeof(vx->current_runstate));

	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
					  &vx->current_runstate,
					  offsetof(struct vcpu_runstate_info, state),
					  sizeof(vx->current_runstate)))
		return;

	/*
	 * Write the actual runstate times immediately after the
	 * runstate_entry_time.
	 */
	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
		     offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
	BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
		     offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
	BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
		     sizeof(((struct compat_vcpu_runstate_info *)0)->time));
	BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
		     sizeof(vx->runstate_times));

	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
					  &vx->runstate_times[0],
					  offset + sizeof(u64),
					  sizeof(vx->runstate_times)))
		return;

	smp_wmb();

	/*
	 * Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's
	 * runstate_entry_time field.
	 */

	state_entry_time &= ~XEN_RUNSTATE_UPDATE;
	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
					  &state_entry_time, offset,
					  sizeof(state_entry_time)))
		return;
}

int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
{
	int err;
	u8 rc = 0;

	/*
	 * If the global upcall vector (HVMIRQ_callback_vector) is set and
	 * the vCPU's evtchn_upcall_pending flag is set, the IRQ is pending.
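	 * The vcpu_info holding that flag is the one registered via
	 * KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO in kvm_xen_vcpu_set_attr(),
	 * which also initialized 'vcpu_info_cache' below.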
	 */
	struct gfn_to_hva_cache *ghc = &v->arch.xen.vcpu_info_cache;
	struct kvm_memslots *slots = kvm_memslots(v->kvm);
	unsigned int offset = offsetof(struct vcpu_info, evtchn_upcall_pending);

	/* No need for compat handling here */
	BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) !=
		     offsetof(struct compat_vcpu_info, evtchn_upcall_pending));
	BUILD_BUG_ON(sizeof(rc) !=
		     sizeof(((struct vcpu_info *)0)->evtchn_upcall_pending));
	BUILD_BUG_ON(sizeof(rc) !=
		     sizeof(((struct compat_vcpu_info *)0)->evtchn_upcall_pending));

	/*
	 * For efficiency, this mirrors the checks for using the valid
	 * cache in kvm_read_guest_offset_cached(), but just uses
	 * __get_user() instead, falling back to the slow path on failure.
	 */
	if (likely(slots->generation == ghc->generation &&
		   !kvm_is_error_hva(ghc->hva) && ghc->memslot)) {
		/* Fast path */
		pagefault_disable();
		err = __get_user(rc, (u8 __user *)ghc->hva + offset);
		pagefault_enable();
		if (!err)
			return rc;
	}

	/* Slow path */

	/*
	 * This function gets called from kvm_vcpu_block() after setting the
	 * task to TASK_INTERRUPTIBLE, to see if it needs to wake immediately
	 * from a HLT. So we really mustn't sleep. If the page ended up absent
	 * at that point, just return 1 in order to trigger an immediate wake,
	 * and we'll end up getting called again from a context where we *can*
	 * fault in the page and wait for it.
	 */
	if (in_atomic() || !task_is_running(current))
		return 1;

	kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset,
				     sizeof(rc));

	return rc;
}

int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
{
	int r = -ENOENT;

	mutex_lock(&kvm->lock);

	switch (data->type) {
	case KVM_XEN_ATTR_TYPE_LONG_MODE:
		if (!IS_ENABLED(CONFIG_64BIT) && data->u.long_mode) {
			r = -EINVAL;
		} else {
			kvm->arch.xen.long_mode = !!data->u.long_mode;
			r = 0;
		}
		break;

	case KVM_XEN_ATTR_TYPE_SHARED_INFO:
		if (data->u.shared_info.gfn == GPA_INVALID) {
			kvm->arch.xen.shinfo_gfn = GPA_INVALID;
			r = 0;
			break;
		}
		r = kvm_xen_shared_info_init(kvm, data->u.shared_info.gfn);
		break;

	case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
		if (data->u.vector && data->u.vector < 0x10)
			r = -EINVAL;
		else {
			kvm->arch.xen.upcall_vector = data->u.vector;
			r = 0;
		}
		break;

	default:
		break;
	}

	mutex_unlock(&kvm->lock);
	return r;
}

int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
{
	int r = -ENOENT;

	mutex_lock(&kvm->lock);

	switch (data->type) {
	case KVM_XEN_ATTR_TYPE_LONG_MODE:
		data->u.long_mode = kvm->arch.xen.long_mode;
		r = 0;
		break;

	case KVM_XEN_ATTR_TYPE_SHARED_INFO:
		data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_gfn);
		r = 0;
		break;

	case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
		data->u.vector = kvm->arch.xen.upcall_vector;
		r = 0;
		break;

	default:
		break;
	}

	mutex_unlock(&kvm->lock);
	return r;
}

int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
{
	int idx, r = -ENOENT;

	mutex_lock(&vcpu->kvm->lock);
	idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (data->type) {
	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
		/* No compat necessary here. */
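		/*
		 * struct vcpu_info and struct compat_vcpu_info have identical
		 * size and 'time' offsets (asserted below), so a single
		 * gfn_to_hva_cache of sizeof(struct vcpu_info) covers both
		 * 32-bit and 64-bit guests.
		 */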
		BUILD_BUG_ON(sizeof(struct vcpu_info) !=
			     sizeof(struct compat_vcpu_info));
		BUILD_BUG_ON(offsetof(struct vcpu_info, time) !=
			     offsetof(struct compat_vcpu_info, time));

		if (data->u.gpa == GPA_INVALID) {
			vcpu->arch.xen.vcpu_info_set = false;
			r = 0;
			break;
		}

		r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
					      &vcpu->arch.xen.vcpu_info_cache,
					      data->u.gpa,
					      sizeof(struct vcpu_info));
		if (!r) {
			vcpu->arch.xen.vcpu_info_set = true;
			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
		}
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
		if (data->u.gpa == GPA_INVALID) {
			vcpu->arch.xen.vcpu_time_info_set = false;
			r = 0;
			break;
		}

		r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
					      &vcpu->arch.xen.vcpu_time_info_cache,
					      data->u.gpa,
					      sizeof(struct pvclock_vcpu_time_info));
		if (!r) {
			vcpu->arch.xen.vcpu_time_info_set = true;
			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
		}
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		if (data->u.gpa == GPA_INVALID) {
			vcpu->arch.xen.runstate_set = false;
			r = 0;
			break;
		}

		r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
					      &vcpu->arch.xen.runstate_cache,
					      data->u.gpa,
					      sizeof(struct vcpu_runstate_info));
		if (!r)
			vcpu->arch.xen.runstate_set = true;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		if (data->u.runstate.state > RUNSTATE_offline) {
			r = -EINVAL;
			break;
		}

		kvm_xen_update_runstate(vcpu, data->u.runstate.state);
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		if (data->u.runstate.state > RUNSTATE_offline) {
			r = -EINVAL;
			break;
		}
		if (data->u.runstate.state_entry_time !=
		    (data->u.runstate.time_running +
		     data->u.runstate.time_runnable +
		     data->u.runstate.time_blocked +
		     data->u.runstate.time_offline)) {
			r = -EINVAL;
			break;
		}
		if (get_kvmclock_ns(vcpu->kvm) <
		    data->u.runstate.state_entry_time) {
			r = -EINVAL;
			break;
		}

		vcpu->arch.xen.current_runstate = data->u.runstate.state;
		vcpu->arch.xen.runstate_entry_time =
			data->u.runstate.state_entry_time;
		vcpu->arch.xen.runstate_times[RUNSTATE_running] =
			data->u.runstate.time_running;
		vcpu->arch.xen.runstate_times[RUNSTATE_runnable] =
			data->u.runstate.time_runnable;
		vcpu->arch.xen.runstate_times[RUNSTATE_blocked] =
			data->u.runstate.time_blocked;
		vcpu->arch.xen.runstate_times[RUNSTATE_offline] =
			data->u.runstate.time_offline;
		vcpu->arch.xen.last_steal = current->sched_info.run_delay;
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		if (data->u.runstate.state > RUNSTATE_offline &&
		    data->u.runstate.state != (u64)-1) {
			r = -EINVAL;
			break;
		}
		/* The adjustment must add up */
		if (data->u.runstate.state_entry_time !=
		    (data->u.runstate.time_running +
		     data->u.runstate.time_runnable +
		     data->u.runstate.time_blocked +
		     data->u.runstate.time_offline)) {
			r = -EINVAL;
			break;
		}

		if (get_kvmclock_ns(vcpu->kvm) <
		    (vcpu->arch.xen.runstate_entry_time +
		     data->u.runstate.state_entry_time)) {
			r = -EINVAL;
			break;
		}

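		/*
		 * All sanity checks passed; fold the adjustments into the
		 * accumulated entry time and per-state runstate times.
		 */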
		vcpu->arch.xen.runstate_entry_time +=
			data->u.runstate.state_entry_time;
		vcpu->arch.xen.runstate_times[RUNSTATE_running] +=
			data->u.runstate.time_running;
		vcpu->arch.xen.runstate_times[RUNSTATE_runnable] +=
			data->u.runstate.time_runnable;
		vcpu->arch.xen.runstate_times[RUNSTATE_blocked] +=
			data->u.runstate.time_blocked;
		vcpu->arch.xen.runstate_times[RUNSTATE_offline] +=
			data->u.runstate.time_offline;

		if (data->u.runstate.state <= RUNSTATE_offline)
			kvm_xen_update_runstate(vcpu, data->u.runstate.state);
		r = 0;
		break;

	default:
		break;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, idx);
	mutex_unlock(&vcpu->kvm->lock);
	return r;
}

int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
{
	int r = -ENOENT;

	mutex_lock(&vcpu->kvm->lock);

	switch (data->type) {
	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
		if (vcpu->arch.xen.vcpu_info_set)
			data->u.gpa = vcpu->arch.xen.vcpu_info_cache.gpa;
		else
			data->u.gpa = GPA_INVALID;
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
		if (vcpu->arch.xen.vcpu_time_info_set)
			data->u.gpa = vcpu->arch.xen.vcpu_time_info_cache.gpa;
		else
			data->u.gpa = GPA_INVALID;
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		if (vcpu->arch.xen.runstate_set) {
			data->u.gpa = vcpu->arch.xen.runstate_cache.gpa;
			r = 0;
		}
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		data->u.runstate.state = vcpu->arch.xen.current_runstate;
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		data->u.runstate.state = vcpu->arch.xen.current_runstate;
		data->u.runstate.state_entry_time =
			vcpu->arch.xen.runstate_entry_time;
		data->u.runstate.time_running =
			vcpu->arch.xen.runstate_times[RUNSTATE_running];
		data->u.runstate.time_runnable =
			vcpu->arch.xen.runstate_times[RUNSTATE_runnable];
		data->u.runstate.time_blocked =
			vcpu->arch.xen.runstate_times[RUNSTATE_blocked];
		data->u.runstate.time_offline =
			vcpu->arch.xen.runstate_times[RUNSTATE_offline];
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
		r = -EINVAL;
		break;

	default:
		break;
	}

	mutex_unlock(&vcpu->kvm->lock);
	return r;
}

int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
{
	struct kvm *kvm = vcpu->kvm;
	u32 page_num = data & ~PAGE_MASK;
	u64 page_addr = data & PAGE_MASK;
	bool lm = is_long_mode(vcpu);

	/* Latch long_mode for shared_info pages etc. */
	vcpu->kvm->arch.xen.long_mode = lm;

	/*
	 * If Xen hypercall intercept is enabled, fill the hypercall
	 * page with VMCALL/VMMCALL instructions since that's what
	 * we catch. Else the VMM has provided the hypercall pages
	 * with instructions of its own choosing, so use those.
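	 * In the intercept case each 32-byte slot i of the page ends up as:
	 *   mov $i, %eax; vmcall (or vmmcall); ret
	 * padded to 32 bytes with int3 (0xcc).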
	 */
	if (kvm_xen_hypercall_enabled(kvm)) {
		u8 instructions[32];
		int i;

		if (page_num)
			return 1;

		/* mov imm32, %eax */
		instructions[0] = 0xb8;

		/* vmcall / vmmcall */
		kvm_x86_ops.patch_hypercall(vcpu, instructions + 5);

		/* ret */
		instructions[8] = 0xc3;

		/* int3 to pad */
		memset(instructions + 9, 0xcc, sizeof(instructions) - 9);

		for (i = 0; i < PAGE_SIZE / sizeof(instructions); i++) {
			*(u32 *)&instructions[1] = i;
			if (kvm_vcpu_write_guest(vcpu,
						 page_addr + (i * sizeof(instructions)),
						 instructions, sizeof(instructions)))
				return 1;
		}
	} else {
		/*
		 * Note, truncation is a non-issue as 'lm' is guaranteed to be
		 * false for a 32-bit kernel, i.e. when hva_t is only 4 bytes.
		 */
		hva_t blob_addr = lm ? kvm->arch.xen_hvm_config.blob_addr_64
				     : kvm->arch.xen_hvm_config.blob_addr_32;
		u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
				  : kvm->arch.xen_hvm_config.blob_size_32;
		u8 *page;
		int ret;

		if (page_num >= blob_size)
			return 1;

		blob_addr += page_num * PAGE_SIZE;

		page = memdup_user((u8 __user *)blob_addr, PAGE_SIZE);
		if (IS_ERR(page))
			return PTR_ERR(page);

		/* Free the kernel copy whether or not the guest write succeeds. */
		ret = kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE);
		kfree(page);
		if (ret)
			return 1;
	}
	return 0;
}

int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
{
	if (xhc->flags & ~KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL)
		return -EINVAL;

	/*
	 * With hypercall interception the kernel generates its own
	 * hypercall page so it must not be provided.
	 */
	if ((xhc->flags & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL) &&
	    (xhc->blob_addr_32 || xhc->blob_addr_64 ||
	     xhc->blob_size_32 || xhc->blob_size_64))
		return -EINVAL;

	mutex_lock(&kvm->lock);

	if (xhc->msr && !kvm->arch.xen_hvm_config.msr)
		static_branch_inc(&kvm_xen_enabled.key);
	else if (!xhc->msr && kvm->arch.xen_hvm_config.msr)
		static_branch_slow_dec_deferred(&kvm_xen_enabled);

	memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc));

	mutex_unlock(&kvm->lock);
	return 0;
}

void kvm_xen_init_vm(struct kvm *kvm)
{
	kvm->arch.xen.shinfo_gfn = GPA_INVALID;
}

void kvm_xen_destroy_vm(struct kvm *kvm)
{
	if (kvm->arch.xen_hvm_config.msr)
		static_branch_slow_dec_deferred(&kvm_xen_enabled);
}

static int kvm_xen_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
{
	kvm_rax_write(vcpu, result);
	return kvm_skip_emulated_instruction(vcpu);
}

static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
{
	struct kvm_run *run = vcpu->run;

	if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.xen.hypercall_rip)))
		return 1;

	return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result);
}

int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
{
	bool longmode;
	u64 input, params[6];

	input = (u64)kvm_register_read(vcpu, VCPU_REGS_RAX);

	/* Hyper-V hypercalls get bit 31 set in EAX */
	if ((input & 0x80000000) &&
	    kvm_hv_hypercall_enabled(vcpu))
		return kvm_hv_hypercall(vcpu);

	longmode = is_64_bit_mode(vcpu);
	if (!longmode) {
		params[0] = (u32)kvm_rbx_read(vcpu);
		params[1] = (u32)kvm_rcx_read(vcpu);
		params[2] = (u32)kvm_rdx_read(vcpu);
		params[3] = (u32)kvm_rsi_read(vcpu);
		params[4] = (u32)kvm_rdi_read(vcpu);
		params[5] = (u32)kvm_rbp_read(vcpu);
	}
#ifdef CONFIG_X86_64
	else {
		params[0] = (u64)kvm_rdi_read(vcpu);
		params[1] = (u64)kvm_rsi_read(vcpu);
		params[2] = (u64)kvm_rdx_read(vcpu);
		params[3] = (u64)kvm_r10_read(vcpu);
		params[4] = (u64)kvm_r8_read(vcpu);
		params[5] = (u64)kvm_r9_read(vcpu);
	}
#endif
	trace_kvm_xen_hypercall(input, params[0], params[1], params[2],
				params[3], params[4], params[5]);

	vcpu->run->exit_reason = KVM_EXIT_XEN;
	vcpu->run->xen.type = KVM_EXIT_XEN_HCALL;
	vcpu->run->xen.u.hcall.longmode = longmode;
	vcpu->run->xen.u.hcall.cpl = kvm_x86_ops.get_cpl(vcpu);
	vcpu->run->xen.u.hcall.input = input;
	vcpu->run->xen.u.hcall.params[0] = params[0];
	vcpu->run->xen.u.hcall.params[1] = params[1];
	vcpu->run->xen.u.hcall.params[2] = params[2];
	vcpu->run->xen.u.hcall.params[3] = params[3];
	vcpu->run->xen.u.hcall.params[4] = params[4];
	vcpu->run->xen.u.hcall.params[5] = params[5];
	vcpu->arch.xen.hypercall_rip = kvm_get_linear_rip(vcpu);
	vcpu->arch.complete_userspace_io =
		kvm_xen_hypercall_complete_userspace;

	return 0;
}