// SPDX-License-Identifier: GPL-2.0-only
/*
 * KVM Microsoft Hyper-V emulation
 *
 * derived from arch/x86/kvm/x86.c
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2008 Qumranet, Inc.
 * Copyright IBM Corporation, 2008
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
 *
 * Authors:
 *   Avi Kivity <avi@qumranet.com>
 *   Yaniv Kamay <yaniv@qumranet.com>
 *   Amit Shah <amit.shah@qumranet.com>
 *   Ben-Ami Yassour <benami@il.ibm.com>
 *   Andrey Smetanin <asmetanin@virtuozzo.com>
 */

#include "x86.h"
#include "lapic.h"
#include "ioapic.h"
#include "cpuid.h"
#include "hyperv.h"
#include "xen.h"

#include <linux/cpu.h>
#include <linux/kvm_host.h>
#include <linux/highmem.h>
#include <linux/sched/cputime.h>
#include <linux/eventfd.h>

#include <asm/apicdef.h>
#include <trace/events/kvm.h>

#include "trace.h"
#include "irq.h"
#include "fpu.h"

/* "Hv#1" signature */
#define HYPERV_CPUID_SIGNATURE_EAX 0x31237648

#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64)

static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
				bool vcpu_kick);

static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint)
{
	return atomic64_read(&synic->sint[sint]);
}

static inline int synic_get_sint_vector(u64 sint_value)
{
	if (sint_value & HV_SYNIC_SINT_MASKED)
		return -1;
	return sint_value & HV_SYNIC_SINT_VECTOR_MASK;
}

static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic,
				       int vector)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
		if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
			return true;
	}
	return false;
}

static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic,
				      int vector)
{
	int i;
	u64 sint_value;

	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
		sint_value = synic_read_sint(synic, i);
		if (synic_get_sint_vector(sint_value) == vector &&
		    sint_value & HV_SYNIC_SINT_AUTO_EOI)
			return true;
	}
	return false;
}

static void synic_update_vector(struct kvm_vcpu_hv_synic *synic,
				int vector)
{
	if (vector < HV_SYNIC_FIRST_VALID_VECTOR)
		return;

	if (synic_has_vector_connected(synic, vector))
		__set_bit(vector, synic->vec_bitmap);
	else
		__clear_bit(vector, synic->vec_bitmap);

	if (synic_has_vector_auto_eoi(synic, vector))
		__set_bit(vector, synic->auto_eoi_bitmap);
	else
		__clear_bit(vector, synic->auto_eoi_bitmap);
}

static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
			  u64 data, bool host)
{
	int vector, old_vector;
	bool masked;

	vector = data & HV_SYNIC_SINT_VECTOR_MASK;
	masked = data & HV_SYNIC_SINT_MASKED;

	/*
	 * Valid vectors are 16-255, however, nested Hyper-V attempts to write
	 * default '0x10000' value on boot and this should not #GP. We need to
	 * allow zero-initing the register from host as well.
	 */
	if (vector < HV_SYNIC_FIRST_VALID_VECTOR && !host && !masked)
		return 1;
	/*
	 * Guest may configure multiple SINTs to use the same vector, so
	 * we maintain a bitmap of vectors handled by synic, and a
	 * bitmap of vectors with auto-eoi behavior. The bitmaps are
	 * updated here, and atomically queried on fast paths.
	 */
	old_vector = synic_read_sint(synic, sint) & HV_SYNIC_SINT_VECTOR_MASK;

	atomic64_set(&synic->sint[sint], data);

	synic_update_vector(synic, old_vector);

	synic_update_vector(synic, vector);

	/* Load SynIC vectors into EOI exit bitmap */
	kvm_make_request(KVM_REQ_SCAN_IOAPIC, hv_synic_to_vcpu(synic));
	return 0;
}

static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
{
	struct kvm_vcpu *vcpu = NULL;
	int i;

	if (vpidx >= KVM_MAX_VCPUS)
		return NULL;

	vcpu = kvm_get_vcpu(kvm, vpidx);
	if (vcpu && kvm_hv_get_vpindex(vcpu) == vpidx)
		return vcpu;
	kvm_for_each_vcpu(i, vcpu, kvm)
		if (kvm_hv_get_vpindex(vcpu) == vpidx)
			return vcpu;
	return NULL;
}

static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vcpu_hv_synic *synic;

	vcpu = get_vcpu_by_vpidx(kvm, vpidx);
	if (!vcpu || !to_hv_vcpu(vcpu))
		return NULL;
	synic = to_hv_synic(vcpu);
	return (synic->active) ? synic : NULL;
}

static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu);
	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
	struct kvm_vcpu_hv_stimer *stimer;
	int gsi, idx;

	trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint);

	/* Try to deliver pending Hyper-V SynIC timers messages */
	for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) {
		stimer = &hv_vcpu->stimer[idx];
		if (stimer->msg_pending && stimer->config.enable &&
		    !stimer->config.direct_mode &&
		    stimer->config.sintx == sint)
			stimer_mark_pending(stimer, false);
	}

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = atomic_read(&synic->sint_to_gsi[sint]);
	if (gsi != -1)
		kvm_notify_acked_gsi(kvm, gsi);
	srcu_read_unlock(&kvm->irq_srcu, idx);
}

static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr)
{
	struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);

	hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC;
	hv_vcpu->exit.u.synic.msr = msr;
	hv_vcpu->exit.u.synic.control = synic->control;
	hv_vcpu->exit.u.synic.evt_page = synic->evt_page;
	hv_vcpu->exit.u.synic.msg_page = synic->msg_page;

	kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
}

static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
			 u32 msr, u64 data, bool host)
{
	struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
	int ret;

	if (!synic->active && !host)
		return 1;

	trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host);

	ret = 0;
	switch (msr) {
	case HV_X64_MSR_SCONTROL:
		synic->control = data;
		if (!host)
			synic_exit(synic, msr);
		break;
	case HV_X64_MSR_SVERSION:
		if (!host) {
			ret = 1;
			break;
		}
		synic->version = data;
		break;
	case HV_X64_MSR_SIEFP:
		if ((data & HV_SYNIC_SIEFP_ENABLE) && !host &&
		    !synic->dont_zero_synic_pages)
			if (kvm_clear_guest(vcpu->kvm,
					    data & PAGE_MASK, PAGE_SIZE)) {
				ret = 1;
				break;
			}
		synic->evt_page = data;
		if (!host)
			synic_exit(synic, msr);
		break;
	case HV_X64_MSR_SIMP:
		if ((data & HV_SYNIC_SIMP_ENABLE) && !host &&
		    !synic->dont_zero_synic_pages)
			if (kvm_clear_guest(vcpu->kvm,
					    data & PAGE_MASK, PAGE_SIZE)) {
				ret = 1;
				break;
			}
		synic->msg_page = data;
		if (!host)
			synic_exit(synic, msr);
		break;
	case HV_X64_MSR_EOM: {
		int i;

		for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
			kvm_hv_notify_acked_sint(vcpu, i);
		break;
	}
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data, host);
		break;
	default:
		ret = 1;
		break;
	}
	return ret;
}

static bool kvm_hv_is_syndbg_enabled(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *entry;

	entry = kvm_find_cpuid_entry(vcpu,
				     HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES,
				     0);
	if (!entry)
		return false;

	return entry->eax & HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
}

static int kvm_hv_syndbg_complete_userspace(struct kvm_vcpu *vcpu)
{
	struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);

	if (vcpu->run->hyperv.u.syndbg.msr == HV_X64_MSR_SYNDBG_CONTROL)
		hv->hv_syndbg.control.status =
			vcpu->run->hyperv.u.syndbg.status;
	return 1;
}

static void syndbg_exit(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_hv_syndbg *syndbg = to_hv_syndbg(vcpu);
	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);

	hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNDBG;
	hv_vcpu->exit.u.syndbg.msr = msr;
	hv_vcpu->exit.u.syndbg.control = syndbg->control.control;
	hv_vcpu->exit.u.syndbg.send_page = syndbg->control.send_page;
	hv_vcpu->exit.u.syndbg.recv_page = syndbg->control.recv_page;
	hv_vcpu->exit.u.syndbg.pending_page = syndbg->control.pending_page;
	vcpu->arch.complete_userspace_io =
		kvm_hv_syndbg_complete_userspace;

	kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
}

static int syndbg_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
{
	struct kvm_hv_syndbg *syndbg = to_hv_syndbg(vcpu);

	if (!kvm_hv_is_syndbg_enabled(vcpu) && !host)
		return 1;

	trace_kvm_hv_syndbg_set_msr(vcpu->vcpu_id,
				    to_hv_vcpu(vcpu)->vp_index, msr, data);
	switch (msr) {
	case HV_X64_MSR_SYNDBG_CONTROL:
		syndbg->control.control = data;
		if (!host)
			syndbg_exit(vcpu, msr);
		break;
	case HV_X64_MSR_SYNDBG_STATUS:
		syndbg->control.status = data;
		break;
	case HV_X64_MSR_SYNDBG_SEND_BUFFER:
		syndbg->control.send_page = data;
		break;
	case HV_X64_MSR_SYNDBG_RECV_BUFFER:
		syndbg->control.recv_page = data;
		break;
	case HV_X64_MSR_SYNDBG_PENDING_BUFFER:
		syndbg->control.pending_page = data;
		if (!host)
			syndbg_exit(vcpu, msr);
		break;
	case HV_X64_MSR_SYNDBG_OPTIONS:
		syndbg->options = data;
		break;
	default:
		break;
	}

	return 0;
}

static int syndbg_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
{
	struct kvm_hv_syndbg *syndbg = to_hv_syndbg(vcpu);

	if (!kvm_hv_is_syndbg_enabled(vcpu) && !host)
		return 1;

	switch (msr) {
	case HV_X64_MSR_SYNDBG_CONTROL:
		*pdata = syndbg->control.control;
		break;
	case HV_X64_MSR_SYNDBG_STATUS:
		*pdata = syndbg->control.status;
		break;
	case HV_X64_MSR_SYNDBG_SEND_BUFFER:
		*pdata = syndbg->control.send_page;
		break;
	case HV_X64_MSR_SYNDBG_RECV_BUFFER:
		*pdata = syndbg->control.recv_page;
		break;
	case HV_X64_MSR_SYNDBG_PENDING_BUFFER:
		*pdata = syndbg->control.pending_page;
		break;
	case HV_X64_MSR_SYNDBG_OPTIONS:
		*pdata = syndbg->options;
		break;
	default:
		break;
	}

	trace_kvm_hv_syndbg_get_msr(vcpu->vcpu_id, kvm_hv_get_vpindex(vcpu), msr, *pdata);

	return 0;
}

static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata,
			 bool host)
{
	int ret;

	if (!synic->active && !host)
		return 1;

	ret = 0;
	switch (msr) {
	case HV_X64_MSR_SCONTROL:
		*pdata = synic->control;
		break;
	case HV_X64_MSR_SVERSION:
		*pdata = synic->version;
		break;
	case HV_X64_MSR_SIEFP:
		*pdata = synic->evt_page;
		break;
	case HV_X64_MSR_SIMP:
		*pdata = synic->msg_page;
		break;
	case HV_X64_MSR_EOM:
		*pdata = 0;
		break;
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		*pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]);
		break;
	default:
		ret = 1;
		break;
	}
	return ret;
}

static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
{
	struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
	struct kvm_lapic_irq irq;
	int ret, vector;

	if (sint >= ARRAY_SIZE(synic->sint))
		return -EINVAL;

	vector = synic_get_sint_vector(synic_read_sint(synic, sint));
	if (vector < 0)
		return -ENOENT;

	memset(&irq, 0, sizeof(irq));
	irq.shorthand = APIC_DEST_SELF;
	irq.dest_mode = APIC_DEST_PHYSICAL;
	irq.delivery_mode = APIC_DM_FIXED;
	irq.vector = vector;
	irq.level = 1;

	ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq, NULL);
	trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret);
	return ret;
}

int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint)
{
	struct kvm_vcpu_hv_synic *synic;

	synic = synic_get(kvm, vpidx);
	if (!synic)
		return -EINVAL;

	return synic_set_irq(synic, sint);
}

void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
{
	struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu);
	int i;

	trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector);

	for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
		if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
			kvm_hv_notify_acked_sint(vcpu, i);
}

static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi)
{
	struct kvm_vcpu_hv_synic *synic;

	synic = synic_get(kvm, vpidx);
	if (!synic)
		return -EINVAL;

	if (sint >= ARRAY_SIZE(synic->sint_to_gsi))
		return -EINVAL;

	atomic_set(&synic->sint_to_gsi[sint], gsi);
	return 0;
}

void kvm_hv_irq_routing_update(struct kvm *kvm)
{
	struct kvm_irq_routing_table *irq_rt;
	struct kvm_kernel_irq_routing_entry *e;
	u32 gsi;

	irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
					lockdep_is_held(&kvm->irq_lock));

	for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) {
		hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
			if (e->type == KVM_IRQ_ROUTING_HV_SINT)
				kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu,
						    e->hv_sint.sint, gsi);
		}
	}
}

static void synic_init(struct kvm_vcpu_hv_synic *synic)
{
	int i;

	memset(synic, 0, sizeof(*synic));
	synic->version = HV_SYNIC_VERSION_1;
	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
		atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED);
		atomic_set(&synic->sint_to_gsi[i], -1);
	}
}

static u64 get_time_ref_counter(struct kvm *kvm)
{
	struct kvm_hv *hv = to_kvm_hv(kvm);
	struct kvm_vcpu *vcpu;
	u64 tsc;

	/*
	 * Fall back to get_kvmclock_ns() when TSC page hasn't been set up,
	 * is broken, disabled or being updated.
	 */
	if (hv->hv_tsc_page_status != HV_TSC_PAGE_SET)
		return div_u64(get_kvmclock_ns(kvm), 100);

	vcpu = kvm_get_vcpu(kvm, 0);
	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
	return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64)
		+ hv->tsc_ref.tsc_offset;
}

static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
				bool vcpu_kick)
{
	struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);

	set_bit(stimer->index,
		to_hv_vcpu(vcpu)->stimer_pending_bitmap);
	kvm_make_request(KVM_REQ_HV_STIMER, vcpu);
	if (vcpu_kick)
		kvm_vcpu_kick(vcpu);
}

static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer)
{
	struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);

	trace_kvm_hv_stimer_cleanup(hv_stimer_to_vcpu(stimer)->vcpu_id,
				    stimer->index);

	hrtimer_cancel(&stimer->timer);
	clear_bit(stimer->index,
		  to_hv_vcpu(vcpu)->stimer_pending_bitmap);
	stimer->msg_pending = false;
	stimer->exp_time = 0;
}

static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer)
{
	struct kvm_vcpu_hv_stimer *stimer;

	stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer);
	trace_kvm_hv_stimer_callback(hv_stimer_to_vcpu(stimer)->vcpu_id,
				     stimer->index);
	stimer_mark_pending(stimer, true);

	return HRTIMER_NORESTART;
}

/*
 * stimer_start() assumptions:
 * a) stimer->count is not equal to 0
 * b) stimer->config has HV_STIMER_ENABLE flag
 */
static int stimer_start(struct kvm_vcpu_hv_stimer *stimer)
{
	u64 time_now;
	ktime_t ktime_now;

	time_now = get_time_ref_counter(hv_stimer_to_vcpu(stimer)->kvm);
	ktime_now = ktime_get();

	if (stimer->config.periodic) {
		if (stimer->exp_time) {
			if (time_now >= stimer->exp_time) {
				u64 remainder;

				div64_u64_rem(time_now - stimer->exp_time,
					      stimer->count, &remainder);
				stimer->exp_time =
					time_now + (stimer->count - remainder);
			}
		} else
			stimer->exp_time = time_now + stimer->count;

		trace_kvm_hv_stimer_start_periodic(
					hv_stimer_to_vcpu(stimer)->vcpu_id,
					stimer->index,
					time_now, stimer->exp_time);

		hrtimer_start(&stimer->timer,
			      ktime_add_ns(ktime_now,
					   100 * (stimer->exp_time - time_now)),
			      HRTIMER_MODE_ABS);
		return 0;
	}
	stimer->exp_time = stimer->count;
	if (time_now >= stimer->count) {
		/*
		 * Expire timer according to Hypervisor Top-Level Functional
		 * specification v4(15.3.1):
		 * "If a one shot is enabled and the specified count is in
		 * the past, it will expire immediately."
		 */
		stimer_mark_pending(stimer, false);
		return 0;
	}

	trace_kvm_hv_stimer_start_one_shot(hv_stimer_to_vcpu(stimer)->vcpu_id,
					   stimer->index,
					   time_now, stimer->count);

	hrtimer_start(&stimer->timer,
		      ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)),
		      HRTIMER_MODE_ABS);
	return 0;
}

static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
			     bool host)
{
	union hv_stimer_config new_config = {.as_uint64 = config},
		old_config = {.as_uint64 = stimer->config.as_uint64};
	struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
	struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu);

	if (!synic->active && !host)
		return 1;

	trace_kvm_hv_stimer_set_config(hv_stimer_to_vcpu(stimer)->vcpu_id,
				       stimer->index, config, host);

	stimer_cleanup(stimer);
	if (old_config.enable &&
	    !new_config.direct_mode && new_config.sintx == 0)
		new_config.enable = 0;
	stimer->config.as_uint64 = new_config.as_uint64;

	if (stimer->config.enable)
		stimer_mark_pending(stimer, false);

	return 0;
}

static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count,
			    bool host)
{
	struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
	struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu);

	if (!synic->active && !host)
		return 1;

	trace_kvm_hv_stimer_set_count(hv_stimer_to_vcpu(stimer)->vcpu_id,
				      stimer->index, count, host);

	stimer_cleanup(stimer);
	stimer->count = count;
	if (stimer->count == 0)
		stimer->config.enable = 0;
	else if (stimer->config.auto_enable)
		stimer->config.enable = 1;

	if (stimer->config.enable)
		stimer_mark_pending(stimer, false);

	return 0;
}

static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig)
{
	*pconfig = stimer->config.as_uint64;
	return 0;
}

static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount)
{
	*pcount = stimer->count;
	return 0;
}

static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint,
			     struct hv_message *src_msg, bool no_retry)
{
	struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
	int msg_off = offsetof(struct hv_message_page, sint_message[sint]);
	gfn_t msg_page_gfn;
	struct hv_message_header hv_hdr;
	int r;

	if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE))
		return -ENOENT;

	msg_page_gfn = synic->msg_page >> PAGE_SHIFT;

	/*
	 * Strictly following the spec-mandated ordering would assume setting
	 * .msg_pending before checking .message_type. However, this function
	 * is only called in vcpu context so the entire update is atomic from
	 * guest POV and thus the exact order here doesn't matter.
	 */
	r = kvm_vcpu_read_guest_page(vcpu, msg_page_gfn, &hv_hdr.message_type,
				     msg_off + offsetof(struct hv_message,
							header.message_type),
				     sizeof(hv_hdr.message_type));
	if (r < 0)
		return r;

	if (hv_hdr.message_type != HVMSG_NONE) {
		if (no_retry)
			return 0;

		hv_hdr.message_flags.msg_pending = 1;
		r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn,
					      &hv_hdr.message_flags,
					      msg_off +
					      offsetof(struct hv_message,
						       header.message_flags),
					      sizeof(hv_hdr.message_flags));
		if (r < 0)
			return r;
		return -EAGAIN;
	}

	r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn, src_msg, msg_off,
				      sizeof(src_msg->header) +
				      src_msg->header.payload_size);
	if (r < 0)
		return r;

	r = synic_set_irq(synic, sint);
	if (r < 0)
		return r;
	if (r == 0)
		return -EFAULT;
	return 0;
}

static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer)
{
	struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
	struct hv_message *msg = &stimer->msg;
	struct hv_timer_message_payload *payload =
			(struct hv_timer_message_payload *)&msg->u.payload;

	/*
	 * To avoid piling up periodic ticks, don't retry message
	 * delivery for them (within "lazy" lost ticks policy).
	 */
	bool no_retry = stimer->config.periodic;

	payload->expiration_time = stimer->exp_time;
	payload->delivery_time = get_time_ref_counter(vcpu->kvm);
	return synic_deliver_msg(to_hv_synic(vcpu),
				 stimer->config.sintx, msg,
				 no_retry);
}

static int stimer_notify_direct(struct kvm_vcpu_hv_stimer *stimer)
{
	struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
	struct kvm_lapic_irq irq = {
		.delivery_mode = APIC_DM_FIXED,
		.vector = stimer->config.apic_vector
	};

	if (lapic_in_kernel(vcpu))
		return !kvm_apic_set_irq(vcpu, &irq, NULL);
	return 0;
}

static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer)
{
	int r, direct = stimer->config.direct_mode;

	stimer->msg_pending = true;
	if (!direct)
		r = stimer_send_msg(stimer);
	else
		r = stimer_notify_direct(stimer);
	trace_kvm_hv_stimer_expiration(hv_stimer_to_vcpu(stimer)->vcpu_id,
				       stimer->index, direct, r);
	if (!r) {
		stimer->msg_pending = false;
		if (!(stimer->config.periodic))
			stimer->config.enable = 0;
	}
}

void kvm_hv_process_stimers(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
	struct kvm_vcpu_hv_stimer *stimer;
	u64 time_now, exp_time;
	int i;

	if (!hv_vcpu)
		return;

	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
		if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) {
			stimer = &hv_vcpu->stimer[i];
			if (stimer->config.enable) {
				exp_time = stimer->exp_time;

				if (exp_time) {
					time_now =
						get_time_ref_counter(vcpu->kvm);
					if (time_now >= exp_time)
						stimer_expiration(stimer);
				}

				if ((stimer->config.enable) &&
				    stimer->count) {
					if (!stimer->msg_pending)
						stimer_start(stimer);
				} else
					stimer_cleanup(stimer);
			}
		}
}

void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
	int i;

	if (!hv_vcpu)
		return;

	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
		stimer_cleanup(&hv_vcpu->stimer[i]);

	kfree(hv_vcpu);
	vcpu->arch.hyperv = NULL;
}

bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);

	if (!hv_vcpu)
		return false;

	if (!(hv_vcpu->hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE))
		return false;
	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
}
EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled);

bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
			    struct hv_vp_assist_page *assist_page)
{
	if (!kvm_hv_assist_page_enabled(vcpu))
		return false;
	return !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data,
				      assist_page, sizeof(*assist_page));
}
EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page);

static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
{
	struct hv_message *msg = &stimer->msg;
	struct hv_timer_message_payload *payload =
			(struct hv_timer_message_payload *)&msg->u.payload;

	memset(&msg->header, 0, sizeof(msg->header));
	msg->header.message_type = HVMSG_TIMER_EXPIRED;
	msg->header.payload_size = sizeof(*payload);

	payload->timer_index = stimer->index;
	payload->expiration_time = 0;
	payload->delivery_time = 0;
}

static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index)
{
	memset(stimer, 0, sizeof(*stimer));
	stimer->index = timer_index;
	hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	stimer->timer.function = stimer_timer_callback;
	stimer_prepare_msg(stimer);
}

static int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv *hv_vcpu;
	int i;

	hv_vcpu = kzalloc(sizeof(struct kvm_vcpu_hv), GFP_KERNEL_ACCOUNT);
	if (!hv_vcpu)
		return -ENOMEM;

	vcpu->arch.hyperv = hv_vcpu;
	hv_vcpu->vcpu = vcpu;

	synic_init(&hv_vcpu->synic);

	bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
		stimer_init(&hv_vcpu->stimer[i], i);

	hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu);

	return 0;
}

int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
{
	struct kvm_vcpu_hv_synic *synic;
	int r;

	if (!to_hv_vcpu(vcpu)) {
		r = kvm_hv_vcpu_init(vcpu);
		if (r)
			return r;
	}

	synic = to_hv_synic(vcpu);

	/*
	 * Hyper-V SynIC auto EOI SINT's are
	 * not compatible with APICV, so request
	 * to deactivate APICV permanently.
	 */
	kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_HYPERV);
	synic->active = true;
	synic->dont_zero_synic_pages = dont_zero_synic_pages;
	synic->control = HV_SYNIC_CONTROL_ENABLE;
	return 0;
}

static bool kvm_hv_msr_partition_wide(u32 msr)
{
	bool r = false;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
	case HV_X64_MSR_HYPERCALL:
	case HV_X64_MSR_REFERENCE_TSC:
	case HV_X64_MSR_TIME_REF_COUNT:
	case HV_X64_MSR_CRASH_CTL:
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
	case HV_X64_MSR_RESET:
	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
	case HV_X64_MSR_TSC_EMULATION_CONTROL:
	case HV_X64_MSR_TSC_EMULATION_STATUS:
	case HV_X64_MSR_SYNDBG_OPTIONS:
	case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
		r = true;
		break;
	}

	return r;
}

static int kvm_hv_msr_get_crash_data(struct kvm *kvm, u32 index, u64 *pdata)
{
	struct kvm_hv *hv = to_kvm_hv(kvm);
	size_t size = ARRAY_SIZE(hv->hv_crash_param);

	if (WARN_ON_ONCE(index >= size))
		return -EINVAL;

	*pdata = hv->hv_crash_param[array_index_nospec(index, size)];
	return 0;
}

static int kvm_hv_msr_get_crash_ctl(struct kvm *kvm, u64 *pdata)
{
	struct kvm_hv *hv = to_kvm_hv(kvm);

	*pdata = hv->hv_crash_ctl;
	return 0;
}

static int kvm_hv_msr_set_crash_ctl(struct kvm *kvm, u64 data)
{
	struct kvm_hv *hv = to_kvm_hv(kvm);

	hv->hv_crash_ctl = data & HV_CRASH_CTL_CRASH_NOTIFY;

	return 0;
}

static int kvm_hv_msr_set_crash_data(struct kvm *kvm, u32 index, u64 data)
{
	struct kvm_hv *hv = to_kvm_hv(kvm);
	size_t size = ARRAY_SIZE(hv->hv_crash_param);

	if (WARN_ON_ONCE(index >= size))
		return -EINVAL;

	hv->hv_crash_param[array_index_nospec(index, size)] = data;
	return 0;
}

/*
 * The kvmclock and Hyper-V TSC page use similar formulas, and converting
 * between them is possible:
 *
 * kvmclock formula:
 *    nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32)
 *           + system_time
 *
 * Hyper-V formula:
 *    nsec/100 = ticks * scale / 2^64 + offset
 *
 * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula.
 * By dividing the kvmclock formula by 100 and equating what's left we get:
 *    ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
 *            scale / 2^64 =         tsc_to_system_mul * 2^(tsc_shift-32) / 100
 *            scale        =         tsc_to_system_mul * 2^(32+tsc_shift) / 100
 *
 * Now expand the kvmclock formula and divide by 100:
 *    nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32)
 *           - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32)
 *           + system_time
 *    nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
 *               - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100
 *               + system_time / 100
 *
 * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64:
 *    nsec/100 = ticks * scale / 2^64
 *               - tsc_timestamp * scale / 2^64
 *               + system_time / 100
 *
 * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out:
 *    offset = system_time / 100 - tsc_timestamp * scale / 2^64
 *
 * These two equivalencies are implemented in this function.
 */
static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
					struct ms_hyperv_tsc_page *tsc_ref)
{
	u64 max_mul;

	if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT))
		return false;

	/*
	 * check if scale would overflow, if so we use the time ref counter
	 *    tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64
	 *    tsc_to_system_mul / 100 >= 2^(32-tsc_shift)
	 *    tsc_to_system_mul >= 100 * 2^(32-tsc_shift)
	 */
	max_mul = 100ull << (32 - hv_clock->tsc_shift);
	if (hv_clock->tsc_to_system_mul >= max_mul)
		return false;

	/*
	 * Otherwise compute the scale and offset according to the formulas
	 * derived above.
	 */
	tsc_ref->tsc_scale =
		mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift),
				hv_clock->tsc_to_system_mul,
				100);

	tsc_ref->tsc_offset = hv_clock->system_time;
	do_div(tsc_ref->tsc_offset, 100);
	tsc_ref->tsc_offset -=
		mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 64);
	return true;
}

/*
 * Don't touch TSC page values if the guest has opted for TSC emulation after
 * migration. KVM doesn't fully support reenlightenment notifications and TSC
 * access emulation and Hyper-V is known to expect the values in TSC page to
 * stay constant before TSC access emulation is disabled from guest side
 * (HV_X64_MSR_TSC_EMULATION_STATUS). KVM userspace is expected to preserve TSC
 * frequency and guest visible TSC value across migration (and prevent it when
 * TSC scaling is unsupported).
 */
static inline bool tsc_page_update_unsafe(struct kvm_hv *hv)
{
	return (hv->hv_tsc_page_status != HV_TSC_PAGE_GUEST_CHANGED) &&
		hv->hv_tsc_emulation_control;
}

void kvm_hv_setup_tsc_page(struct kvm *kvm,
			   struct pvclock_vcpu_time_info *hv_clock)
{
	struct kvm_hv *hv = to_kvm_hv(kvm);
	u32 tsc_seq;
	u64 gfn;

	BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
	BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0);

	if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN ||
	    hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET)
		return;

	mutex_lock(&hv->hv_lock);
	if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
		goto out_unlock;

	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
	/*
	 * Because the TSC parameters only vary when there is a
	 * change in the master clock, do not bother with caching.
	 */
	if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
				    &tsc_seq, sizeof(tsc_seq))))
		goto out_err;

	if (tsc_seq && tsc_page_update_unsafe(hv)) {
		if (kvm_read_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
			goto out_err;

		hv->hv_tsc_page_status = HV_TSC_PAGE_SET;
		goto out_unlock;
	}

	/*
	 * While we're computing and writing the parameters, force the
	 * guest to use the time reference count MSR.
	 */
	hv->tsc_ref.tsc_sequence = 0;
	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
			    &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
		goto out_err;

	if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
		goto out_err;

	/* Ensure sequence is zero before writing the rest of the struct.  */
	smp_wmb();
	if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
		goto out_err;

	/*
	 * Now switch to the TSC page mechanism by writing the sequence.
	 */
	tsc_seq++;
	if (tsc_seq == 0xFFFFFFFF || tsc_seq == 0)
		tsc_seq = 1;

	/* Write the struct entirely before the non-zero sequence.  */
	smp_wmb();

	hv->tsc_ref.tsc_sequence = tsc_seq;
	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
			    &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
		goto out_err;

	hv->hv_tsc_page_status = HV_TSC_PAGE_SET;
	goto out_unlock;

out_err:
	hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN;
out_unlock:
	mutex_unlock(&hv->hv_lock);
}

void kvm_hv_invalidate_tsc_page(struct kvm *kvm)
{
	struct kvm_hv *hv = to_kvm_hv(kvm);
	u64 gfn;
	int idx;

	if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN ||
	    hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET ||
	    tsc_page_update_unsafe(hv))
		return;

	mutex_lock(&hv->hv_lock);

	if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
		goto out_unlock;

	/* Preserve HV_TSC_PAGE_GUEST_CHANGED/HV_TSC_PAGE_HOST_CHANGED states */
	if (hv->hv_tsc_page_status == HV_TSC_PAGE_SET)
		hv->hv_tsc_page_status = HV_TSC_PAGE_UPDATING;

	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;

	hv->tsc_ref.tsc_sequence = 0;

	/*
	 * Take the srcu lock as memslots will be accessed to check the gfn
	 * cache generation against the memslots generation.
	 */
	idx = srcu_read_lock(&kvm->srcu);
	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
			    &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
		hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN;
	srcu_read_unlock(&kvm->srcu, idx);

out_unlock:
	mutex_unlock(&hv->hv_lock);
}

static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
			     bool host)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_hv *hv = to_kvm_hv(kvm);

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
		hv->hv_guest_os_id = data;
		/* setting guest os id to zero disables hypercall page */
		if (!hv->hv_guest_os_id)
			hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
		break;
	case HV_X64_MSR_HYPERCALL: {
		u8 instructions[9];
		int i = 0;
		u64 addr;

		/* if guest os id is not set hypercall should remain disabled */
		if (!hv->hv_guest_os_id)
			break;
		if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
			hv->hv_hypercall = data;
			break;
		}

		/*
		 * If Xen and Hyper-V hypercalls are both enabled, disambiguate
		 * the same way Xen itself does, by setting the bit 31 of EAX
		 * which is RsvdZ in the 32-bit Hyper-V hypercall ABI and just
		 * going to be clobbered on 64-bit.
		 */
		if (kvm_xen_hypercall_enabled(kvm)) {
			/* orl $0x80000000, %eax */
			instructions[i++] = 0x0d;
			instructions[i++] = 0x00;
			instructions[i++] = 0x00;
			instructions[i++] = 0x00;
			instructions[i++] = 0x80;
		}

		/* vmcall/vmmcall */
		static_call(kvm_x86_patch_hypercall)(vcpu, instructions + i);
		i += 3;

		/* ret */
		((unsigned char *)instructions)[i++] = 0xc3;

		addr = data & HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK;
		if (kvm_vcpu_write_guest(vcpu, addr, instructions, i))
			return 1;
		hv->hv_hypercall = data;
		break;
	}
	case HV_X64_MSR_REFERENCE_TSC:
		hv->hv_tsc_page = data;
		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
			if (!host)
				hv->hv_tsc_page_status = HV_TSC_PAGE_GUEST_CHANGED;
			else
				hv->hv_tsc_page_status = HV_TSC_PAGE_HOST_CHANGED;
			kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
		} else {
			hv->hv_tsc_page_status = HV_TSC_PAGE_UNSET;
		}
		break;
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
		return kvm_hv_msr_set_crash_data(kvm,
						 msr - HV_X64_MSR_CRASH_P0,
						 data);
	case HV_X64_MSR_CRASH_CTL:
		if (host)
			return kvm_hv_msr_set_crash_ctl(kvm, data);

		if (data & HV_CRASH_CTL_CRASH_NOTIFY) {
			vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n",
				   hv->hv_crash_param[0],
				   hv->hv_crash_param[1],
				   hv->hv_crash_param[2],
				   hv->hv_crash_param[3],
				   hv->hv_crash_param[4]);

			/* Send notification about crash to user space */
			kvm_make_request(KVM_REQ_HV_CRASH, vcpu);
		}
		break;
	case HV_X64_MSR_RESET:
		if (data == 1) {
			vcpu_debug(vcpu, "hyper-v reset requested\n");
			kvm_make_request(KVM_REQ_HV_RESET, vcpu);
		}
		break;
	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
		hv->hv_reenlightenment_control = data;
		break;
	case HV_X64_MSR_TSC_EMULATION_CONTROL:
		hv->hv_tsc_emulation_control = data;
		break;
	case HV_X64_MSR_TSC_EMULATION_STATUS:
		if (data && !host)
			return 1;

		hv->hv_tsc_emulation_status = data;
		break;
	case HV_X64_MSR_TIME_REF_COUNT:
		/* read-only, but still ignore it if host-initiated */
		if (!host)
			return 1;
		break;
	case HV_X64_MSR_SYNDBG_OPTIONS:
	case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
		return syndbg_set_msr(vcpu, msr, data, host);
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
			    msr, data);
		return 1;
	}
	return 0;
}

/* Calculate cpu time spent by current task in 100ns units */
static u64 current_task_runtime_100ns(void)
{
	u64 utime, stime;

	task_cputime_adjusted(current, &utime, &stime);

	return div_u64(utime + stime, 100);
}

static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
{
	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);

	switch (msr) {
	case HV_X64_MSR_VP_INDEX: {
		struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
		int vcpu_idx = kvm_vcpu_get_idx(vcpu);
		u32 new_vp_index = (u32)data;

		if (!host || new_vp_index >= KVM_MAX_VCPUS)
			return 1;

		if (new_vp_index == hv_vcpu->vp_index)
			return 0;

		/*
		 * The VP index is initialized to vcpu_index by
		 * kvm_hv_vcpu_postcreate so they initially match.  Now the
		 * VP index is changing, adjust num_mismatched_vp_indexes if
		 * it now matches or no longer matches vcpu_idx.
		 */
		if (hv_vcpu->vp_index == vcpu_idx)
			atomic_inc(&hv->num_mismatched_vp_indexes);
		else if (new_vp_index == vcpu_idx)
			atomic_dec(&hv->num_mismatched_vp_indexes);

		hv_vcpu->vp_index = new_vp_index;
		break;
	}
	case HV_X64_MSR_VP_ASSIST_PAGE: {
		u64 gfn;
		unsigned long addr;

		if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
			hv_vcpu->hv_vapic = data;
			if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0))
				return 1;
			break;
		}
		gfn = data >> HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT;
		addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
		if (kvm_is_error_hva(addr))
			return 1;

		/*
		 * Clear apic_assist portion of struct hv_vp_assist_page
		 * only, there can be valuable data in the rest which needs
		 * to be preserved e.g. on migration.
		 */
		if (__put_user(0, (u32 __user *)addr))
			return 1;
		hv_vcpu->hv_vapic = data;
		kvm_vcpu_mark_page_dirty(vcpu, gfn);
		if (kvm_lapic_enable_pv_eoi(vcpu,
					    gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
					    sizeof(struct hv_vp_assist_page)))
			return 1;
		break;
	}
	case HV_X64_MSR_EOI:
		return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
	case HV_X64_MSR_ICR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
	case HV_X64_MSR_TPR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
	case HV_X64_MSR_VP_RUNTIME:
		if (!host)
			return 1;
		hv_vcpu->runtime_offset = data - current_task_runtime_100ns();
		break;
	case HV_X64_MSR_SCONTROL:
	case HV_X64_MSR_SVERSION:
	case HV_X64_MSR_SIEFP:
	case HV_X64_MSR_SIMP:
	case HV_X64_MSR_EOM:
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		return synic_set_msr(to_hv_synic(vcpu), msr, data, host);
	case HV_X64_MSR_STIMER0_CONFIG:
	case HV_X64_MSR_STIMER1_CONFIG:
	case HV_X64_MSR_STIMER2_CONFIG:
	case HV_X64_MSR_STIMER3_CONFIG: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;

		return stimer_set_config(to_hv_stimer(vcpu, timer_index),
					 data, host);
	}
	case HV_X64_MSR_STIMER0_COUNT:
	case HV_X64_MSR_STIMER1_COUNT:
	case HV_X64_MSR_STIMER2_COUNT:
	case HV_X64_MSR_STIMER3_COUNT: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;

		return stimer_set_count(to_hv_stimer(vcpu, timer_index),
					data, host);
	}
	case HV_X64_MSR_TSC_FREQUENCY:
	case HV_X64_MSR_APIC_FREQUENCY:
		/* read-only, but still ignore it if host-initiated */
		if (!host)
			return 1;
		break;
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
			    msr, data);
		return 1;
	}

	return 0;
}

static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
			     bool host)
{
	u64 data = 0;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_hv *hv = to_kvm_hv(kvm);

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
		data = hv->hv_guest_os_id;
		break;
	case HV_X64_MSR_HYPERCALL:
		data = hv->hv_hypercall;
		break;
	case HV_X64_MSR_TIME_REF_COUNT:
		data = get_time_ref_counter(kvm);
		break;
	case HV_X64_MSR_REFERENCE_TSC:
		data = hv->hv_tsc_page;
		break;
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
		return kvm_hv_msr_get_crash_data(kvm,
						 msr - HV_X64_MSR_CRASH_P0,
						 pdata);
	case HV_X64_MSR_CRASH_CTL:
		return kvm_hv_msr_get_crash_ctl(kvm, pdata);
	case HV_X64_MSR_RESET:
		data = 0;
		break;
	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
		data = hv->hv_reenlightenment_control;
		break;
	case HV_X64_MSR_TSC_EMULATION_CONTROL:
		data = hv->hv_tsc_emulation_control;
		break;
	case HV_X64_MSR_TSC_EMULATION_STATUS:
		data = hv->hv_tsc_emulation_status;
		break;
	case HV_X64_MSR_SYNDBG_OPTIONS:
	case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
		return syndbg_get_msr(vcpu, msr, pdata, host);
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
		return 1;
	}

	*pdata = data;
	return 0;
}

static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
			  bool host)
{
	u64 data = 0;
	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);

	switch (msr) {
	case HV_X64_MSR_VP_INDEX:
		data = hv_vcpu->vp_index;
		break;
	case HV_X64_MSR_EOI:
		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
	case HV_X64_MSR_ICR:
		return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
	case HV_X64_MSR_TPR:
		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
	case HV_X64_MSR_VP_ASSIST_PAGE:
		data = hv_vcpu->hv_vapic;
		break;
	case HV_X64_MSR_VP_RUNTIME:
		data = current_task_runtime_100ns() + hv_vcpu->runtime_offset;
		break;
	case HV_X64_MSR_SCONTROL:
	case HV_X64_MSR_SVERSION:
	case HV_X64_MSR_SIEFP:
	case HV_X64_MSR_SIMP:
	case HV_X64_MSR_EOM:
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		return synic_get_msr(to_hv_synic(vcpu), msr, pdata, host);
	case HV_X64_MSR_STIMER0_CONFIG:
	case HV_X64_MSR_STIMER1_CONFIG:
	case HV_X64_MSR_STIMER2_CONFIG:
	case HV_X64_MSR_STIMER3_CONFIG: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;

		return stimer_get_config(to_hv_stimer(vcpu, timer_index),
					 pdata);
	}
	case HV_X64_MSR_STIMER0_COUNT:
	case HV_X64_MSR_STIMER1_COUNT:
	case HV_X64_MSR_STIMER2_COUNT:
	case HV_X64_MSR_STIMER3_COUNT: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;

		return stimer_get_count(to_hv_stimer(vcpu, timer_index),
					pdata);
	}
	case HV_X64_MSR_TSC_FREQUENCY:
		data = (u64)vcpu->arch.virtual_tsc_khz * 1000;
		break;
	case HV_X64_MSR_APIC_FREQUENCY:
		data = APIC_BUS_FREQUENCY;
		break;
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
		return 1;
	}
	*pdata = data;
	return 0;
}

int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
{
	struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);

	if (!host && !vcpu->arch.hyperv_enabled)
		return 1;

	if (!to_hv_vcpu(vcpu)) {
		if (kvm_hv_vcpu_init(vcpu))
			return 1;
	}

	if (kvm_hv_msr_partition_wide(msr)) {
		int r;

		mutex_lock(&hv->hv_lock);
		r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
		mutex_unlock(&hv->hv_lock);
		return r;
	} else
		return kvm_hv_set_msr(vcpu, msr, data, host);
}

int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
{
	struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);

	if (!host && !vcpu->arch.hyperv_enabled)
		return 1;

	if (!to_hv_vcpu(vcpu)) {
		if (kvm_hv_vcpu_init(vcpu))
			return 1;
	}

	if (kvm_hv_msr_partition_wide(msr)) {
		int r;

		mutex_lock(&hv->hv_lock);
		r = kvm_hv_get_msr_pw(vcpu, msr, pdata, host);
		mutex_unlock(&hv->hv_lock);
		return r;
	} else
		return kvm_hv_get_msr(vcpu, msr, pdata, host);
}

static __always_inline unsigned long *sparse_set_to_vcpu_mask(
	struct kvm *kvm, u64 *sparse_banks, u64 valid_bank_mask,
	u64 *vp_bitmap, unsigned long *vcpu_bitmap)
{
	struct kvm_hv *hv = to_kvm_hv(kvm);
	struct kvm_vcpu *vcpu;
	int i, bank, sbank = 0;

	memset(vp_bitmap, 0,
	       KVM_HV_MAX_SPARSE_VCPU_SET_BITS * sizeof(*vp_bitmap));
	for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
			 KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
		vp_bitmap[bank] = sparse_banks[sbank++];

	if (likely(!atomic_read(&hv->num_mismatched_vp_indexes))) {
		/* for all vcpus vp_index == vcpu_idx */
		return (unsigned long *)vp_bitmap;
	}

	bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (test_bit(kvm_hv_get_vpindex(vcpu), (unsigned long *)vp_bitmap))
			__set_bit(i, vcpu_bitmap);
	}
	return vcpu_bitmap;
}

struct kvm_hv_hcall {
	u64 param;
	u64 ingpa;
	u64 outgpa;
	u16 code;
	u16 rep_cnt;
	u16 rep_idx;
	bool fast;
	bool rep;
	sse128_t xmm[HV_HYPERCALL_MAX_XMM_REGISTERS];
};

static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool ex)
{
	int i;
	gpa_t gpa;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
	struct hv_tlb_flush_ex flush_ex;
	struct hv_tlb_flush flush;
	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
	unsigned long *vcpu_mask;
	u64 valid_bank_mask;
	u64 sparse_banks[64];
	int sparse_banks_len;
	bool all_cpus;

	if (!ex) {
		if (hc->fast) {
			flush.address_space = hc->ingpa;
			flush.flags = hc->outgpa;
			flush.processor_mask = sse128_lo(hc->xmm[0]);
		} else {
			if (unlikely(kvm_read_guest(kvm, hc->ingpa,
						    &flush, sizeof(flush))))
				return HV_STATUS_INVALID_HYPERCALL_INPUT;
		}

		trace_kvm_hv_flush_tlb(flush.processor_mask,
				       flush.address_space, flush.flags);

		valid_bank_mask = BIT_ULL(0);
		sparse_banks[0] = flush.processor_mask;

		/*
		 * Work around possible WS2012 bug: it sends hypercalls
		 * with processor_mask = 0x0 and HV_FLUSH_ALL_PROCESSORS clear,
		 * while also expecting us to flush something and crashing if
		 * we don't. Let's treat processor_mask == 0 same as
		 * HV_FLUSH_ALL_PROCESSORS.
		 */
		all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
			flush.processor_mask == 0;
	} else {
		if (hc->fast) {
			flush_ex.address_space = hc->ingpa;
			flush_ex.flags = hc->outgpa;
			memcpy(&flush_ex.hv_vp_set,
			       &hc->xmm[0], sizeof(hc->xmm[0]));
		} else {
			if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex,
						    sizeof(flush_ex))))
				return HV_STATUS_INVALID_HYPERCALL_INPUT;
		}

		trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
					  flush_ex.hv_vp_set.format,
					  flush_ex.address_space,
					  flush_ex.flags);

		valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask;
		all_cpus = flush_ex.hv_vp_set.format !=
			HV_GENERIC_SET_SPARSE_4K;

		sparse_banks_len = bitmap_weight((unsigned long *)&valid_bank_mask, 64);

		if (!sparse_banks_len && !all_cpus)
			goto ret_success;

		if (!all_cpus) {
			if (hc->fast) {
				if (sparse_banks_len > HV_HYPERCALL_MAX_XMM_REGISTERS - 1)
					return HV_STATUS_INVALID_HYPERCALL_INPUT;
				for (i = 0; i < sparse_banks_len; i += 2) {
					sparse_banks[i] = sse128_lo(hc->xmm[i / 2 + 1]);
					sparse_banks[i + 1] = sse128_hi(hc->xmm[i / 2 + 1]);
				}
			} else {
				gpa = hc->ingpa + offsetof(struct hv_tlb_flush_ex,
							   hv_vp_set.bank_contents);
				if (unlikely(kvm_read_guest(kvm, gpa, sparse_banks,
							    sparse_banks_len *
							    sizeof(sparse_banks[0]))))
					return HV_STATUS_INVALID_HYPERCALL_INPUT;
			}
		}
	}

	cpumask_clear(&hv_vcpu->tlb_flush);

	vcpu_mask = all_cpus ? NULL :
		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
					vp_bitmap, vcpu_bitmap);

	/*
	 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
	 * analyze it here, flush TLB regardless of the specified address space.
	 */
	kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH,
				    NULL, vcpu_mask, &hv_vcpu->tlb_flush);

ret_success:
	/* We always do full TLB flush, set 'Reps completed' = 'Rep Count' */
	return (u64)HV_STATUS_SUCCESS |
		((u64)hc->rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
}

static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector,
				 unsigned long *vcpu_bitmap)
{
	struct kvm_lapic_irq irq = {
		.delivery_mode = APIC_DM_FIXED,
		.vector = vector
	};
	struct kvm_vcpu *vcpu;
	int i;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (vcpu_bitmap && !test_bit(i, vcpu_bitmap))
			continue;

		/* We fail only when APIC is disabled */
		kvm_apic_set_irq(vcpu, &irq, NULL);
	}
}

static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool ex)
{
	struct kvm *kvm = vcpu->kvm;
	struct hv_send_ipi_ex send_ipi_ex;
	struct hv_send_ipi send_ipi;
	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
	unsigned long *vcpu_mask;
	unsigned long valid_bank_mask;
	u64 sparse_banks[64];
	int sparse_banks_len;
	u32 vector;
	bool all_cpus;

	if (!ex) {
		if (!hc->fast) {
			if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi,
						    sizeof(send_ipi))))
				return HV_STATUS_INVALID_HYPERCALL_INPUT;
			sparse_banks[0] = send_ipi.cpu_mask;
			vector = send_ipi.vector;
		} else {
			/* 'reserved' part of hv_send_ipi should be 0 */
			if (unlikely(hc->ingpa >> 32 != 0))
				return HV_STATUS_INVALID_HYPERCALL_INPUT;
			sparse_banks[0] = hc->outgpa;
			vector = (u32)hc->ingpa;
		}
		all_cpus = false;
		valid_bank_mask = BIT_ULL(0);

		trace_kvm_hv_send_ipi(vector, sparse_banks[0]);
	} else {
		if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi_ex,
					    sizeof(send_ipi_ex))))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;

		trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector,
					 send_ipi_ex.vp_set.format,
					 send_ipi_ex.vp_set.valid_bank_mask);

		vector = send_ipi_ex.vector;
		valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
		sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
			sizeof(sparse_banks[0]);

		all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;

		if (!sparse_banks_len)
			goto ret_success;

		if (!all_cpus &&
		    kvm_read_guest(kvm,
				   hc->ingpa + offsetof(struct hv_send_ipi_ex,
							vp_set.bank_contents),
				   sparse_banks,
				   sparse_banks_len))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;
	}

	if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
		return HV_STATUS_INVALID_HYPERCALL_INPUT;

	vcpu_mask = all_cpus ? NULL :
		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
					vp_bitmap, vcpu_bitmap);

	kvm_send_ipi_to_many(kvm, vector, vcpu_mask);

ret_success:
	return HV_STATUS_SUCCESS;
}

void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *entry;

	entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_INTERFACE, 0);
	if (entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX)
		vcpu->arch.hyperv_enabled = true;
	else
		vcpu->arch.hyperv_enabled = false;
}

bool kvm_hv_hypercall_enabled(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.hyperv_enabled && to_kvm_hv(vcpu->kvm)->hv_guest_os_id;
}

static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
{
	bool longmode;

	longmode = is_64_bit_mode(vcpu);
	if (longmode)
		kvm_rax_write(vcpu, result);
	else {
		kvm_rdx_write(vcpu, result >> 32);
		kvm_rax_write(vcpu, result & 0xffffffff);
	}
}

static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
{
	kvm_hv_hypercall_set_result(vcpu, result);
	++vcpu->stat.hypercalls;
	return kvm_skip_emulated_instruction(vcpu);
}

static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
{
	return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result);
}

static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
{
	struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
	struct eventfd_ctx *eventfd;

	if (unlikely(!hc->fast)) {
		int ret;
		gpa_t gpa = hc->ingpa;

		if ((gpa & (__alignof__(hc->ingpa) - 1)) ||
		    offset_in_page(gpa) + sizeof(hc->ingpa) > PAGE_SIZE)
			return HV_STATUS_INVALID_ALIGNMENT;

		ret = kvm_vcpu_read_guest(vcpu, gpa,
					  &hc->ingpa, sizeof(hc->ingpa));
		if (ret < 0)
			return HV_STATUS_INVALID_ALIGNMENT;
	}

	/*
	 * Per spec, bits 32-47 contain the extra "flag number".  However, we
	 * have no use for it, and in all known usecases it is zero, so just
	 * report lookup failure if it isn't.
	 */
	if (hc->ingpa & 0xffff00000000ULL)
		return HV_STATUS_INVALID_PORT_ID;
	/* remaining bits are reserved-zero */
	if (hc->ingpa & ~KVM_HYPERV_CONN_ID_MASK)
		return HV_STATUS_INVALID_HYPERCALL_INPUT;

	/* the eventfd is protected by vcpu->kvm->srcu, but conn_to_evt isn't */
	rcu_read_lock();
	eventfd = idr_find(&hv->conn_to_evt, hc->ingpa);
	rcu_read_unlock();
	if (!eventfd)
		return HV_STATUS_INVALID_PORT_ID;

	eventfd_signal(eventfd, 1);
	return HV_STATUS_SUCCESS;
}

static bool is_xmm_fast_hypercall(struct kvm_hv_hcall *hc)
{
	switch (hc->code) {
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
		return true;
	}

	return false;
}

static void kvm_hv_hypercall_read_xmm(struct kvm_hv_hcall *hc)
{
	int reg;

	kvm_fpu_get();
	for (reg = 0; reg < HV_HYPERCALL_MAX_XMM_REGISTERS; reg++)
		_kvm_read_sse_reg(reg, &hc->xmm[reg]);
	kvm_fpu_put();
}

int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
{
	struct kvm_hv_hcall hc;
	u64 ret = HV_STATUS_SUCCESS;

	/*
	 * hypercall generates UD from non zero cpl and real mode
	 * per HYPER-V spec
	 */
	if (static_call(kvm_x86_get_cpl)(vcpu) != 0 || !is_protmode(vcpu)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}

#ifdef CONFIG_X86_64
	if (is_64_bit_mode(vcpu)) {
		hc.param = kvm_rcx_read(vcpu);
		hc.ingpa = kvm_rdx_read(vcpu);
		hc.outgpa = kvm_r8_read(vcpu);
	} else
#endif
	{
		hc.param = ((u64)kvm_rdx_read(vcpu) << 32) |
			    (kvm_rax_read(vcpu) & 0xffffffff);
		hc.ingpa = ((u64)kvm_rbx_read(vcpu) << 32) |
			    (kvm_rcx_read(vcpu) & 0xffffffff);
		hc.outgpa = ((u64)kvm_rdi_read(vcpu) << 32) |
			     (kvm_rsi_read(vcpu) & 0xffffffff);
	}

	hc.code = hc.param & 0xffff;
	hc.fast = !!(hc.param & HV_HYPERCALL_FAST_BIT);
	hc.rep_cnt = (hc.param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff;
	hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
	hc.rep = !!(hc.rep_cnt || hc.rep_idx);

	if (hc.fast && is_xmm_fast_hypercall(&hc))
		kvm_hv_hypercall_read_xmm(&hc);

	trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx,
			       hc.ingpa, hc.outgpa);

	switch (hc.code) {
	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
		if (unlikely(hc.rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		kvm_vcpu_on_spin(vcpu, true);
		break;
	case HVCALL_SIGNAL_EVENT:
		if (unlikely(hc.rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hvcall_signal_event(vcpu, &hc);
		if (ret != HV_STATUS_INVALID_PORT_ID)
			break;
		fallthrough;	/* maybe userspace knows this conn_id */
	case HVCALL_POST_MESSAGE:
		/* don't bother userspace if it has no way to handle it */
		if (unlikely(hc.rep || !to_hv_synic(vcpu)->active)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		vcpu->run->exit_reason = KVM_EXIT_HYPERV;
		vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
		vcpu->run->hyperv.u.hcall.input = hc.param;
		vcpu->run->hyperv.u.hcall.params[0] = hc.ingpa;
		vcpu->run->hyperv.u.hcall.params[1] = hc.outgpa;
		vcpu->arch.complete_userspace_io =
				kvm_hv_hypercall_complete_userspace;
		return 0;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
		if (unlikely(!hc.rep_cnt || hc.rep_idx)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_flush_tlb(vcpu, &hc, false);
		break;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
		if (unlikely(hc.rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_flush_tlb(vcpu, &hc, false);
		break;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
		if (unlikely(!hc.rep_cnt || hc.rep_idx)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_flush_tlb(vcpu, &hc, true);
		break;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
		if (unlikely(hc.rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_flush_tlb(vcpu, &hc, true);
		break;
	case HVCALL_SEND_IPI:
		if (unlikely(hc.rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_send_ipi(vcpu, &hc, false);
		break;
	case HVCALL_SEND_IPI_EX:
		if (unlikely(hc.fast || hc.rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_send_ipi(vcpu, &hc, true);
		break;
	case HVCALL_POST_DEBUG_DATA:
	case HVCALL_RETRIEVE_DEBUG_DATA:
		if (unlikely(hc.fast)) {
			ret = HV_STATUS_INVALID_PARAMETER;
			break;
		}
		fallthrough;
	case HVCALL_RESET_DEBUG_SESSION: {
		struct kvm_hv_syndbg *syndbg = to_hv_syndbg(vcpu);

		if (!kvm_hv_is_syndbg_enabled(vcpu)) {
			ret = HV_STATUS_INVALID_HYPERCALL_CODE;
			break;
		}

		if (!(syndbg->options & HV_X64_SYNDBG_OPTION_USE_HCALLS)) {
			ret = HV_STATUS_OPERATION_DENIED;
			break;
		}
		vcpu->run->exit_reason = KVM_EXIT_HYPERV;
		vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
		vcpu->run->hyperv.u.hcall.input = hc.param;
		vcpu->run->hyperv.u.hcall.params[0] = hc.ingpa;
		vcpu->run->hyperv.u.hcall.params[1] = hc.outgpa;
		vcpu->arch.complete_userspace_io =
				kvm_hv_hypercall_complete_userspace;
		return 0;
	}
	default:
		ret = HV_STATUS_INVALID_HYPERCALL_CODE;
		break;
	}

	return kvm_hv_hypercall_complete(vcpu, ret);
}

void kvm_hv_init_vm(struct kvm *kvm)
{
	struct kvm_hv *hv = to_kvm_hv(kvm);

	mutex_init(&hv->hv_lock);
	idr_init(&hv->conn_to_evt);
}

void kvm_hv_destroy_vm(struct kvm *kvm)
{
	struct kvm_hv *hv = to_kvm_hv(kvm);
	struct eventfd_ctx *eventfd;
	int i;

	idr_for_each_entry(&hv->conn_to_evt, eventfd, i)
		eventfd_ctx_put(eventfd);
	idr_destroy(&hv->conn_to_evt);
}
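/*
 * Connection ID to eventfd mapping used by HVCALL_SIGNAL_EVENT.  Userspace
 * registers and unregisters eventfds through kvm_vm_ioctl_hv_eventfd(); the
 * mapping lives in the per-VM 'conn_to_evt' IDR, with 'hv_lock' serializing
 * updates and RCU/SRCU protecting lookups on the hypercall path.
 */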
static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd)
{
	struct kvm_hv *hv = to_kvm_hv(kvm);
	struct eventfd_ctx *eventfd;
	int ret;

	eventfd = eventfd_ctx_fdget(fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	mutex_lock(&hv->hv_lock);
	ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1,
			GFP_KERNEL_ACCOUNT);
	mutex_unlock(&hv->hv_lock);

	if (ret >= 0)
		return 0;

	if (ret == -ENOSPC)
		ret = -EEXIST;
	eventfd_ctx_put(eventfd);
	return ret;
}

static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id)
{
	struct kvm_hv *hv = to_kvm_hv(kvm);
	struct eventfd_ctx *eventfd;

	mutex_lock(&hv->hv_lock);
	eventfd = idr_remove(&hv->conn_to_evt, conn_id);
	mutex_unlock(&hv->hv_lock);

	if (!eventfd)
		return -ENOENT;

	synchronize_srcu(&kvm->srcu);
	eventfd_ctx_put(eventfd);
	return 0;
}

int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args)
{
	if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) ||
	    (args->conn_id & ~KVM_HYPERV_CONN_ID_MASK))
		return -EINVAL;

	if (args->flags == KVM_HYPERV_EVENTFD_DEASSIGN)
		return kvm_hv_eventfd_deassign(kvm, args->conn_id);
	return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd);
}
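/*
 * Fill in the fixed set of Hyper-V CPUID leaves that KVM supports and copy
 * them to userspace.  The NESTED_FEATURES leaf is reported only when the
 * vendor module exposes an enlightened VMCS version (evmcs_ver != 0).
 */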
int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
		     struct kvm_cpuid_entry2 __user *entries)
{
	uint16_t evmcs_ver = 0;
	struct kvm_cpuid_entry2 cpuid_entries[] = {
		{ .function = HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS },
		{ .function = HYPERV_CPUID_INTERFACE },
		{ .function = HYPERV_CPUID_VERSION },
		{ .function = HYPERV_CPUID_FEATURES },
		{ .function = HYPERV_CPUID_ENLIGHTMENT_INFO },
		{ .function = HYPERV_CPUID_IMPLEMENT_LIMITS },
		{ .function = HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS },
		{ .function = HYPERV_CPUID_SYNDBG_INTERFACE },
		{ .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES },
		{ .function = HYPERV_CPUID_NESTED_FEATURES },
	};
	int i, nent = ARRAY_SIZE(cpuid_entries);

	if (kvm_x86_ops.nested_ops->get_evmcs_version)
		evmcs_ver = kvm_x86_ops.nested_ops->get_evmcs_version(vcpu);

	/* Skip NESTED_FEATURES if eVMCS is not supported */
	if (!evmcs_ver)
		--nent;

	if (cpuid->nent < nent)
		return -E2BIG;

	if (cpuid->nent > nent)
		cpuid->nent = nent;

	for (i = 0; i < nent; i++) {
		struct kvm_cpuid_entry2 *ent = &cpuid_entries[i];
		u32 signature[3];

		switch (ent->function) {
		case HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS:
			memcpy(signature, "Linux KVM Hv", 12);

			ent->eax = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES;
			ent->ebx = signature[0];
			ent->ecx = signature[1];
			ent->edx = signature[2];
			break;

		case HYPERV_CPUID_INTERFACE:
			ent->eax = HYPERV_CPUID_SIGNATURE_EAX;
			break;

		case HYPERV_CPUID_VERSION:
			/*
			 * We implement some Hyper-V 2016 functions so let's use
			 * this version.
			 */
			ent->eax = 0x00003839;
			ent->ebx = 0x000A0000;
			break;

		case HYPERV_CPUID_FEATURES:
			ent->eax |= HV_MSR_VP_RUNTIME_AVAILABLE;
			ent->eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE;
			ent->eax |= HV_MSR_SYNIC_AVAILABLE;
			ent->eax |= HV_MSR_SYNTIMER_AVAILABLE;
			ent->eax |= HV_MSR_APIC_ACCESS_AVAILABLE;
			ent->eax |= HV_MSR_HYPERCALL_AVAILABLE;
			ent->eax |= HV_MSR_VP_INDEX_AVAILABLE;
			ent->eax |= HV_MSR_RESET_AVAILABLE;
			ent->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE;
			ent->eax |= HV_ACCESS_FREQUENCY_MSRS;
			ent->eax |= HV_ACCESS_REENLIGHTENMENT;

			ent->ebx |= HV_POST_MESSAGES;
			ent->ebx |= HV_SIGNAL_EVENTS;

			ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE;
			ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;

			ent->ebx |= HV_DEBUGGING;
			ent->edx |= HV_X64_GUEST_DEBUGGING_AVAILABLE;
			ent->edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE;

			/*
			 * Direct Synthetic timers only make sense with in-kernel
			 * LAPIC
			 */
			if (!vcpu || lapic_in_kernel(vcpu))
				ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;

			break;

		case HYPERV_CPUID_ENLIGHTMENT_INFO:
			ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
			ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED;
			ent->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED;
			ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED;
			ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
			if (evmcs_ver)
				ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
			if (!cpu_smt_possible())
				ent->eax |= HV_X64_NO_NONARCH_CORESHARING;
			/*
			 * Default number of spinlock retry attempts, matches
			 * HyperV 2016.
			 */
			ent->ebx = 0x00000FFF;

			break;

		case HYPERV_CPUID_IMPLEMENT_LIMITS:
			/* Maximum number of virtual processors */
			ent->eax = KVM_MAX_VCPUS;
			/*
			 * Maximum number of logical processors, matches
			 * HyperV 2016.
			 */
			ent->ebx = 64;

			break;

		case HYPERV_CPUID_NESTED_FEATURES:
			ent->eax = evmcs_ver;

			break;

		case HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS:
			memcpy(signature, "Linux KVM Hv", 12);

			ent->eax = 0;
			ent->ebx = signature[0];
			ent->ecx = signature[1];
			ent->edx = signature[2];
			break;

		case HYPERV_CPUID_SYNDBG_INTERFACE:
			memcpy(signature, "VS#1\0\0\0\0\0\0\0\0", 12);
			ent->eax = signature[0];
			break;

		case HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES:
			ent->eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
			break;

		default:
			break;
		}
	}

	if (copy_to_user(entries, cpuid_entries,
			 nent * sizeof(struct kvm_cpuid_entry2)))
		return -EFAULT;

	return 0;
}