1 /* 2 * KVM Microsoft Hyper-V emulation 3 * 4 * derived from arch/x86/kvm/x86.c 5 * 6 * Copyright (C) 2006 Qumranet, Inc. 7 * Copyright (C) 2008 Qumranet, Inc. 8 * Copyright IBM Corporation, 2008 9 * Copyright 2010 Red Hat, Inc. and/or its affiliates. 10 * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com> 11 * 12 * Authors: 13 * Avi Kivity <avi@qumranet.com> 14 * Yaniv Kamay <yaniv@qumranet.com> 15 * Amit Shah <amit.shah@qumranet.com> 16 * Ben-Ami Yassour <benami@il.ibm.com> 17 * Andrey Smetanin <asmetanin@virtuozzo.com> 18 * 19 * This work is licensed under the terms of the GNU GPL, version 2. See 20 * the COPYING file in the top-level directory. 21 * 22 */ 23 24 #include "x86.h" 25 #include "lapic.h" 26 #include "ioapic.h" 27 #include "hyperv.h" 28 29 #include <linux/kvm_host.h> 30 #include <linux/highmem.h> 31 #include <linux/sched/cputime.h> 32 #include <linux/eventfd.h> 33 34 #include <asm/apicdef.h> 35 #include <trace/events/kvm.h> 36 37 #include "trace.h" 38 39 #define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64) 40 41 static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer, 42 bool vcpu_kick); 43 44 static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint) 45 { 46 return atomic64_read(&synic->sint[sint]); 47 } 48 49 static inline int synic_get_sint_vector(u64 sint_value) 50 { 51 if (sint_value & HV_SYNIC_SINT_MASKED) 52 return -1; 53 return sint_value & HV_SYNIC_SINT_VECTOR_MASK; 54 } 55 56 static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic, 57 int vector) 58 { 59 int i; 60 61 for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { 62 if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector) 63 return true; 64 } 65 return false; 66 } 67 68 static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic, 69 int vector) 70 { 71 int i; 72 u64 sint_value; 73 74 for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { 75 sint_value = synic_read_sint(synic, i); 76 if (synic_get_sint_vector(sint_value) == vector && 77 sint_value & HV_SYNIC_SINT_AUTO_EOI) 78 return true; 79 } 80 return false; 81 } 82 83 static void synic_update_vector(struct kvm_vcpu_hv_synic *synic, 84 int vector) 85 { 86 if (vector < HV_SYNIC_FIRST_VALID_VECTOR) 87 return; 88 89 if (synic_has_vector_connected(synic, vector)) 90 __set_bit(vector, synic->vec_bitmap); 91 else 92 __clear_bit(vector, synic->vec_bitmap); 93 94 if (synic_has_vector_auto_eoi(synic, vector)) 95 __set_bit(vector, synic->auto_eoi_bitmap); 96 else 97 __clear_bit(vector, synic->auto_eoi_bitmap); 98 } 99 100 static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint, 101 u64 data, bool host) 102 { 103 int vector, old_vector; 104 bool masked; 105 106 vector = data & HV_SYNIC_SINT_VECTOR_MASK; 107 masked = data & HV_SYNIC_SINT_MASKED; 108 109 /* 110 * Valid vectors are 16-255, however, nested Hyper-V attempts to write 111 * default '0x10000' value on boot and this should not #GP. We need to 112 * allow zero-initing the register from host as well. 113 */ 114 if (vector < HV_SYNIC_FIRST_VALID_VECTOR && !host && !masked) 115 return 1; 116 /* 117 * Guest may configure multiple SINTs to use the same vector, so 118 * we maintain a bitmap of vectors handled by synic, and a 119 * bitmap of vectors with auto-eoi behavior. The bitmaps are 120 * updated here, and atomically queried on fast paths. 121 */ 122 old_vector = synic_read_sint(synic, sint) & HV_SYNIC_SINT_VECTOR_MASK; 123 124 atomic64_set(&synic->sint[sint], data); 125 126 synic_update_vector(synic, old_vector); 127 128 synic_update_vector(synic, vector); 129 130 /* Load SynIC vectors into EOI exit bitmap */ 131 kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic)); 132 return 0; 133 } 134 135 static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx) 136 { 137 struct kvm_vcpu *vcpu = NULL; 138 int i; 139 140 if (vpidx >= KVM_MAX_VCPUS) 141 return NULL; 142 143 vcpu = kvm_get_vcpu(kvm, vpidx); 144 if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx) 145 return vcpu; 146 kvm_for_each_vcpu(i, vcpu, kvm) 147 if (vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx) 148 return vcpu; 149 return NULL; 150 } 151 152 static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx) 153 { 154 struct kvm_vcpu *vcpu; 155 struct kvm_vcpu_hv_synic *synic; 156 157 vcpu = get_vcpu_by_vpidx(kvm, vpidx); 158 if (!vcpu) 159 return NULL; 160 synic = vcpu_to_synic(vcpu); 161 return (synic->active) ? synic : NULL; 162 } 163 164 static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint) 165 { 166 struct kvm *kvm = vcpu->kvm; 167 struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); 168 struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); 169 struct kvm_vcpu_hv_stimer *stimer; 170 int gsi, idx; 171 172 trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint); 173 174 /* Try to deliver pending Hyper-V SynIC timers messages */ 175 for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) { 176 stimer = &hv_vcpu->stimer[idx]; 177 if (stimer->msg_pending && stimer->config.enable && 178 !stimer->config.direct_mode && 179 stimer->config.sintx == sint) 180 stimer_mark_pending(stimer, false); 181 } 182 183 idx = srcu_read_lock(&kvm->irq_srcu); 184 gsi = atomic_read(&synic->sint_to_gsi[sint]); 185 if (gsi != -1) 186 kvm_notify_acked_gsi(kvm, gsi); 187 srcu_read_unlock(&kvm->irq_srcu, idx); 188 } 189 190 static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr) 191 { 192 struct kvm_vcpu *vcpu = synic_to_vcpu(synic); 193 struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv; 194 195 hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC; 196 hv_vcpu->exit.u.synic.msr = msr; 197 hv_vcpu->exit.u.synic.control = synic->control; 198 hv_vcpu->exit.u.synic.evt_page = synic->evt_page; 199 hv_vcpu->exit.u.synic.msg_page = synic->msg_page; 200 201 kvm_make_request(KVM_REQ_HV_EXIT, vcpu); 202 } 203 204 static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, 205 u32 msr, u64 data, bool host) 206 { 207 struct kvm_vcpu *vcpu = synic_to_vcpu(synic); 208 int ret; 209 210 if (!synic->active && !host) 211 return 1; 212 213 trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host); 214 215 ret = 0; 216 switch (msr) { 217 case HV_X64_MSR_SCONTROL: 218 synic->control = data; 219 if (!host) 220 synic_exit(synic, msr); 221 break; 222 case HV_X64_MSR_SVERSION: 223 if (!host) { 224 ret = 1; 225 break; 226 } 227 synic->version = data; 228 break; 229 case HV_X64_MSR_SIEFP: 230 if ((data & HV_SYNIC_SIEFP_ENABLE) && !host && 231 !synic->dont_zero_synic_pages) 232 if (kvm_clear_guest(vcpu->kvm, 233 data & PAGE_MASK, PAGE_SIZE)) { 234 ret = 1; 235 break; 236 } 237 synic->evt_page = data; 238 if (!host) 239 synic_exit(synic, msr); 240 break; 241 case HV_X64_MSR_SIMP: 242 if ((data & HV_SYNIC_SIMP_ENABLE) && !host && 243 !synic->dont_zero_synic_pages) 244 if (kvm_clear_guest(vcpu->kvm, 245 data & PAGE_MASK, PAGE_SIZE)) { 246 ret = 1; 247 break; 248 } 249 synic->msg_page = data; 250 if (!host) 251 synic_exit(synic, msr); 252 break; 253 case HV_X64_MSR_EOM: { 254 int i; 255 256 for (i = 0; i < ARRAY_SIZE(synic->sint); i++) 257 kvm_hv_notify_acked_sint(vcpu, i); 258 break; 259 } 260 case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: 261 ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data, host); 262 break; 263 default: 264 ret = 1; 265 break; 266 } 267 return ret; 268 } 269 270 static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata, 271 bool host) 272 { 273 int ret; 274 275 if (!synic->active && !host) 276 return 1; 277 278 ret = 0; 279 switch (msr) { 280 case HV_X64_MSR_SCONTROL: 281 *pdata = synic->control; 282 break; 283 case HV_X64_MSR_SVERSION: 284 *pdata = synic->version; 285 break; 286 case HV_X64_MSR_SIEFP: 287 *pdata = synic->evt_page; 288 break; 289 case HV_X64_MSR_SIMP: 290 *pdata = synic->msg_page; 291 break; 292 case HV_X64_MSR_EOM: 293 *pdata = 0; 294 break; 295 case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: 296 *pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]); 297 break; 298 default: 299 ret = 1; 300 break; 301 } 302 return ret; 303 } 304 305 static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint) 306 { 307 struct kvm_vcpu *vcpu = synic_to_vcpu(synic); 308 struct kvm_lapic_irq irq; 309 int ret, vector; 310 311 if (sint >= ARRAY_SIZE(synic->sint)) 312 return -EINVAL; 313 314 vector = synic_get_sint_vector(synic_read_sint(synic, sint)); 315 if (vector < 0) 316 return -ENOENT; 317 318 memset(&irq, 0, sizeof(irq)); 319 irq.shorthand = APIC_DEST_SELF; 320 irq.dest_mode = APIC_DEST_PHYSICAL; 321 irq.delivery_mode = APIC_DM_FIXED; 322 irq.vector = vector; 323 irq.level = 1; 324 325 ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq, NULL); 326 trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret); 327 return ret; 328 } 329 330 int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint) 331 { 332 struct kvm_vcpu_hv_synic *synic; 333 334 synic = synic_get(kvm, vpidx); 335 if (!synic) 336 return -EINVAL; 337 338 return synic_set_irq(synic, sint); 339 } 340 341 void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector) 342 { 343 struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); 344 int i; 345 346 trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector); 347 348 for (i = 0; i < ARRAY_SIZE(synic->sint); i++) 349 if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector) 350 kvm_hv_notify_acked_sint(vcpu, i); 351 } 352 353 static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi) 354 { 355 struct kvm_vcpu_hv_synic *synic; 356 357 synic = synic_get(kvm, vpidx); 358 if (!synic) 359 return -EINVAL; 360 361 if (sint >= ARRAY_SIZE(synic->sint_to_gsi)) 362 return -EINVAL; 363 364 atomic_set(&synic->sint_to_gsi[sint], gsi); 365 return 0; 366 } 367 368 void kvm_hv_irq_routing_update(struct kvm *kvm) 369 { 370 struct kvm_irq_routing_table *irq_rt; 371 struct kvm_kernel_irq_routing_entry *e; 372 u32 gsi; 373 374 irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu, 375 lockdep_is_held(&kvm->irq_lock)); 376 377 for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) { 378 hlist_for_each_entry(e, &irq_rt->map[gsi], link) { 379 if (e->type == KVM_IRQ_ROUTING_HV_SINT) 380 kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu, 381 e->hv_sint.sint, gsi); 382 } 383 } 384 } 385 386 static void synic_init(struct kvm_vcpu_hv_synic *synic) 387 { 388 int i; 389 390 memset(synic, 0, sizeof(*synic)); 391 synic->version = HV_SYNIC_VERSION_1; 392 for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { 393 atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED); 394 atomic_set(&synic->sint_to_gsi[i], -1); 395 } 396 } 397 398 static u64 get_time_ref_counter(struct kvm *kvm) 399 { 400 struct kvm_hv *hv = &kvm->arch.hyperv; 401 struct kvm_vcpu *vcpu; 402 u64 tsc; 403 404 /* 405 * The guest has not set up the TSC page or the clock isn't 406 * stable, fall back to get_kvmclock_ns. 407 */ 408 if (!hv->tsc_ref.tsc_sequence) 409 return div_u64(get_kvmclock_ns(kvm), 100); 410 411 vcpu = kvm_get_vcpu(kvm, 0); 412 tsc = kvm_read_l1_tsc(vcpu, rdtsc()); 413 return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64) 414 + hv->tsc_ref.tsc_offset; 415 } 416 417 static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer, 418 bool vcpu_kick) 419 { 420 struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); 421 422 set_bit(stimer->index, 423 vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap); 424 kvm_make_request(KVM_REQ_HV_STIMER, vcpu); 425 if (vcpu_kick) 426 kvm_vcpu_kick(vcpu); 427 } 428 429 static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer) 430 { 431 struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); 432 433 trace_kvm_hv_stimer_cleanup(stimer_to_vcpu(stimer)->vcpu_id, 434 stimer->index); 435 436 hrtimer_cancel(&stimer->timer); 437 clear_bit(stimer->index, 438 vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap); 439 stimer->msg_pending = false; 440 stimer->exp_time = 0; 441 } 442 443 static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer) 444 { 445 struct kvm_vcpu_hv_stimer *stimer; 446 447 stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer); 448 trace_kvm_hv_stimer_callback(stimer_to_vcpu(stimer)->vcpu_id, 449 stimer->index); 450 stimer_mark_pending(stimer, true); 451 452 return HRTIMER_NORESTART; 453 } 454 455 /* 456 * stimer_start() assumptions: 457 * a) stimer->count is not equal to 0 458 * b) stimer->config has HV_STIMER_ENABLE flag 459 */ 460 static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) 461 { 462 u64 time_now; 463 ktime_t ktime_now; 464 465 time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm); 466 ktime_now = ktime_get(); 467 468 if (stimer->config.periodic) { 469 if (stimer->exp_time) { 470 if (time_now >= stimer->exp_time) { 471 u64 remainder; 472 473 div64_u64_rem(time_now - stimer->exp_time, 474 stimer->count, &remainder); 475 stimer->exp_time = 476 time_now + (stimer->count - remainder); 477 } 478 } else 479 stimer->exp_time = time_now + stimer->count; 480 481 trace_kvm_hv_stimer_start_periodic( 482 stimer_to_vcpu(stimer)->vcpu_id, 483 stimer->index, 484 time_now, stimer->exp_time); 485 486 hrtimer_start(&stimer->timer, 487 ktime_add_ns(ktime_now, 488 100 * (stimer->exp_time - time_now)), 489 HRTIMER_MODE_ABS); 490 return 0; 491 } 492 stimer->exp_time = stimer->count; 493 if (time_now >= stimer->count) { 494 /* 495 * Expire timer according to Hypervisor Top-Level Functional 496 * specification v4(15.3.1): 497 * "If a one shot is enabled and the specified count is in 498 * the past, it will expire immediately." 499 */ 500 stimer_mark_pending(stimer, false); 501 return 0; 502 } 503 504 trace_kvm_hv_stimer_start_one_shot(stimer_to_vcpu(stimer)->vcpu_id, 505 stimer->index, 506 time_now, stimer->count); 507 508 hrtimer_start(&stimer->timer, 509 ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)), 510 HRTIMER_MODE_ABS); 511 return 0; 512 } 513 514 static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, 515 bool host) 516 { 517 union hv_stimer_config new_config = {.as_uint64 = config}, 518 old_config = {.as_uint64 = stimer->config.as_uint64}; 519 520 trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id, 521 stimer->index, config, host); 522 523 stimer_cleanup(stimer); 524 if (old_config.enable && 525 !new_config.direct_mode && new_config.sintx == 0) 526 new_config.enable = 0; 527 stimer->config.as_uint64 = new_config.as_uint64; 528 529 stimer_mark_pending(stimer, false); 530 return 0; 531 } 532 533 static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count, 534 bool host) 535 { 536 trace_kvm_hv_stimer_set_count(stimer_to_vcpu(stimer)->vcpu_id, 537 stimer->index, count, host); 538 539 stimer_cleanup(stimer); 540 stimer->count = count; 541 if (stimer->count == 0) 542 stimer->config.enable = 0; 543 else if (stimer->config.auto_enable) 544 stimer->config.enable = 1; 545 stimer_mark_pending(stimer, false); 546 return 0; 547 } 548 549 static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig) 550 { 551 *pconfig = stimer->config.as_uint64; 552 return 0; 553 } 554 555 static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount) 556 { 557 *pcount = stimer->count; 558 return 0; 559 } 560 561 static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint, 562 struct hv_message *src_msg, bool no_retry) 563 { 564 struct kvm_vcpu *vcpu = synic_to_vcpu(synic); 565 int msg_off = offsetof(struct hv_message_page, sint_message[sint]); 566 gfn_t msg_page_gfn; 567 struct hv_message_header hv_hdr; 568 int r; 569 570 if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE)) 571 return -ENOENT; 572 573 msg_page_gfn = synic->msg_page >> PAGE_SHIFT; 574 575 /* 576 * Strictly following the spec-mandated ordering would assume setting 577 * .msg_pending before checking .message_type. However, this function 578 * is only called in vcpu context so the entire update is atomic from 579 * guest POV and thus the exact order here doesn't matter. 580 */ 581 r = kvm_vcpu_read_guest_page(vcpu, msg_page_gfn, &hv_hdr.message_type, 582 msg_off + offsetof(struct hv_message, 583 header.message_type), 584 sizeof(hv_hdr.message_type)); 585 if (r < 0) 586 return r; 587 588 if (hv_hdr.message_type != HVMSG_NONE) { 589 if (no_retry) 590 return 0; 591 592 hv_hdr.message_flags.msg_pending = 1; 593 r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn, 594 &hv_hdr.message_flags, 595 msg_off + 596 offsetof(struct hv_message, 597 header.message_flags), 598 sizeof(hv_hdr.message_flags)); 599 if (r < 0) 600 return r; 601 return -EAGAIN; 602 } 603 604 r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn, src_msg, msg_off, 605 sizeof(src_msg->header) + 606 src_msg->header.payload_size); 607 if (r < 0) 608 return r; 609 610 r = synic_set_irq(synic, sint); 611 if (r < 0) 612 return r; 613 if (r == 0) 614 return -EFAULT; 615 return 0; 616 } 617 618 static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer) 619 { 620 struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); 621 struct hv_message *msg = &stimer->msg; 622 struct hv_timer_message_payload *payload = 623 (struct hv_timer_message_payload *)&msg->u.payload; 624 625 /* 626 * To avoid piling up periodic ticks, don't retry message 627 * delivery for them (within "lazy" lost ticks policy). 628 */ 629 bool no_retry = stimer->config.periodic; 630 631 payload->expiration_time = stimer->exp_time; 632 payload->delivery_time = get_time_ref_counter(vcpu->kvm); 633 return synic_deliver_msg(vcpu_to_synic(vcpu), 634 stimer->config.sintx, msg, 635 no_retry); 636 } 637 638 static int stimer_notify_direct(struct kvm_vcpu_hv_stimer *stimer) 639 { 640 struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); 641 struct kvm_lapic_irq irq = { 642 .delivery_mode = APIC_DM_FIXED, 643 .vector = stimer->config.apic_vector 644 }; 645 646 return !kvm_apic_set_irq(vcpu, &irq, NULL); 647 } 648 649 static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer) 650 { 651 int r, direct = stimer->config.direct_mode; 652 653 stimer->msg_pending = true; 654 if (!direct) 655 r = stimer_send_msg(stimer); 656 else 657 r = stimer_notify_direct(stimer); 658 trace_kvm_hv_stimer_expiration(stimer_to_vcpu(stimer)->vcpu_id, 659 stimer->index, direct, r); 660 if (!r) { 661 stimer->msg_pending = false; 662 if (!(stimer->config.periodic)) 663 stimer->config.enable = 0; 664 } 665 } 666 667 void kvm_hv_process_stimers(struct kvm_vcpu *vcpu) 668 { 669 struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); 670 struct kvm_vcpu_hv_stimer *stimer; 671 u64 time_now, exp_time; 672 int i; 673 674 for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) 675 if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) { 676 stimer = &hv_vcpu->stimer[i]; 677 if (stimer->config.enable) { 678 exp_time = stimer->exp_time; 679 680 if (exp_time) { 681 time_now = 682 get_time_ref_counter(vcpu->kvm); 683 if (time_now >= exp_time) 684 stimer_expiration(stimer); 685 } 686 687 if ((stimer->config.enable) && 688 stimer->count) { 689 if (!stimer->msg_pending) 690 stimer_start(stimer); 691 } else 692 stimer_cleanup(stimer); 693 } 694 } 695 } 696 697 void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu) 698 { 699 struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); 700 int i; 701 702 for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) 703 stimer_cleanup(&hv_vcpu->stimer[i]); 704 } 705 706 bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu) 707 { 708 if (!(vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) 709 return false; 710 return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED; 711 } 712 EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled); 713 714 bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu, 715 struct hv_vp_assist_page *assist_page) 716 { 717 if (!kvm_hv_assist_page_enabled(vcpu)) 718 return false; 719 return !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, 720 assist_page, sizeof(*assist_page)); 721 } 722 EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page); 723 724 static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer) 725 { 726 struct hv_message *msg = &stimer->msg; 727 struct hv_timer_message_payload *payload = 728 (struct hv_timer_message_payload *)&msg->u.payload; 729 730 memset(&msg->header, 0, sizeof(msg->header)); 731 msg->header.message_type = HVMSG_TIMER_EXPIRED; 732 msg->header.payload_size = sizeof(*payload); 733 734 payload->timer_index = stimer->index; 735 payload->expiration_time = 0; 736 payload->delivery_time = 0; 737 } 738 739 static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index) 740 { 741 memset(stimer, 0, sizeof(*stimer)); 742 stimer->index = timer_index; 743 hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 744 stimer->timer.function = stimer_timer_callback; 745 stimer_prepare_msg(stimer); 746 } 747 748 void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) 749 { 750 struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); 751 int i; 752 753 synic_init(&hv_vcpu->synic); 754 755 bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); 756 for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) 757 stimer_init(&hv_vcpu->stimer[i], i); 758 } 759 760 void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu) 761 { 762 struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); 763 764 hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu); 765 } 766 767 int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages) 768 { 769 struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); 770 771 /* 772 * Hyper-V SynIC auto EOI SINT's are 773 * not compatible with APICV, so deactivate APICV 774 */ 775 kvm_vcpu_deactivate_apicv(vcpu); 776 synic->active = true; 777 synic->dont_zero_synic_pages = dont_zero_synic_pages; 778 return 0; 779 } 780 781 static bool kvm_hv_msr_partition_wide(u32 msr) 782 { 783 bool r = false; 784 785 switch (msr) { 786 case HV_X64_MSR_GUEST_OS_ID: 787 case HV_X64_MSR_HYPERCALL: 788 case HV_X64_MSR_REFERENCE_TSC: 789 case HV_X64_MSR_TIME_REF_COUNT: 790 case HV_X64_MSR_CRASH_CTL: 791 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: 792 case HV_X64_MSR_RESET: 793 case HV_X64_MSR_REENLIGHTENMENT_CONTROL: 794 case HV_X64_MSR_TSC_EMULATION_CONTROL: 795 case HV_X64_MSR_TSC_EMULATION_STATUS: 796 r = true; 797 break; 798 } 799 800 return r; 801 } 802 803 static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu, 804 u32 index, u64 *pdata) 805 { 806 struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; 807 808 if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param))) 809 return -EINVAL; 810 811 *pdata = hv->hv_crash_param[index]; 812 return 0; 813 } 814 815 static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata) 816 { 817 struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; 818 819 *pdata = hv->hv_crash_ctl; 820 return 0; 821 } 822 823 static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host) 824 { 825 struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; 826 827 if (host) 828 hv->hv_crash_ctl = data & HV_CRASH_CTL_CRASH_NOTIFY; 829 830 if (!host && (data & HV_CRASH_CTL_CRASH_NOTIFY)) { 831 832 vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n", 833 hv->hv_crash_param[0], 834 hv->hv_crash_param[1], 835 hv->hv_crash_param[2], 836 hv->hv_crash_param[3], 837 hv->hv_crash_param[4]); 838 839 /* Send notification about crash to user space */ 840 kvm_make_request(KVM_REQ_HV_CRASH, vcpu); 841 } 842 843 return 0; 844 } 845 846 static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, 847 u32 index, u64 data) 848 { 849 struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; 850 851 if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param))) 852 return -EINVAL; 853 854 hv->hv_crash_param[index] = data; 855 return 0; 856 } 857 858 /* 859 * The kvmclock and Hyper-V TSC page use similar formulas, and converting 860 * between them is possible: 861 * 862 * kvmclock formula: 863 * nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32) 864 * + system_time 865 * 866 * Hyper-V formula: 867 * nsec/100 = ticks * scale / 2^64 + offset 868 * 869 * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula. 870 * By dividing the kvmclock formula by 100 and equating what's left we get: 871 * ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100 872 * scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / 100 873 * scale = tsc_to_system_mul * 2^(32+tsc_shift) / 100 874 * 875 * Now expand the kvmclock formula and divide by 100: 876 * nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32) 877 * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) 878 * + system_time 879 * nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100 880 * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100 881 * + system_time / 100 882 * 883 * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64: 884 * nsec/100 = ticks * scale / 2^64 885 * - tsc_timestamp * scale / 2^64 886 * + system_time / 100 887 * 888 * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out: 889 * offset = system_time / 100 - tsc_timestamp * scale / 2^64 890 * 891 * These two equivalencies are implemented in this function. 892 */ 893 static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock, 894 HV_REFERENCE_TSC_PAGE *tsc_ref) 895 { 896 u64 max_mul; 897 898 if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT)) 899 return false; 900 901 /* 902 * check if scale would overflow, if so we use the time ref counter 903 * tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64 904 * tsc_to_system_mul / 100 >= 2^(32-tsc_shift) 905 * tsc_to_system_mul >= 100 * 2^(32-tsc_shift) 906 */ 907 max_mul = 100ull << (32 - hv_clock->tsc_shift); 908 if (hv_clock->tsc_to_system_mul >= max_mul) 909 return false; 910 911 /* 912 * Otherwise compute the scale and offset according to the formulas 913 * derived above. 914 */ 915 tsc_ref->tsc_scale = 916 mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift), 917 hv_clock->tsc_to_system_mul, 918 100); 919 920 tsc_ref->tsc_offset = hv_clock->system_time; 921 do_div(tsc_ref->tsc_offset, 100); 922 tsc_ref->tsc_offset -= 923 mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 64); 924 return true; 925 } 926 927 void kvm_hv_setup_tsc_page(struct kvm *kvm, 928 struct pvclock_vcpu_time_info *hv_clock) 929 { 930 struct kvm_hv *hv = &kvm->arch.hyperv; 931 u32 tsc_seq; 932 u64 gfn; 933 934 BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence)); 935 BUILD_BUG_ON(offsetof(HV_REFERENCE_TSC_PAGE, tsc_sequence) != 0); 936 937 if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) 938 return; 939 940 mutex_lock(&kvm->arch.hyperv.hv_lock); 941 if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) 942 goto out_unlock; 943 944 gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; 945 /* 946 * Because the TSC parameters only vary when there is a 947 * change in the master clock, do not bother with caching. 948 */ 949 if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn), 950 &tsc_seq, sizeof(tsc_seq)))) 951 goto out_unlock; 952 953 /* 954 * While we're computing and writing the parameters, force the 955 * guest to use the time reference count MSR. 956 */ 957 hv->tsc_ref.tsc_sequence = 0; 958 if (kvm_write_guest(kvm, gfn_to_gpa(gfn), 959 &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence))) 960 goto out_unlock; 961 962 if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref)) 963 goto out_unlock; 964 965 /* Ensure sequence is zero before writing the rest of the struct. */ 966 smp_wmb(); 967 if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref))) 968 goto out_unlock; 969 970 /* 971 * Now switch to the TSC page mechanism by writing the sequence. 972 */ 973 tsc_seq++; 974 if (tsc_seq == 0xFFFFFFFF || tsc_seq == 0) 975 tsc_seq = 1; 976 977 /* Write the struct entirely before the non-zero sequence. */ 978 smp_wmb(); 979 980 hv->tsc_ref.tsc_sequence = tsc_seq; 981 kvm_write_guest(kvm, gfn_to_gpa(gfn), 982 &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)); 983 out_unlock: 984 mutex_unlock(&kvm->arch.hyperv.hv_lock); 985 } 986 987 static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, 988 bool host) 989 { 990 struct kvm *kvm = vcpu->kvm; 991 struct kvm_hv *hv = &kvm->arch.hyperv; 992 993 switch (msr) { 994 case HV_X64_MSR_GUEST_OS_ID: 995 hv->hv_guest_os_id = data; 996 /* setting guest os id to zero disables hypercall page */ 997 if (!hv->hv_guest_os_id) 998 hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE; 999 break; 1000 case HV_X64_MSR_HYPERCALL: { 1001 u64 gfn; 1002 unsigned long addr; 1003 u8 instructions[4]; 1004 1005 /* if guest os id is not set hypercall should remain disabled */ 1006 if (!hv->hv_guest_os_id) 1007 break; 1008 if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) { 1009 hv->hv_hypercall = data; 1010 break; 1011 } 1012 gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT; 1013 addr = gfn_to_hva(kvm, gfn); 1014 if (kvm_is_error_hva(addr)) 1015 return 1; 1016 kvm_x86_ops->patch_hypercall(vcpu, instructions); 1017 ((unsigned char *)instructions)[3] = 0xc3; /* ret */ 1018 if (__copy_to_user((void __user *)addr, instructions, 4)) 1019 return 1; 1020 hv->hv_hypercall = data; 1021 mark_page_dirty(kvm, gfn); 1022 break; 1023 } 1024 case HV_X64_MSR_REFERENCE_TSC: 1025 hv->hv_tsc_page = data; 1026 if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) 1027 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); 1028 break; 1029 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: 1030 return kvm_hv_msr_set_crash_data(vcpu, 1031 msr - HV_X64_MSR_CRASH_P0, 1032 data); 1033 case HV_X64_MSR_CRASH_CTL: 1034 return kvm_hv_msr_set_crash_ctl(vcpu, data, host); 1035 case HV_X64_MSR_RESET: 1036 if (data == 1) { 1037 vcpu_debug(vcpu, "hyper-v reset requested\n"); 1038 kvm_make_request(KVM_REQ_HV_RESET, vcpu); 1039 } 1040 break; 1041 case HV_X64_MSR_REENLIGHTENMENT_CONTROL: 1042 hv->hv_reenlightenment_control = data; 1043 break; 1044 case HV_X64_MSR_TSC_EMULATION_CONTROL: 1045 hv->hv_tsc_emulation_control = data; 1046 break; 1047 case HV_X64_MSR_TSC_EMULATION_STATUS: 1048 hv->hv_tsc_emulation_status = data; 1049 break; 1050 case HV_X64_MSR_TIME_REF_COUNT: 1051 /* read-only, but still ignore it if host-initiated */ 1052 if (!host) 1053 return 1; 1054 break; 1055 default: 1056 vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", 1057 msr, data); 1058 return 1; 1059 } 1060 return 0; 1061 } 1062 1063 /* Calculate cpu time spent by current task in 100ns units */ 1064 static u64 current_task_runtime_100ns(void) 1065 { 1066 u64 utime, stime; 1067 1068 task_cputime_adjusted(current, &utime, &stime); 1069 1070 return div_u64(utime + stime, 100); 1071 } 1072 1073 static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) 1074 { 1075 struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv; 1076 1077 switch (msr) { 1078 case HV_X64_MSR_VP_INDEX: { 1079 struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; 1080 int vcpu_idx = kvm_vcpu_get_idx(vcpu); 1081 u32 new_vp_index = (u32)data; 1082 1083 if (!host || new_vp_index >= KVM_MAX_VCPUS) 1084 return 1; 1085 1086 if (new_vp_index == hv_vcpu->vp_index) 1087 return 0; 1088 1089 /* 1090 * The VP index is initialized to vcpu_index by 1091 * kvm_hv_vcpu_postcreate so they initially match. Now the 1092 * VP index is changing, adjust num_mismatched_vp_indexes if 1093 * it now matches or no longer matches vcpu_idx. 1094 */ 1095 if (hv_vcpu->vp_index == vcpu_idx) 1096 atomic_inc(&hv->num_mismatched_vp_indexes); 1097 else if (new_vp_index == vcpu_idx) 1098 atomic_dec(&hv->num_mismatched_vp_indexes); 1099 1100 hv_vcpu->vp_index = new_vp_index; 1101 break; 1102 } 1103 case HV_X64_MSR_VP_ASSIST_PAGE: { 1104 u64 gfn; 1105 unsigned long addr; 1106 1107 if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) { 1108 hv_vcpu->hv_vapic = data; 1109 if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0)) 1110 return 1; 1111 break; 1112 } 1113 gfn = data >> HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT; 1114 addr = kvm_vcpu_gfn_to_hva(vcpu, gfn); 1115 if (kvm_is_error_hva(addr)) 1116 return 1; 1117 1118 /* 1119 * Clear apic_assist portion of f(struct hv_vp_assist_page 1120 * only, there can be valuable data in the rest which needs 1121 * to be preserved e.g. on migration. 1122 */ 1123 if (__clear_user((void __user *)addr, sizeof(u32))) 1124 return 1; 1125 hv_vcpu->hv_vapic = data; 1126 kvm_vcpu_mark_page_dirty(vcpu, gfn); 1127 if (kvm_lapic_enable_pv_eoi(vcpu, 1128 gfn_to_gpa(gfn) | KVM_MSR_ENABLED, 1129 sizeof(struct hv_vp_assist_page))) 1130 return 1; 1131 break; 1132 } 1133 case HV_X64_MSR_EOI: 1134 return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data); 1135 case HV_X64_MSR_ICR: 1136 return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data); 1137 case HV_X64_MSR_TPR: 1138 return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); 1139 case HV_X64_MSR_VP_RUNTIME: 1140 if (!host) 1141 return 1; 1142 hv_vcpu->runtime_offset = data - current_task_runtime_100ns(); 1143 break; 1144 case HV_X64_MSR_SCONTROL: 1145 case HV_X64_MSR_SVERSION: 1146 case HV_X64_MSR_SIEFP: 1147 case HV_X64_MSR_SIMP: 1148 case HV_X64_MSR_EOM: 1149 case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: 1150 return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host); 1151 case HV_X64_MSR_STIMER0_CONFIG: 1152 case HV_X64_MSR_STIMER1_CONFIG: 1153 case HV_X64_MSR_STIMER2_CONFIG: 1154 case HV_X64_MSR_STIMER3_CONFIG: { 1155 int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2; 1156 1157 return stimer_set_config(vcpu_to_stimer(vcpu, timer_index), 1158 data, host); 1159 } 1160 case HV_X64_MSR_STIMER0_COUNT: 1161 case HV_X64_MSR_STIMER1_COUNT: 1162 case HV_X64_MSR_STIMER2_COUNT: 1163 case HV_X64_MSR_STIMER3_COUNT: { 1164 int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2; 1165 1166 return stimer_set_count(vcpu_to_stimer(vcpu, timer_index), 1167 data, host); 1168 } 1169 case HV_X64_MSR_TSC_FREQUENCY: 1170 case HV_X64_MSR_APIC_FREQUENCY: 1171 /* read-only, but still ignore it if host-initiated */ 1172 if (!host) 1173 return 1; 1174 break; 1175 default: 1176 vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", 1177 msr, data); 1178 return 1; 1179 } 1180 1181 return 0; 1182 } 1183 1184 static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 1185 { 1186 u64 data = 0; 1187 struct kvm *kvm = vcpu->kvm; 1188 struct kvm_hv *hv = &kvm->arch.hyperv; 1189 1190 switch (msr) { 1191 case HV_X64_MSR_GUEST_OS_ID: 1192 data = hv->hv_guest_os_id; 1193 break; 1194 case HV_X64_MSR_HYPERCALL: 1195 data = hv->hv_hypercall; 1196 break; 1197 case HV_X64_MSR_TIME_REF_COUNT: 1198 data = get_time_ref_counter(kvm); 1199 break; 1200 case HV_X64_MSR_REFERENCE_TSC: 1201 data = hv->hv_tsc_page; 1202 break; 1203 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: 1204 return kvm_hv_msr_get_crash_data(vcpu, 1205 msr - HV_X64_MSR_CRASH_P0, 1206 pdata); 1207 case HV_X64_MSR_CRASH_CTL: 1208 return kvm_hv_msr_get_crash_ctl(vcpu, pdata); 1209 case HV_X64_MSR_RESET: 1210 data = 0; 1211 break; 1212 case HV_X64_MSR_REENLIGHTENMENT_CONTROL: 1213 data = hv->hv_reenlightenment_control; 1214 break; 1215 case HV_X64_MSR_TSC_EMULATION_CONTROL: 1216 data = hv->hv_tsc_emulation_control; 1217 break; 1218 case HV_X64_MSR_TSC_EMULATION_STATUS: 1219 data = hv->hv_tsc_emulation_status; 1220 break; 1221 default: 1222 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); 1223 return 1; 1224 } 1225 1226 *pdata = data; 1227 return 0; 1228 } 1229 1230 static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, 1231 bool host) 1232 { 1233 u64 data = 0; 1234 struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv; 1235 1236 switch (msr) { 1237 case HV_X64_MSR_VP_INDEX: 1238 data = hv_vcpu->vp_index; 1239 break; 1240 case HV_X64_MSR_EOI: 1241 return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata); 1242 case HV_X64_MSR_ICR: 1243 return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); 1244 case HV_X64_MSR_TPR: 1245 return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); 1246 case HV_X64_MSR_VP_ASSIST_PAGE: 1247 data = hv_vcpu->hv_vapic; 1248 break; 1249 case HV_X64_MSR_VP_RUNTIME: 1250 data = current_task_runtime_100ns() + hv_vcpu->runtime_offset; 1251 break; 1252 case HV_X64_MSR_SCONTROL: 1253 case HV_X64_MSR_SVERSION: 1254 case HV_X64_MSR_SIEFP: 1255 case HV_X64_MSR_SIMP: 1256 case HV_X64_MSR_EOM: 1257 case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: 1258 return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata, host); 1259 case HV_X64_MSR_STIMER0_CONFIG: 1260 case HV_X64_MSR_STIMER1_CONFIG: 1261 case HV_X64_MSR_STIMER2_CONFIG: 1262 case HV_X64_MSR_STIMER3_CONFIG: { 1263 int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2; 1264 1265 return stimer_get_config(vcpu_to_stimer(vcpu, timer_index), 1266 pdata); 1267 } 1268 case HV_X64_MSR_STIMER0_COUNT: 1269 case HV_X64_MSR_STIMER1_COUNT: 1270 case HV_X64_MSR_STIMER2_COUNT: 1271 case HV_X64_MSR_STIMER3_COUNT: { 1272 int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2; 1273 1274 return stimer_get_count(vcpu_to_stimer(vcpu, timer_index), 1275 pdata); 1276 } 1277 case HV_X64_MSR_TSC_FREQUENCY: 1278 data = (u64)vcpu->arch.virtual_tsc_khz * 1000; 1279 break; 1280 case HV_X64_MSR_APIC_FREQUENCY: 1281 data = APIC_BUS_FREQUENCY; 1282 break; 1283 default: 1284 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); 1285 return 1; 1286 } 1287 *pdata = data; 1288 return 0; 1289 } 1290 1291 int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) 1292 { 1293 if (kvm_hv_msr_partition_wide(msr)) { 1294 int r; 1295 1296 mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock); 1297 r = kvm_hv_set_msr_pw(vcpu, msr, data, host); 1298 mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock); 1299 return r; 1300 } else 1301 return kvm_hv_set_msr(vcpu, msr, data, host); 1302 } 1303 1304 int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) 1305 { 1306 if (kvm_hv_msr_partition_wide(msr)) { 1307 int r; 1308 1309 mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock); 1310 r = kvm_hv_get_msr_pw(vcpu, msr, pdata); 1311 mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock); 1312 return r; 1313 } else 1314 return kvm_hv_get_msr(vcpu, msr, pdata, host); 1315 } 1316 1317 static __always_inline unsigned long *sparse_set_to_vcpu_mask( 1318 struct kvm *kvm, u64 *sparse_banks, u64 valid_bank_mask, 1319 u64 *vp_bitmap, unsigned long *vcpu_bitmap) 1320 { 1321 struct kvm_hv *hv = &kvm->arch.hyperv; 1322 struct kvm_vcpu *vcpu; 1323 int i, bank, sbank = 0; 1324 1325 memset(vp_bitmap, 0, 1326 KVM_HV_MAX_SPARSE_VCPU_SET_BITS * sizeof(*vp_bitmap)); 1327 for_each_set_bit(bank, (unsigned long *)&valid_bank_mask, 1328 KVM_HV_MAX_SPARSE_VCPU_SET_BITS) 1329 vp_bitmap[bank] = sparse_banks[sbank++]; 1330 1331 if (likely(!atomic_read(&hv->num_mismatched_vp_indexes))) { 1332 /* for all vcpus vp_index == vcpu_idx */ 1333 return (unsigned long *)vp_bitmap; 1334 } 1335 1336 bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS); 1337 kvm_for_each_vcpu(i, vcpu, kvm) { 1338 if (test_bit(vcpu_to_hv_vcpu(vcpu)->vp_index, 1339 (unsigned long *)vp_bitmap)) 1340 __set_bit(i, vcpu_bitmap); 1341 } 1342 return vcpu_bitmap; 1343 } 1344 1345 static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa, 1346 u16 rep_cnt, bool ex) 1347 { 1348 struct kvm *kvm = current_vcpu->kvm; 1349 struct kvm_vcpu_hv *hv_vcpu = ¤t_vcpu->arch.hyperv; 1350 struct hv_tlb_flush_ex flush_ex; 1351 struct hv_tlb_flush flush; 1352 u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS]; 1353 DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS); 1354 unsigned long *vcpu_mask; 1355 u64 valid_bank_mask; 1356 u64 sparse_banks[64]; 1357 int sparse_banks_len; 1358 bool all_cpus; 1359 1360 if (!ex) { 1361 if (unlikely(kvm_read_guest(kvm, ingpa, &flush, sizeof(flush)))) 1362 return HV_STATUS_INVALID_HYPERCALL_INPUT; 1363 1364 trace_kvm_hv_flush_tlb(flush.processor_mask, 1365 flush.address_space, flush.flags); 1366 1367 valid_bank_mask = BIT_ULL(0); 1368 sparse_banks[0] = flush.processor_mask; 1369 all_cpus = flush.flags & HV_FLUSH_ALL_PROCESSORS; 1370 } else { 1371 if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex, 1372 sizeof(flush_ex)))) 1373 return HV_STATUS_INVALID_HYPERCALL_INPUT; 1374 1375 trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask, 1376 flush_ex.hv_vp_set.format, 1377 flush_ex.address_space, 1378 flush_ex.flags); 1379 1380 valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask; 1381 all_cpus = flush_ex.hv_vp_set.format != 1382 HV_GENERIC_SET_SPARSE_4K; 1383 1384 sparse_banks_len = 1385 bitmap_weight((unsigned long *)&valid_bank_mask, 64) * 1386 sizeof(sparse_banks[0]); 1387 1388 if (!sparse_banks_len && !all_cpus) 1389 goto ret_success; 1390 1391 if (!all_cpus && 1392 kvm_read_guest(kvm, 1393 ingpa + offsetof(struct hv_tlb_flush_ex, 1394 hv_vp_set.bank_contents), 1395 sparse_banks, 1396 sparse_banks_len)) 1397 return HV_STATUS_INVALID_HYPERCALL_INPUT; 1398 } 1399 1400 cpumask_clear(&hv_vcpu->tlb_flush); 1401 1402 vcpu_mask = all_cpus ? NULL : 1403 sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, 1404 vp_bitmap, vcpu_bitmap); 1405 1406 /* 1407 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't 1408 * analyze it here, flush TLB regardless of the specified address space. 1409 */ 1410 kvm_make_vcpus_request_mask(kvm, 1411 KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP, 1412 vcpu_mask, &hv_vcpu->tlb_flush); 1413 1414 ret_success: 1415 /* We always do full TLB flush, set rep_done = rep_cnt. */ 1416 return (u64)HV_STATUS_SUCCESS | 1417 ((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET); 1418 } 1419 1420 static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector, 1421 unsigned long *vcpu_bitmap) 1422 { 1423 struct kvm_lapic_irq irq = { 1424 .delivery_mode = APIC_DM_FIXED, 1425 .vector = vector 1426 }; 1427 struct kvm_vcpu *vcpu; 1428 int i; 1429 1430 kvm_for_each_vcpu(i, vcpu, kvm) { 1431 if (vcpu_bitmap && !test_bit(i, vcpu_bitmap)) 1432 continue; 1433 1434 /* We fail only when APIC is disabled */ 1435 kvm_apic_set_irq(vcpu, &irq, NULL); 1436 } 1437 } 1438 1439 static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa, 1440 bool ex, bool fast) 1441 { 1442 struct kvm *kvm = current_vcpu->kvm; 1443 struct hv_send_ipi_ex send_ipi_ex; 1444 struct hv_send_ipi send_ipi; 1445 u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS]; 1446 DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS); 1447 unsigned long *vcpu_mask; 1448 unsigned long valid_bank_mask; 1449 u64 sparse_banks[64]; 1450 int sparse_banks_len; 1451 u32 vector; 1452 bool all_cpus; 1453 1454 if (!ex) { 1455 if (!fast) { 1456 if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi, 1457 sizeof(send_ipi)))) 1458 return HV_STATUS_INVALID_HYPERCALL_INPUT; 1459 sparse_banks[0] = send_ipi.cpu_mask; 1460 vector = send_ipi.vector; 1461 } else { 1462 /* 'reserved' part of hv_send_ipi should be 0 */ 1463 if (unlikely(ingpa >> 32 != 0)) 1464 return HV_STATUS_INVALID_HYPERCALL_INPUT; 1465 sparse_banks[0] = outgpa; 1466 vector = (u32)ingpa; 1467 } 1468 all_cpus = false; 1469 valid_bank_mask = BIT_ULL(0); 1470 1471 trace_kvm_hv_send_ipi(vector, sparse_banks[0]); 1472 } else { 1473 if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi_ex, 1474 sizeof(send_ipi_ex)))) 1475 return HV_STATUS_INVALID_HYPERCALL_INPUT; 1476 1477 trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector, 1478 send_ipi_ex.vp_set.format, 1479 send_ipi_ex.vp_set.valid_bank_mask); 1480 1481 vector = send_ipi_ex.vector; 1482 valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask; 1483 sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) * 1484 sizeof(sparse_banks[0]); 1485 1486 all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL; 1487 1488 if (!sparse_banks_len) 1489 goto ret_success; 1490 1491 if (!all_cpus && 1492 kvm_read_guest(kvm, 1493 ingpa + offsetof(struct hv_send_ipi_ex, 1494 vp_set.bank_contents), 1495 sparse_banks, 1496 sparse_banks_len)) 1497 return HV_STATUS_INVALID_HYPERCALL_INPUT; 1498 } 1499 1500 if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) 1501 return HV_STATUS_INVALID_HYPERCALL_INPUT; 1502 1503 vcpu_mask = all_cpus ? NULL : 1504 sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, 1505 vp_bitmap, vcpu_bitmap); 1506 1507 kvm_send_ipi_to_many(kvm, vector, vcpu_mask); 1508 1509 ret_success: 1510 return HV_STATUS_SUCCESS; 1511 } 1512 1513 bool kvm_hv_hypercall_enabled(struct kvm *kvm) 1514 { 1515 return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE; 1516 } 1517 1518 static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) 1519 { 1520 bool longmode; 1521 1522 longmode = is_64_bit_mode(vcpu); 1523 if (longmode) 1524 kvm_register_write(vcpu, VCPU_REGS_RAX, result); 1525 else { 1526 kvm_register_write(vcpu, VCPU_REGS_RDX, result >> 32); 1527 kvm_register_write(vcpu, VCPU_REGS_RAX, result & 0xffffffff); 1528 } 1529 } 1530 1531 static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result) 1532 { 1533 kvm_hv_hypercall_set_result(vcpu, result); 1534 ++vcpu->stat.hypercalls; 1535 return kvm_skip_emulated_instruction(vcpu); 1536 } 1537 1538 static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) 1539 { 1540 return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result); 1541 } 1542 1543 static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param) 1544 { 1545 struct eventfd_ctx *eventfd; 1546 1547 if (unlikely(!fast)) { 1548 int ret; 1549 gpa_t gpa = param; 1550 1551 if ((gpa & (__alignof__(param) - 1)) || 1552 offset_in_page(gpa) + sizeof(param) > PAGE_SIZE) 1553 return HV_STATUS_INVALID_ALIGNMENT; 1554 1555 ret = kvm_vcpu_read_guest(vcpu, gpa, ¶m, sizeof(param)); 1556 if (ret < 0) 1557 return HV_STATUS_INVALID_ALIGNMENT; 1558 } 1559 1560 /* 1561 * Per spec, bits 32-47 contain the extra "flag number". However, we 1562 * have no use for it, and in all known usecases it is zero, so just 1563 * report lookup failure if it isn't. 1564 */ 1565 if (param & 0xffff00000000ULL) 1566 return HV_STATUS_INVALID_PORT_ID; 1567 /* remaining bits are reserved-zero */ 1568 if (param & ~KVM_HYPERV_CONN_ID_MASK) 1569 return HV_STATUS_INVALID_HYPERCALL_INPUT; 1570 1571 /* the eventfd is protected by vcpu->kvm->srcu, but conn_to_evt isn't */ 1572 rcu_read_lock(); 1573 eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param); 1574 rcu_read_unlock(); 1575 if (!eventfd) 1576 return HV_STATUS_INVALID_PORT_ID; 1577 1578 eventfd_signal(eventfd, 1); 1579 return HV_STATUS_SUCCESS; 1580 } 1581 1582 int kvm_hv_hypercall(struct kvm_vcpu *vcpu) 1583 { 1584 u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS; 1585 uint16_t code, rep_idx, rep_cnt; 1586 bool fast, longmode, rep; 1587 1588 /* 1589 * hypercall generates UD from non zero cpl and real mode 1590 * per HYPER-V spec 1591 */ 1592 if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) { 1593 kvm_queue_exception(vcpu, UD_VECTOR); 1594 return 1; 1595 } 1596 1597 longmode = is_64_bit_mode(vcpu); 1598 1599 if (!longmode) { 1600 param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | 1601 (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff); 1602 ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) | 1603 (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff); 1604 outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) | 1605 (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff); 1606 } 1607 #ifdef CONFIG_X86_64 1608 else { 1609 param = kvm_register_read(vcpu, VCPU_REGS_RCX); 1610 ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX); 1611 outgpa = kvm_register_read(vcpu, VCPU_REGS_R8); 1612 } 1613 #endif 1614 1615 code = param & 0xffff; 1616 fast = !!(param & HV_HYPERCALL_FAST_BIT); 1617 rep_cnt = (param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff; 1618 rep_idx = (param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff; 1619 rep = !!(rep_cnt || rep_idx); 1620 1621 trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa); 1622 1623 switch (code) { 1624 case HVCALL_NOTIFY_LONG_SPIN_WAIT: 1625 if (unlikely(rep)) { 1626 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1627 break; 1628 } 1629 kvm_vcpu_on_spin(vcpu, true); 1630 break; 1631 case HVCALL_SIGNAL_EVENT: 1632 if (unlikely(rep)) { 1633 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1634 break; 1635 } 1636 ret = kvm_hvcall_signal_event(vcpu, fast, ingpa); 1637 if (ret != HV_STATUS_INVALID_PORT_ID) 1638 break; 1639 /* fall through - maybe userspace knows this conn_id. */ 1640 case HVCALL_POST_MESSAGE: 1641 /* don't bother userspace if it has no way to handle it */ 1642 if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) { 1643 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1644 break; 1645 } 1646 vcpu->run->exit_reason = KVM_EXIT_HYPERV; 1647 vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL; 1648 vcpu->run->hyperv.u.hcall.input = param; 1649 vcpu->run->hyperv.u.hcall.params[0] = ingpa; 1650 vcpu->run->hyperv.u.hcall.params[1] = outgpa; 1651 vcpu->arch.complete_userspace_io = 1652 kvm_hv_hypercall_complete_userspace; 1653 return 0; 1654 case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST: 1655 if (unlikely(fast || !rep_cnt || rep_idx)) { 1656 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1657 break; 1658 } 1659 ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false); 1660 break; 1661 case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE: 1662 if (unlikely(fast || rep)) { 1663 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1664 break; 1665 } 1666 ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false); 1667 break; 1668 case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX: 1669 if (unlikely(fast || !rep_cnt || rep_idx)) { 1670 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1671 break; 1672 } 1673 ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true); 1674 break; 1675 case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX: 1676 if (unlikely(fast || rep)) { 1677 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1678 break; 1679 } 1680 ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true); 1681 break; 1682 case HVCALL_SEND_IPI: 1683 if (unlikely(rep)) { 1684 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1685 break; 1686 } 1687 ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, false, fast); 1688 break; 1689 case HVCALL_SEND_IPI_EX: 1690 if (unlikely(fast || rep)) { 1691 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1692 break; 1693 } 1694 ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, true, false); 1695 break; 1696 default: 1697 ret = HV_STATUS_INVALID_HYPERCALL_CODE; 1698 break; 1699 } 1700 1701 return kvm_hv_hypercall_complete(vcpu, ret); 1702 } 1703 1704 void kvm_hv_init_vm(struct kvm *kvm) 1705 { 1706 mutex_init(&kvm->arch.hyperv.hv_lock); 1707 idr_init(&kvm->arch.hyperv.conn_to_evt); 1708 } 1709 1710 void kvm_hv_destroy_vm(struct kvm *kvm) 1711 { 1712 struct eventfd_ctx *eventfd; 1713 int i; 1714 1715 idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i) 1716 eventfd_ctx_put(eventfd); 1717 idr_destroy(&kvm->arch.hyperv.conn_to_evt); 1718 } 1719 1720 static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd) 1721 { 1722 struct kvm_hv *hv = &kvm->arch.hyperv; 1723 struct eventfd_ctx *eventfd; 1724 int ret; 1725 1726 eventfd = eventfd_ctx_fdget(fd); 1727 if (IS_ERR(eventfd)) 1728 return PTR_ERR(eventfd); 1729 1730 mutex_lock(&hv->hv_lock); 1731 ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1, 1732 GFP_KERNEL); 1733 mutex_unlock(&hv->hv_lock); 1734 1735 if (ret >= 0) 1736 return 0; 1737 1738 if (ret == -ENOSPC) 1739 ret = -EEXIST; 1740 eventfd_ctx_put(eventfd); 1741 return ret; 1742 } 1743 1744 static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id) 1745 { 1746 struct kvm_hv *hv = &kvm->arch.hyperv; 1747 struct eventfd_ctx *eventfd; 1748 1749 mutex_lock(&hv->hv_lock); 1750 eventfd = idr_remove(&hv->conn_to_evt, conn_id); 1751 mutex_unlock(&hv->hv_lock); 1752 1753 if (!eventfd) 1754 return -ENOENT; 1755 1756 synchronize_srcu(&kvm->srcu); 1757 eventfd_ctx_put(eventfd); 1758 return 0; 1759 } 1760 1761 int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args) 1762 { 1763 if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) || 1764 (args->conn_id & ~KVM_HYPERV_CONN_ID_MASK)) 1765 return -EINVAL; 1766 1767 if (args->flags == KVM_HYPERV_EVENTFD_DEASSIGN) 1768 return kvm_hv_eventfd_deassign(kvm, args->conn_id); 1769 return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd); 1770 } 1771 1772 int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, 1773 struct kvm_cpuid_entry2 __user *entries) 1774 { 1775 uint16_t evmcs_ver = kvm_x86_ops->nested_get_evmcs_version(vcpu); 1776 struct kvm_cpuid_entry2 cpuid_entries[] = { 1777 { .function = HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS }, 1778 { .function = HYPERV_CPUID_INTERFACE }, 1779 { .function = HYPERV_CPUID_VERSION }, 1780 { .function = HYPERV_CPUID_FEATURES }, 1781 { .function = HYPERV_CPUID_ENLIGHTMENT_INFO }, 1782 { .function = HYPERV_CPUID_IMPLEMENT_LIMITS }, 1783 { .function = HYPERV_CPUID_NESTED_FEATURES }, 1784 }; 1785 int i, nent = ARRAY_SIZE(cpuid_entries); 1786 1787 /* Skip NESTED_FEATURES if eVMCS is not supported */ 1788 if (!evmcs_ver) 1789 --nent; 1790 1791 if (cpuid->nent < nent) 1792 return -E2BIG; 1793 1794 if (cpuid->nent > nent) 1795 cpuid->nent = nent; 1796 1797 for (i = 0; i < nent; i++) { 1798 struct kvm_cpuid_entry2 *ent = &cpuid_entries[i]; 1799 u32 signature[3]; 1800 1801 switch (ent->function) { 1802 case HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS: 1803 memcpy(signature, "Linux KVM Hv", 12); 1804 1805 ent->eax = HYPERV_CPUID_NESTED_FEATURES; 1806 ent->ebx = signature[0]; 1807 ent->ecx = signature[1]; 1808 ent->edx = signature[2]; 1809 break; 1810 1811 case HYPERV_CPUID_INTERFACE: 1812 memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12); 1813 ent->eax = signature[0]; 1814 break; 1815 1816 case HYPERV_CPUID_VERSION: 1817 /* 1818 * We implement some Hyper-V 2016 functions so let's use 1819 * this version. 1820 */ 1821 ent->eax = 0x00003839; 1822 ent->ebx = 0x000A0000; 1823 break; 1824 1825 case HYPERV_CPUID_FEATURES: 1826 ent->eax |= HV_X64_MSR_VP_RUNTIME_AVAILABLE; 1827 ent->eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE; 1828 ent->eax |= HV_X64_MSR_SYNIC_AVAILABLE; 1829 ent->eax |= HV_MSR_SYNTIMER_AVAILABLE; 1830 ent->eax |= HV_X64_MSR_APIC_ACCESS_AVAILABLE; 1831 ent->eax |= HV_X64_MSR_HYPERCALL_AVAILABLE; 1832 ent->eax |= HV_X64_MSR_VP_INDEX_AVAILABLE; 1833 ent->eax |= HV_X64_MSR_RESET_AVAILABLE; 1834 ent->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE; 1835 ent->eax |= HV_X64_ACCESS_FREQUENCY_MSRS; 1836 ent->eax |= HV_X64_ACCESS_REENLIGHTENMENT; 1837 1838 ent->ebx |= HV_X64_POST_MESSAGES; 1839 ent->ebx |= HV_X64_SIGNAL_EVENTS; 1840 1841 ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE; 1842 ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; 1843 ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE; 1844 1845 break; 1846 1847 case HYPERV_CPUID_ENLIGHTMENT_INFO: 1848 ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED; 1849 ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED; 1850 ent->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED; 1851 ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED; 1852 ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED; 1853 if (evmcs_ver) 1854 ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; 1855 1856 /* 1857 * Default number of spinlock retry attempts, matches 1858 * HyperV 2016. 1859 */ 1860 ent->ebx = 0x00000FFF; 1861 1862 break; 1863 1864 case HYPERV_CPUID_IMPLEMENT_LIMITS: 1865 /* Maximum number of virtual processors */ 1866 ent->eax = KVM_MAX_VCPUS; 1867 /* 1868 * Maximum number of logical processors, matches 1869 * HyperV 2016. 1870 */ 1871 ent->ebx = 64; 1872 1873 break; 1874 1875 case HYPERV_CPUID_NESTED_FEATURES: 1876 ent->eax = evmcs_ver; 1877 1878 break; 1879 1880 default: 1881 break; 1882 } 1883 } 1884 1885 if (copy_to_user(entries, cpuid_entries, 1886 nent * sizeof(struct kvm_cpuid_entry2))) 1887 return -EFAULT; 1888 1889 return 0; 1890 } 1891