/*
 * KVM Microsoft Hyper-V emulation
 *
 * derived from arch/x86/kvm/x86.c
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2008 Qumranet, Inc.
 * Copyright IBM Corporation, 2008
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Amit Shah    <amit.shah@qumranet.com>
 *   Ben-Ami Yassour <benami@il.ibm.com>
 *   Andrey Smetanin <asmetanin@virtuozzo.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 */

#include "x86.h"
#include "lapic.h"
#include "ioapic.h"
#include "hyperv.h"

#include <linux/kvm_host.h>
#include <linux/highmem.h>
#include <linux/sched/cputime.h>
#include <linux/eventfd.h>

#include <asm/apicdef.h>
#include <trace/events/kvm.h>

#include "trace.h"

static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint)
{
	return atomic64_read(&synic->sint[sint]);
}

static inline int synic_get_sint_vector(u64 sint_value)
{
	if (sint_value & HV_SYNIC_SINT_MASKED)
		return -1;
	return sint_value & HV_SYNIC_SINT_VECTOR_MASK;
}

static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic,
				       int vector)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
		if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
			return true;
	}
	return false;
}

static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic,
				      int vector)
{
	int i;
	u64 sint_value;

	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
		sint_value = synic_read_sint(synic, i);
		if (synic_get_sint_vector(sint_value) == vector &&
		    sint_value & HV_SYNIC_SINT_AUTO_EOI)
			return true;
	}
	return false;
}

static void synic_update_vector(struct kvm_vcpu_hv_synic *synic,
				int vector)
{
	if (vector < HV_SYNIC_FIRST_VALID_VECTOR)
		return;

	if (synic_has_vector_connected(synic, vector))
		__set_bit(vector, synic->vec_bitmap);
	else
		__clear_bit(vector, synic->vec_bitmap);

	if (synic_has_vector_auto_eoi(synic, vector))
		__set_bit(vector, synic->auto_eoi_bitmap);
	else
		__clear_bit(vector, synic->auto_eoi_bitmap);
}

static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
			  u64 data, bool host)
{
	int vector, old_vector;
	bool masked;

	vector = data & HV_SYNIC_SINT_VECTOR_MASK;
	masked = data & HV_SYNIC_SINT_MASKED;

	/*
	 * Valid vectors are 16-255, however, nested Hyper-V attempts to write
	 * default '0x10000' value on boot and this should not #GP. We need to
	 * allow zero-initing the register from host as well.
	 */
	if (vector < HV_SYNIC_FIRST_VALID_VECTOR && !host && !masked)
		return 1;
	/*
	 * Guest may configure multiple SINTs to use the same vector, so
	 * we maintain a bitmap of vectors handled by synic, and a
	 * bitmap of vectors with auto-eoi behavior. The bitmaps are
	 * updated here, and atomically queried on fast paths.
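	 *
	 * Both the old and the new vector are re-evaluated below, so that a
	 * vector which is no longer referenced by any SINT gets cleared from
	 * the bitmaps.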
	 */
	old_vector = synic_read_sint(synic, sint) & HV_SYNIC_SINT_VECTOR_MASK;

	atomic64_set(&synic->sint[sint], data);

	synic_update_vector(synic, old_vector);

	synic_update_vector(synic, vector);

	/* Load SynIC vectors into EOI exit bitmap */
	kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic));
	return 0;
}

static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
{
	struct kvm_vcpu *vcpu = NULL;
	int i;

	if (vpidx < KVM_MAX_VCPUS)
		vcpu = kvm_get_vcpu(kvm, vpidx);
	if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
		return vcpu;
	kvm_for_each_vcpu(i, vcpu, kvm)
		if (vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
			return vcpu;
	return NULL;
}

static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vcpu_hv_synic *synic;

	vcpu = get_vcpu_by_vpidx(kvm, vpidx);
	if (!vcpu)
		return NULL;
	synic = vcpu_to_synic(vcpu);
	return (synic->active) ? synic : NULL;
}

static void synic_clear_sint_msg_pending(struct kvm_vcpu_hv_synic *synic,
					 u32 sint)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	struct page *page;
	gpa_t gpa;
	struct hv_message *msg;
	struct hv_message_page *msg_page;

	gpa = synic->msg_page & PAGE_MASK;
	page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
	if (is_error_page(page)) {
		vcpu_err(vcpu, "Hyper-V SynIC can't get msg page, gpa 0x%llx\n",
			 gpa);
		return;
	}
	msg_page = kmap_atomic(page);

	msg = &msg_page->sint_message[sint];
	msg->header.message_flags.msg_pending = 0;

	kunmap_atomic(msg_page);
	kvm_release_page_dirty(page);
	kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
}

static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
	struct kvm_vcpu_hv_stimer *stimer;
	int gsi, idx, stimers_pending;

	trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint);

	if (synic->msg_page & HV_SYNIC_SIMP_ENABLE)
		synic_clear_sint_msg_pending(synic, sint);

	/* Try to deliver pending Hyper-V SynIC timer messages */
	stimers_pending = 0;
	for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) {
		stimer = &hv_vcpu->stimer[idx];
		if (stimer->msg_pending &&
		    (stimer->config & HV_STIMER_ENABLE) &&
		    HV_STIMER_SINT(stimer->config) == sint) {
			set_bit(stimer->index,
				hv_vcpu->stimer_pending_bitmap);
			stimers_pending++;
		}
	}
	if (stimers_pending)
		kvm_make_request(KVM_REQ_HV_STIMER, vcpu);

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = atomic_read(&synic->sint_to_gsi[sint]);
	if (gsi != -1)
		kvm_notify_acked_gsi(kvm, gsi);
	srcu_read_unlock(&kvm->irq_srcu, idx);
}

static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;

	hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC;
	hv_vcpu->exit.u.synic.msr = msr;
	hv_vcpu->exit.u.synic.control = synic->control;
	hv_vcpu->exit.u.synic.evt_page = synic->evt_page;
	hv_vcpu->exit.u.synic.msg_page = synic->msg_page;

	kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
}

static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
			 u32 msr, u64 data, bool host)
{
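	/*
	 * Writes to the per-vCPU SynIC MSRs (SCONTROL, SVERSION, SIEFP,
	 * SIMP, EOM, SINT0..SINT15).  Guest-initiated updates of
	 * SCONTROL/SIEFP/SIMP are additionally reported to userspace via
	 * a KVM_EXIT_HYPERV_SYNIC exit (see synic_exit()).
	 */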
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	int ret;

	if (!synic->active)
		return 1;

	trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host);

	ret = 0;
	switch (msr) {
	case HV_X64_MSR_SCONTROL:
		synic->control = data;
		if (!host)
			synic_exit(synic, msr);
		break;
	case HV_X64_MSR_SVERSION:
		if (!host) {
			ret = 1;
			break;
		}
		synic->version = data;
		break;
	case HV_X64_MSR_SIEFP:
		if ((data & HV_SYNIC_SIEFP_ENABLE) && !host &&
		    !synic->dont_zero_synic_pages)
			if (kvm_clear_guest(vcpu->kvm,
					    data & PAGE_MASK, PAGE_SIZE)) {
				ret = 1;
				break;
			}
		synic->evt_page = data;
		if (!host)
			synic_exit(synic, msr);
		break;
	case HV_X64_MSR_SIMP:
		if ((data & HV_SYNIC_SIMP_ENABLE) && !host &&
		    !synic->dont_zero_synic_pages)
			if (kvm_clear_guest(vcpu->kvm,
					    data & PAGE_MASK, PAGE_SIZE)) {
				ret = 1;
				break;
			}
		synic->msg_page = data;
		if (!host)
			synic_exit(synic, msr);
		break;
	case HV_X64_MSR_EOM: {
		int i;

		for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
			kvm_hv_notify_acked_sint(vcpu, i);
		break;
	}
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data, host);
		break;
	default:
		ret = 1;
		break;
	}
	return ret;
}

static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata)
{
	int ret;

	if (!synic->active)
		return 1;

	ret = 0;
	switch (msr) {
	case HV_X64_MSR_SCONTROL:
		*pdata = synic->control;
		break;
	case HV_X64_MSR_SVERSION:
		*pdata = synic->version;
		break;
	case HV_X64_MSR_SIEFP:
		*pdata = synic->evt_page;
		break;
	case HV_X64_MSR_SIMP:
		*pdata = synic->msg_page;
		break;
	case HV_X64_MSR_EOM:
		*pdata = 0;
		break;
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		*pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]);
		break;
	default:
		ret = 1;
		break;
	}
	return ret;
}

static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	struct kvm_lapic_irq irq;
	int ret, vector;

	if (sint >= ARRAY_SIZE(synic->sint))
		return -EINVAL;

	vector = synic_get_sint_vector(synic_read_sint(synic, sint));
	if (vector < 0)
		return -ENOENT;

	memset(&irq, 0, sizeof(irq));
	irq.shorthand = APIC_DEST_SELF;
	irq.dest_mode = APIC_DEST_PHYSICAL;
	irq.delivery_mode = APIC_DM_FIXED;
	irq.vector = vector;
	irq.level = 1;

	ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq, NULL);
	trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret);
	return ret;
}

int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint)
{
	struct kvm_vcpu_hv_synic *synic;

	synic = synic_get(kvm, vpidx);
	if (!synic)
		return -EINVAL;

	return synic_set_irq(synic, sint);
}

void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
{
	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
	int i;

	trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector);

	for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
		if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
			kvm_hv_notify_acked_sint(vcpu, i);
}

static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi)
{
	struct kvm_vcpu_hv_synic *synic;

	synic = synic_get(kvm, vpidx);
	if (!synic)
		return -EINVAL;

	if (sint >= ARRAY_SIZE(synic->sint_to_gsi))
		return -EINVAL;

	atomic_set(&synic->sint_to_gsi[sint], gsi);
	return 0;
}

void kvm_hv_irq_routing_update(struct kvm *kvm)
{
	struct kvm_irq_routing_table *irq_rt;
	struct kvm_kernel_irq_routing_entry *e;
	u32 gsi;

	irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
					lockdep_is_held(&kvm->irq_lock));

	for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) {
		hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
			if (e->type == KVM_IRQ_ROUTING_HV_SINT)
				kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu,
						    e->hv_sint.sint, gsi);
		}
	}
}

static void synic_init(struct kvm_vcpu_hv_synic *synic)
{
	int i;

	memset(synic, 0, sizeof(*synic));
	synic->version = HV_SYNIC_VERSION_1;
	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
		atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED);
		atomic_set(&synic->sint_to_gsi[i], -1);
	}
}

static u64 get_time_ref_counter(struct kvm *kvm)
{
	struct kvm_hv *hv = &kvm->arch.hyperv;
	struct kvm_vcpu *vcpu;
	u64 tsc;

	/*
	 * The guest has not set up the TSC page or the clock isn't
	 * stable, fall back to get_kvmclock_ns.
	 */
	if (!hv->tsc_ref.tsc_sequence)
		return div_u64(get_kvmclock_ns(kvm), 100);

	vcpu = kvm_get_vcpu(kvm, 0);
	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
	return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64)
		+ hv->tsc_ref.tsc_offset;
}

static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
				bool vcpu_kick)
{
	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);

	set_bit(stimer->index,
		vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
	kvm_make_request(KVM_REQ_HV_STIMER, vcpu);
	if (vcpu_kick)
		kvm_vcpu_kick(vcpu);
}

static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer)
{
	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);

	trace_kvm_hv_stimer_cleanup(stimer_to_vcpu(stimer)->vcpu_id,
				    stimer->index);

	hrtimer_cancel(&stimer->timer);
	clear_bit(stimer->index,
		  vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
	stimer->msg_pending = false;
	stimer->exp_time = 0;
}

static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer)
{
	struct kvm_vcpu_hv_stimer *stimer;

	stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer);
	trace_kvm_hv_stimer_callback(stimer_to_vcpu(stimer)->vcpu_id,
				     stimer->index);
	stimer_mark_pending(stimer, true);

	return HRTIMER_NORESTART;
}

/*
 * stimer_start() assumptions:
 * a) stimer->count is not equal to 0
 * b) stimer->config has HV_STIMER_ENABLE flag
 */
static int stimer_start(struct kvm_vcpu_hv_stimer *stimer)
{
	u64 time_now;
	ktime_t ktime_now;

	time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm);
	ktime_now = ktime_get();

	if (stimer->config & HV_STIMER_PERIODIC) {
		if (stimer->exp_time) {
			if (time_now >= stimer->exp_time) {
				u64 remainder;

				div64_u64_rem(time_now - stimer->exp_time,
					      stimer->count, &remainder);
				stimer->exp_time =
					time_now + (stimer->count - remainder);
			}
		} else
			stimer->exp_time = time_now + stimer->count;

		trace_kvm_hv_stimer_start_periodic(
					stimer_to_vcpu(stimer)->vcpu_id,
					stimer->index,
					time_now, stimer->exp_time);

		hrtimer_start(&stimer->timer,
			      ktime_add_ns(ktime_now,
					   100 * (stimer->exp_time - time_now)),
			      HRTIMER_MODE_ABS);
		return 0;
	}
	stimer->exp_time = stimer->count;
	if (time_now >= stimer->count) {
		/*
		 * Expire timer according to Hypervisor Top-Level Functional
		 * specification v4(15.3.1):
		 * "If a one shot is enabled and the specified count is in
		 * the past, it will expire immediately."
		 */
		stimer_mark_pending(stimer, false);
		return 0;
	}

	trace_kvm_hv_stimer_start_one_shot(stimer_to_vcpu(stimer)->vcpu_id,
					   stimer->index,
					   time_now, stimer->count);

	hrtimer_start(&stimer->timer,
		      ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)),
		      HRTIMER_MODE_ABS);
	return 0;
}

static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
			     bool host)
{
	trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id,
				       stimer->index, config, host);

	stimer_cleanup(stimer);
	if ((stimer->config & HV_STIMER_ENABLE) && HV_STIMER_SINT(config) == 0)
		config &= ~HV_STIMER_ENABLE;
	stimer->config = config;
	stimer_mark_pending(stimer, false);
	return 0;
}

static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count,
			    bool host)
{
	trace_kvm_hv_stimer_set_count(stimer_to_vcpu(stimer)->vcpu_id,
				      stimer->index, count, host);

	stimer_cleanup(stimer);
	stimer->count = count;
	if (stimer->count == 0)
		stimer->config &= ~HV_STIMER_ENABLE;
	else if (stimer->config & HV_STIMER_AUTOENABLE)
		stimer->config |= HV_STIMER_ENABLE;
	stimer_mark_pending(stimer, false);
	return 0;
}

static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig)
{
	*pconfig = stimer->config;
	return 0;
}

static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount)
{
	*pcount = stimer->count;
	return 0;
}

static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint,
			     struct hv_message *src_msg)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	struct page *page;
	gpa_t gpa;
	struct hv_message *dst_msg;
	int r;
	struct hv_message_page *msg_page;

	if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE))
		return -ENOENT;

	gpa = synic->msg_page & PAGE_MASK;
	page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
	if (is_error_page(page))
		return -EFAULT;

	msg_page = kmap_atomic(page);
	dst_msg = &msg_page->sint_message[sint];
	if (sync_cmpxchg(&dst_msg->header.message_type, HVMSG_NONE,
			 src_msg->header.message_type) != HVMSG_NONE) {
		dst_msg->header.message_flags.msg_pending = 1;
		r = -EAGAIN;
	} else {
		memcpy(&dst_msg->u.payload, &src_msg->u.payload,
		       src_msg->header.payload_size);
		dst_msg->header.message_type = src_msg->header.message_type;
		dst_msg->header.payload_size = src_msg->header.payload_size;
		r = synic_set_irq(synic, sint);
		if (r >= 1)
			r = 0;
		else if (r == 0)
			r = -EFAULT;
	}
	kunmap_atomic(msg_page);
	kvm_release_page_dirty(page);
	kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
	return r;
}

static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer)
{
	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
	struct hv_message *msg = &stimer->msg;
	struct hv_timer_message_payload *payload =
			(struct hv_timer_message_payload *)&msg->u.payload;

	payload->expiration_time = stimer->exp_time;
	payload->delivery_time = get_time_ref_counter(vcpu->kvm);
	return synic_deliver_msg(vcpu_to_synic(vcpu),
				 HV_STIMER_SINT(stimer->config), msg);
}

static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer)
{
	int r;

	stimer->msg_pending = true;
	r = stimer_send_msg(stimer);
	trace_kvm_hv_stimer_expiration(stimer_to_vcpu(stimer)->vcpu_id,
				       stimer->index, r);
	if (!r) {
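		/*
		 * Message delivered: clear the pending flag; one-shot timers
		 * are disabled once they have fired.
		 */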
		stimer->msg_pending = false;
		if (!(stimer->config & HV_STIMER_PERIODIC))
			stimer->config &= ~HV_STIMER_ENABLE;
	}
}

void kvm_hv_process_stimers(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
	struct kvm_vcpu_hv_stimer *stimer;
	u64 time_now, exp_time;
	int i;

	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
		if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) {
			stimer = &hv_vcpu->stimer[i];
			if (stimer->config & HV_STIMER_ENABLE) {
				exp_time = stimer->exp_time;

				if (exp_time) {
					time_now =
						get_time_ref_counter(vcpu->kvm);
					if (time_now >= exp_time)
						stimer_expiration(stimer);
				}

				if ((stimer->config & HV_STIMER_ENABLE) &&
				    stimer->count) {
					if (!stimer->msg_pending)
						stimer_start(stimer);
				} else
					stimer_cleanup(stimer);
			}
		}
}

void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
	int i;

	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
		stimer_cleanup(&hv_vcpu->stimer[i]);
}

static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
{
	struct hv_message *msg = &stimer->msg;
	struct hv_timer_message_payload *payload =
			(struct hv_timer_message_payload *)&msg->u.payload;

	memset(&msg->header, 0, sizeof(msg->header));
	msg->header.message_type = HVMSG_TIMER_EXPIRED;
	msg->header.payload_size = sizeof(*payload);

	payload->timer_index = stimer->index;
	payload->expiration_time = 0;
	payload->delivery_time = 0;
}

static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index)
{
	memset(stimer, 0, sizeof(*stimer));
	stimer->index = timer_index;
	hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	stimer->timer.function = stimer_timer_callback;
	stimer_prepare_msg(stimer);
}

void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
	int i;

	synic_init(&hv_vcpu->synic);

	bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
		stimer_init(&hv_vcpu->stimer[i], i);
}

void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);

	hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu);
}

int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
{
	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);

	/*
	 * Hyper-V SynIC auto EOI SINTs are not compatible with APICv,
	 * so deactivate APICv.
	 */
	kvm_vcpu_deactivate_apicv(vcpu);
	synic->active = true;
	synic->dont_zero_synic_pages = dont_zero_synic_pages;
	return 0;
}

static bool kvm_hv_msr_partition_wide(u32 msr)
{
	bool r = false;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
	case HV_X64_MSR_HYPERCALL:
	case HV_X64_MSR_REFERENCE_TSC:
	case HV_X64_MSR_TIME_REF_COUNT:
	case HV_X64_MSR_CRASH_CTL:
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
	case HV_X64_MSR_RESET:
	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
	case HV_X64_MSR_TSC_EMULATION_CONTROL:
	case HV_X64_MSR_TSC_EMULATION_STATUS:
		r = true;
		break;
	}

	return r;
}

static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu,
				     u32 index, u64 *pdata)
{
	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;

	if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
		return -EINVAL;

	*pdata = hv->hv_crash_param[index];
	return 0;
}

static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata)
{
	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;

	*pdata = hv->hv_crash_ctl;
	return 0;
}

static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host)
{
	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;

	if (host)
		hv->hv_crash_ctl = data & HV_X64_MSR_CRASH_CTL_NOTIFY;

	if (!host && (data & HV_X64_MSR_CRASH_CTL_NOTIFY)) {

		vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n",
			   hv->hv_crash_param[0],
			   hv->hv_crash_param[1],
			   hv->hv_crash_param[2],
			   hv->hv_crash_param[3],
			   hv->hv_crash_param[4]);

		/* Send notification about crash to user space */
		kvm_make_request(KVM_REQ_HV_CRASH, vcpu);
	}

	return 0;
}

static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
				     u32 index, u64 data)
{
	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;

	if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
		return -EINVAL;

	hv->hv_crash_param[index] = data;
	return 0;
}

/*
 * The kvmclock and Hyper-V TSC page use similar formulas, and converting
 * between them is possible:
 *
 * kvmclock formula:
 *    nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32)
 *           + system_time
 *
 * Hyper-V formula:
 *    nsec/100 = ticks * scale / 2^64 + offset
 *
 * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula.
 * By dividing the kvmclock formula by 100 and equating what's left we get:
 *    ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
 *            scale / 2^64 =         tsc_to_system_mul * 2^(tsc_shift-32) / 100
 *            scale        =         tsc_to_system_mul * 2^(32+tsc_shift) / 100
 *
 * Now expand the kvmclock formula and divide by 100:
 *    nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32)
 *           - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32)
 *           + system_time
 *    nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
 *               - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100
 *               + system_time / 100
 *
 * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64:
 *    nsec/100 = ticks * scale / 2^64
 *               - tsc_timestamp * scale / 2^64
 *               + system_time / 100
 *
 * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out:
 *    offset = system_time / 100 - tsc_timestamp * scale / 2^64
 *
 * These two equivalencies are implemented in this function.
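 *
 * A quick illustrative sanity check (not part of the original derivation):
 * for a 1 GHz guest TSC, one encoding that satisfies the kvmclock formula is
 * tsc_to_system_mul = 2^31 and tsc_shift = 1, since 2^31 * 2^(1-32) = 1
 * (one tick per nanosecond).  The formula above then yields
 * scale = 2^31 * 2^(32+1) / 100 = 2^64 / 100, so ticks * scale / 2^64 =
 * ticks / 100: the reference counter advances once per 100 ticks, i.e. in
 * 100ns units as required.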
 */
static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
					HV_REFERENCE_TSC_PAGE *tsc_ref)
{
	u64 max_mul;

	if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT))
		return false;

	/*
	 * check if scale would overflow, if so we use the time ref counter
	 *    tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64
	 *    tsc_to_system_mul / 100 >= 2^(32-tsc_shift)
	 *    tsc_to_system_mul >= 100 * 2^(32-tsc_shift)
	 */
	max_mul = 100ull << (32 - hv_clock->tsc_shift);
	if (hv_clock->tsc_to_system_mul >= max_mul)
		return false;

	/*
	 * Otherwise compute the scale and offset according to the formulas
	 * derived above.
	 */
	tsc_ref->tsc_scale =
		mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift),
				hv_clock->tsc_to_system_mul,
				100);

	tsc_ref->tsc_offset = hv_clock->system_time;
	do_div(tsc_ref->tsc_offset, 100);
	tsc_ref->tsc_offset -=
		mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 64);
	return true;
}

void kvm_hv_setup_tsc_page(struct kvm *kvm,
			   struct pvclock_vcpu_time_info *hv_clock)
{
	struct kvm_hv *hv = &kvm->arch.hyperv;
	u32 tsc_seq;
	u64 gfn;

	BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
	BUILD_BUG_ON(offsetof(HV_REFERENCE_TSC_PAGE, tsc_sequence) != 0);

	if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
		return;

	mutex_lock(&kvm->arch.hyperv.hv_lock);
	if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
		goto out_unlock;

	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
	/*
	 * Because the TSC parameters only vary when there is a
	 * change in the master clock, do not bother with caching.
	 */
	if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
				    &tsc_seq, sizeof(tsc_seq))))
		goto out_unlock;

	/*
	 * While we're computing and writing the parameters, force the
	 * guest to use the time reference count MSR.
	 */
	hv->tsc_ref.tsc_sequence = 0;
	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
			    &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
		goto out_unlock;

	if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
		goto out_unlock;

	/* Ensure sequence is zero before writing the rest of the struct. */
	smp_wmb();
	if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
		goto out_unlock;

	/*
	 * Now switch to the TSC page mechanism by writing the sequence.
	 */
	tsc_seq++;
	if (tsc_seq == 0xFFFFFFFF || tsc_seq == 0)
		tsc_seq = 1;

	/* Write the struct entirely before the non-zero sequence. */
	smp_wmb();

	hv->tsc_ref.tsc_sequence = tsc_seq;
	kvm_write_guest(kvm, gfn_to_gpa(gfn),
			&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence));
out_unlock:
	mutex_unlock(&kvm->arch.hyperv.hv_lock);
}

static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
			     bool host)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_hv *hv = &kvm->arch.hyperv;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
		hv->hv_guest_os_id = data;
		/* setting guest os id to zero disables hypercall page */
		if (!hv->hv_guest_os_id)
			hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
		break;
	case HV_X64_MSR_HYPERCALL: {
		u64 gfn;
		unsigned long addr;
		u8 instructions[4];

		/* if guest os id is not set hypercall should remain disabled */
		if (!hv->hv_guest_os_id)
			break;
		if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
			hv->hv_hypercall = data;
			break;
		}
		gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
		addr = gfn_to_hva(kvm, gfn);
		if (kvm_is_error_hva(addr))
			return 1;
		kvm_x86_ops->patch_hypercall(vcpu, instructions);
		((unsigned char *)instructions)[3] = 0xc3; /* ret */
		if (__copy_to_user((void __user *)addr, instructions, 4))
			return 1;
		hv->hv_hypercall = data;
		mark_page_dirty(kvm, gfn);
		break;
	}
	case HV_X64_MSR_REFERENCE_TSC:
		hv->hv_tsc_page = data;
		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
			kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
		break;
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
		return kvm_hv_msr_set_crash_data(vcpu,
						 msr - HV_X64_MSR_CRASH_P0,
						 data);
	case HV_X64_MSR_CRASH_CTL:
		return kvm_hv_msr_set_crash_ctl(vcpu, data, host);
	case HV_X64_MSR_RESET:
		if (data == 1) {
			vcpu_debug(vcpu, "hyper-v reset requested\n");
			kvm_make_request(KVM_REQ_HV_RESET, vcpu);
		}
		break;
	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
		hv->hv_reenlightenment_control = data;
		break;
	case HV_X64_MSR_TSC_EMULATION_CONTROL:
		hv->hv_tsc_emulation_control = data;
		break;
	case HV_X64_MSR_TSC_EMULATION_STATUS:
		hv->hv_tsc_emulation_status = data;
		break;
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
			    msr, data);
		return 1;
	}
	return 0;
}

/* Calculate cpu time spent by current task in 100ns units */
static u64 current_task_runtime_100ns(void)
{
	u64 utime, stime;

	task_cputime_adjusted(current, &utime, &stime);

	return div_u64(utime + stime, 100);
}

static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
{
	struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;

	switch (msr) {
	case HV_X64_MSR_VP_INDEX:
		if (!host)
			return 1;
		hv->vp_index = (u32)data;
		break;
	case HV_X64_MSR_VP_ASSIST_PAGE: {
		u64 gfn;
		unsigned long addr;

		if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
			hv->hv_vapic = data;
			if (kvm_lapic_enable_pv_eoi(vcpu, 0))
				return 1;
			break;
		}
		gfn = data >> HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT;
		addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
		if (kvm_is_error_hva(addr))
			return 1;
		if (__clear_user((void __user *)addr, PAGE_SIZE))
			return 1;
		hv->hv_vapic = data;
		kvm_vcpu_mark_page_dirty(vcpu, gfn);
		if (kvm_lapic_enable_pv_eoi(vcpu,
					    gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
			return 1;
		break;
	}
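	/*
	 * HV_X64_MSR_EOI/ICR/TPR are aliases for the corresponding local
	 * APIC registers; forward the write to the lapic emulation.
	 */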
	case HV_X64_MSR_EOI:
		return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
	case HV_X64_MSR_ICR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
	case HV_X64_MSR_TPR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
	case HV_X64_MSR_VP_RUNTIME:
		if (!host)
			return 1;
		hv->runtime_offset = data - current_task_runtime_100ns();
		break;
	case HV_X64_MSR_SCONTROL:
	case HV_X64_MSR_SVERSION:
	case HV_X64_MSR_SIEFP:
	case HV_X64_MSR_SIMP:
	case HV_X64_MSR_EOM:
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host);
	case HV_X64_MSR_STIMER0_CONFIG:
	case HV_X64_MSR_STIMER1_CONFIG:
	case HV_X64_MSR_STIMER2_CONFIG:
	case HV_X64_MSR_STIMER3_CONFIG: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;

		return stimer_set_config(vcpu_to_stimer(vcpu, timer_index),
					 data, host);
	}
	case HV_X64_MSR_STIMER0_COUNT:
	case HV_X64_MSR_STIMER1_COUNT:
	case HV_X64_MSR_STIMER2_COUNT:
	case HV_X64_MSR_STIMER3_COUNT: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;

		return stimer_set_count(vcpu_to_stimer(vcpu, timer_index),
					data, host);
	}
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
			    msr, data);
		return 1;
	}

	return 0;
}

static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	u64 data = 0;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_hv *hv = &kvm->arch.hyperv;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
		data = hv->hv_guest_os_id;
		break;
	case HV_X64_MSR_HYPERCALL:
		data = hv->hv_hypercall;
		break;
	case HV_X64_MSR_TIME_REF_COUNT:
		data = get_time_ref_counter(kvm);
		break;
	case HV_X64_MSR_REFERENCE_TSC:
		data = hv->hv_tsc_page;
		break;
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
		return kvm_hv_msr_get_crash_data(vcpu,
						 msr - HV_X64_MSR_CRASH_P0,
						 pdata);
	case HV_X64_MSR_CRASH_CTL:
		return kvm_hv_msr_get_crash_ctl(vcpu, pdata);
	case HV_X64_MSR_RESET:
		data = 0;
		break;
	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
		data = hv->hv_reenlightenment_control;
		break;
	case HV_X64_MSR_TSC_EMULATION_CONTROL:
		data = hv->hv_tsc_emulation_control;
		break;
	case HV_X64_MSR_TSC_EMULATION_STATUS:
		data = hv->hv_tsc_emulation_status;
		break;
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
		return 1;
	}

	*pdata = data;
	return 0;
}

static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	u64 data = 0;
	struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;

	switch (msr) {
	case HV_X64_MSR_VP_INDEX:
		data = hv->vp_index;
		break;
	case HV_X64_MSR_EOI:
		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
	case HV_X64_MSR_ICR:
		return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
	case HV_X64_MSR_TPR:
		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
	case HV_X64_MSR_VP_ASSIST_PAGE:
		data = hv->hv_vapic;
		break;
	case HV_X64_MSR_VP_RUNTIME:
		data = current_task_runtime_100ns() + hv->runtime_offset;
		break;
	case HV_X64_MSR_SCONTROL:
	case HV_X64_MSR_SVERSION:
	case HV_X64_MSR_SIEFP:
	case HV_X64_MSR_SIMP:
	case HV_X64_MSR_EOM:
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata);
	case HV_X64_MSR_STIMER0_CONFIG:
	case HV_X64_MSR_STIMER1_CONFIG:
	case HV_X64_MSR_STIMER2_CONFIG:
	case HV_X64_MSR_STIMER3_CONFIG: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;

		return stimer_get_config(vcpu_to_stimer(vcpu, timer_index),
					 pdata);
	}
	case HV_X64_MSR_STIMER0_COUNT:
	case HV_X64_MSR_STIMER1_COUNT:
	case HV_X64_MSR_STIMER2_COUNT:
	case HV_X64_MSR_STIMER3_COUNT: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;

		return stimer_get_count(vcpu_to_stimer(vcpu, timer_index),
					pdata);
	}
	case HV_X64_MSR_TSC_FREQUENCY:
		data = (u64)vcpu->arch.virtual_tsc_khz * 1000;
		break;
	case HV_X64_MSR_APIC_FREQUENCY:
		data = APIC_BUS_FREQUENCY;
		break;
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
		return 1;
	}
	*pdata = data;
	return 0;
}

int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
{
	if (kvm_hv_msr_partition_wide(msr)) {
		int r;

		mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
		r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
		mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
		return r;
	} else
		return kvm_hv_set_msr(vcpu, msr, data, host);
}

int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	if (kvm_hv_msr_partition_wide(msr)) {
		int r;

		mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
		r = kvm_hv_get_msr_pw(vcpu, msr, pdata);
		mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
		return r;
	} else
		return kvm_hv_get_msr(vcpu, msr, pdata);
}

static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no)
{
	int i = 0, j;

	if (!(valid_bank_mask & BIT_ULL(bank_no)))
		return -1;

	for (j = 0; j < bank_no; j++)
		if (valid_bank_mask & BIT_ULL(j))
			i++;

	return i;
}

static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
			    u16 rep_cnt, bool ex)
{
	struct kvm *kvm = current_vcpu->kvm;
	struct kvm_vcpu_hv *hv_current = &current_vcpu->arch.hyperv;
	struct hv_tlb_flush_ex flush_ex;
	struct hv_tlb_flush flush;
	struct kvm_vcpu *vcpu;
	unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] = {0};
	unsigned long valid_bank_mask = 0;
	u64 sparse_banks[64];
	int sparse_banks_len, i;
	bool all_cpus;

	if (!ex) {
		if (unlikely(kvm_read_guest(kvm, ingpa, &flush, sizeof(flush))))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;

		trace_kvm_hv_flush_tlb(flush.processor_mask,
				       flush.address_space, flush.flags);

		sparse_banks[0] = flush.processor_mask;
		all_cpus = flush.flags & HV_FLUSH_ALL_PROCESSORS;
	} else {
		if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex,
					    sizeof(flush_ex))))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;

		trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
					  flush_ex.hv_vp_set.format,
					  flush_ex.address_space,
					  flush_ex.flags);

		valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask;
		all_cpus = flush_ex.hv_vp_set.format !=
			HV_GENERIC_SET_SPARSE_4K;

		sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
			sizeof(sparse_banks[0]);

		if (!sparse_banks_len && !all_cpus)
			goto ret_success;

		if (!all_cpus &&
		    kvm_read_guest(kvm,
				   ingpa + offsetof(struct hv_tlb_flush_ex,
						    hv_vp_set.bank_contents),
				   sparse_banks,
				   sparse_banks_len))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;
	}

	cpumask_clear(&hv_current->tlb_lush);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
		int bank = hv->vp_index / 64, sbank = 0;

		if (!all_cpus) {
			/* Banks >64 can't be represented */
			if (bank >= 64)
				continue;

			/* Non-ex hypercalls can only address first 64 vCPUs */
			if (!ex && bank)
				continue;

			if (ex) {
				/*
				 * Check if the bank of this vCPU is in sparse
				 * set and get the sparse bank number.
				 */
				sbank = get_sparse_bank_no(valid_bank_mask,
							   bank);

				if (sbank < 0)
					continue;
			}

			if (!(sparse_banks[sbank] & BIT_ULL(hv->vp_index % 64)))
				continue;
		}

		/*
		 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we
		 * can't analyze it here, flush TLB regardless of the specified
		 * address space.
		 */
		__set_bit(i, vcpu_bitmap);
	}

	kvm_make_vcpus_request_mask(kvm,
				    KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
				    vcpu_bitmap, &hv_current->tlb_lush);

ret_success:
	/* We always do full TLB flush, set rep_done = rep_cnt. */
	return (u64)HV_STATUS_SUCCESS |
		((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
}

bool kvm_hv_hypercall_enabled(struct kvm *kvm)
{
	return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
}

static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
{
	bool longmode;

	longmode = is_64_bit_mode(vcpu);
	if (longmode)
		kvm_register_write(vcpu, VCPU_REGS_RAX, result);
	else {
		kvm_register_write(vcpu, VCPU_REGS_RDX, result >> 32);
		kvm_register_write(vcpu, VCPU_REGS_RAX, result & 0xffffffff);
	}
}

static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
{
	kvm_hv_hypercall_set_result(vcpu, result);
	++vcpu->stat.hypercalls;
	return kvm_skip_emulated_instruction(vcpu);
}

static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
{
	return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result);
}

static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
{
	struct eventfd_ctx *eventfd;

	if (unlikely(!fast)) {
		int ret;
		gpa_t gpa = param;

		if ((gpa & (__alignof__(param) - 1)) ||
		    offset_in_page(gpa) + sizeof(param) > PAGE_SIZE)
			return HV_STATUS_INVALID_ALIGNMENT;

		ret = kvm_vcpu_read_guest(vcpu, gpa, &param, sizeof(param));
		if (ret < 0)
			return HV_STATUS_INVALID_ALIGNMENT;
	}

	/*
	 * Per spec, bits 32-47 contain the extra "flag number". However, we
	 * have no use for it, and in all known usecases it is zero, so just
	 * report lookup failure if it isn't.
	 */
	if (param & 0xffff00000000ULL)
		return HV_STATUS_INVALID_PORT_ID;
	/* remaining bits are reserved-zero */
	if (param & ~KVM_HYPERV_CONN_ID_MASK)
		return HV_STATUS_INVALID_HYPERCALL_INPUT;

	/* the eventfd is protected by vcpu->kvm->srcu, but conn_to_evt isn't */
	rcu_read_lock();
	eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param);
	rcu_read_unlock();
	if (!eventfd)
		return HV_STATUS_INVALID_PORT_ID;

	eventfd_signal(eventfd, 1);
	return HV_STATUS_SUCCESS;
}

int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
{
	u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS;
	uint16_t code, rep_idx, rep_cnt;
	bool fast, longmode, rep;

	/*
	 * Per the Hyper-V spec, a hypercall issued from non-zero CPL or
	 * from real mode generates #UD.
	 */
	if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}

	longmode = is_64_bit_mode(vcpu);

	if (!longmode) {
		param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
			(kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
		ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
			(kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
		outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
			(kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
	}
#ifdef CONFIG_X86_64
	else {
		param = kvm_register_read(vcpu, VCPU_REGS_RCX);
		ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
		outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
	}
#endif

	code = param & 0xffff;
	fast = !!(param & HV_HYPERCALL_FAST_BIT);
	rep_cnt = (param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff;
	rep_idx = (param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
	rep = !!(rep_cnt || rep_idx);

	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);

	switch (code) {
	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
		if (unlikely(rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		kvm_vcpu_on_spin(vcpu, true);
		break;
	case HVCALL_SIGNAL_EVENT:
		if (unlikely(rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hvcall_signal_event(vcpu, fast, ingpa);
		if (ret != HV_STATUS_INVALID_PORT_ID)
			break;
		/* maybe userspace knows this conn_id: fall through */
	case HVCALL_POST_MESSAGE:
		/* don't bother userspace if it has no way to handle it */
		if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		vcpu->run->exit_reason = KVM_EXIT_HYPERV;
		vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
		vcpu->run->hyperv.u.hcall.input = param;
		vcpu->run->hyperv.u.hcall.params[0] = ingpa;
		vcpu->run->hyperv.u.hcall.params[1] = outgpa;
		vcpu->arch.complete_userspace_io =
				kvm_hv_hypercall_complete_userspace;
		return 0;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
		if (unlikely(fast || !rep_cnt || rep_idx)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
		break;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
		if (unlikely(fast || rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
		break;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
		if (unlikely(fast || !rep_cnt || rep_idx)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
		break;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
		if (unlikely(fast || rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
		break;
	default:
		ret = HV_STATUS_INVALID_HYPERCALL_CODE;
		break;
	}

	return kvm_hv_hypercall_complete(vcpu, ret);
}

void kvm_hv_init_vm(struct kvm *kvm)
{
	mutex_init(&kvm->arch.hyperv.hv_lock);
	idr_init(&kvm->arch.hyperv.conn_to_evt);
}

void kvm_hv_destroy_vm(struct kvm *kvm)
{
	struct eventfd_ctx *eventfd;
	int i;

	idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i)
		eventfd_ctx_put(eventfd);
	idr_destroy(&kvm->arch.hyperv.conn_to_evt);
}

static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd)
{
	struct kvm_hv *hv = &kvm->arch.hyperv;
	struct eventfd_ctx *eventfd;
	int ret;

	eventfd = eventfd_ctx_fdget(fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	mutex_lock(&hv->hv_lock);
	ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1,
			GFP_KERNEL);
	mutex_unlock(&hv->hv_lock);

	if (ret >= 0)
		return 0;

	if (ret == -ENOSPC)
		ret = -EEXIST;
	eventfd_ctx_put(eventfd);
	return ret;
}

static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id)
{
	struct kvm_hv *hv = &kvm->arch.hyperv;
	struct eventfd_ctx *eventfd;

	mutex_lock(&hv->hv_lock);
	eventfd = idr_remove(&hv->conn_to_evt, conn_id);
	mutex_unlock(&hv->hv_lock);

	if (!eventfd)
		return -ENOENT;

	synchronize_srcu(&kvm->srcu);
	eventfd_ctx_put(eventfd);
	return 0;
}

int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args)
{
	if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) ||
	    (args->conn_id & ~KVM_HYPERV_CONN_ID_MASK))
		return -EINVAL;

	if (args->flags == KVM_HYPERV_EVENTFD_DEASSIGN)
		return kvm_hv_eventfd_deassign(kvm, args->conn_id);
	return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd);
}