/*
 * Hyper-V guest/hypervisor interaction
 *
 * Copyright (c) 2015-2018 Virtuozzo International GmbH.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "qapi/error.h"
#include "exec/address-spaces.h"
#include "sysemu/kvm.h"
#include "qemu/bitops.h"
#include "qemu/error-report.h"
#include "qemu/lockable.h"
#include "qemu/queue.h"
#include "qemu/rcu.h"
#include "qemu/rcu_queue.h"
#include "hw/hyperv/hyperv.h"

typedef struct SynICState {
    DeviceState parent_obj;

    CPUState *cs;

    bool enabled;
    hwaddr msg_page_addr;
    hwaddr event_page_addr;
    MemoryRegion msg_page_mr;
    MemoryRegion event_page_mr;
    struct hyperv_message_page *msg_page;
    struct hyperv_event_flags_page *event_page;
} SynICState;

#define TYPE_SYNIC "hyperv-synic"
#define SYNIC(obj) OBJECT_CHECK(SynICState, (obj), TYPE_SYNIC)

static bool synic_enabled;

bool hyperv_is_synic_enabled(void)
{
    return synic_enabled;
}

static SynICState *get_synic(CPUState *cs)
{
    return SYNIC(object_resolve_path_component(OBJECT(cs), "synic"));
}

static void synic_update(SynICState *synic, bool enable,
                         hwaddr msg_page_addr, hwaddr event_page_addr)
{
    synic->enabled = enable;
    if (synic->msg_page_addr != msg_page_addr) {
        if (synic->msg_page_addr) {
            memory_region_del_subregion(get_system_memory(),
                                        &synic->msg_page_mr);
        }
        if (msg_page_addr) {
            memory_region_add_subregion(get_system_memory(), msg_page_addr,
                                        &synic->msg_page_mr);
        }
        synic->msg_page_addr = msg_page_addr;
    }
    if (synic->event_page_addr != event_page_addr) {
        if (synic->event_page_addr) {
            memory_region_del_subregion(get_system_memory(),
                                        &synic->event_page_mr);
        }
        if (event_page_addr) {
            memory_region_add_subregion(get_system_memory(), event_page_addr,
                                        &synic->event_page_mr);
        }
        synic->event_page_addr = event_page_addr;
    }
}

void hyperv_synic_update(CPUState *cs, bool enable,
                         hwaddr msg_page_addr, hwaddr event_page_addr)
{
    SynICState *synic = get_synic(cs);

    if (!synic) {
        return;
    }

    synic_update(synic, enable, msg_page_addr, event_page_addr);
}
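
/*
 * Illustrative note (not part of this file's API): hyperv_synic_update() is
 * meant to be driven by the target-specific code that traps the guest's
 * SynIC MSR writes.  A minimal sketch, assuming hypothetical names
 * msr_scontrol, msr_simp and msr_siefp for the guest-written MSR values and
 * hypothetical ENABLE bit masks:
 *
 *     bool enable = msr_scontrol & SCONTROL_ENABLE_BIT;
 *     hwaddr msg_pg = (msr_simp & SIMP_ENABLE_BIT) ?
 *                     (msr_simp & TARGET_PAGE_MASK) : 0;
 *     hwaddr evt_pg = (msr_siefp & SIEFP_ENABLE_BIT) ?
 *                     (msr_siefp & TARGET_PAGE_MASK) : 0;
 *     hyperv_synic_update(cs, enable, msg_pg, evt_pg);
 */
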
static void synic_realize(DeviceState *dev, Error **errp)
{
    Object *obj = OBJECT(dev);
    SynICState *synic = SYNIC(dev);
    char *msgp_name, *eventp_name;
    uint32_t vp_index;

    /* memory region names have to be globally unique */
    vp_index = hyperv_vp_index(synic->cs);
    msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index);
    eventp_name = g_strdup_printf("synic-%u-event-page", vp_index);

    memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name,
                           sizeof(*synic->msg_page), &error_abort);
    memory_region_init_ram(&synic->event_page_mr, obj, eventp_name,
                           sizeof(*synic->event_page), &error_abort);
    synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr);
    synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr);

    g_free(msgp_name);
    g_free(eventp_name);
}

static void synic_reset(DeviceState *dev)
{
    SynICState *synic = SYNIC(dev);

    memset(synic->msg_page, 0, sizeof(*synic->msg_page));
    memset(synic->event_page, 0, sizeof(*synic->event_page));
    synic_update(synic, false, 0, 0);
}

static void synic_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = synic_realize;
    dc->reset = synic_reset;
    dc->user_creatable = false;
}

void hyperv_synic_add(CPUState *cs)
{
    Object *obj;
    SynICState *synic;

    obj = object_new(TYPE_SYNIC);
    synic = SYNIC(obj);
    synic->cs = cs;
    object_property_add_child(OBJECT(cs), "synic", obj);
    object_unref(obj);
    object_property_set_bool(obj, true, "realized", &error_abort);
    synic_enabled = true;
}

void hyperv_synic_reset(CPUState *cs)
{
    SynICState *synic = get_synic(cs);

    if (synic) {
        device_legacy_reset(DEVICE(synic));
    }
}

static const TypeInfo synic_type_info = {
    .name = TYPE_SYNIC,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(SynICState),
    .class_init = synic_class_init,
};

static void synic_register_types(void)
{
    type_register_static(&synic_type_info);
}

type_init(synic_register_types)

/*
 * KVM has its own message producers (SynIC timers). To guarantee
 * serialization with both KVM vcpu and the guest cpu, the messages are first
 * staged in an intermediate area and then posted to the SynIC message page in
 * the vcpu thread.
 */
typedef struct HvSintStagedMessage {
    /* message content staged by hyperv_post_msg */
    struct hyperv_message msg;
    /* callback + data (r/o) to complete the processing in a BH */
    HvSintMsgCb cb;
    void *cb_data;
    /* message posting status filled by cpu_post_msg */
    int status;
    /* passing the buck: */
    enum {
        /* initial state */
        HV_STAGED_MSG_FREE,
        /*
         * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE ->
         * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu
         */
        HV_STAGED_MSG_BUSY,
        /*
         * cpu_post_msg (vcpu thread) tries to copy the staged msg into the
         * msg slot and notify the guest, records the status, marks the
         * posting done (BUSY -> POSTED), and schedules the sint_msg_bh BH
         */
        HV_STAGED_MSG_POSTED,
        /*
         * sint_msg_bh (BH) verifies that the posting is done, runs the
         * callback, and starts over (POSTED -> FREE)
         */
    } state;
} HvSintStagedMessage;

struct HvSintRoute {
    uint32_t sint;
    SynICState *synic;
    int gsi;
    EventNotifier sint_set_notifier;
    EventNotifier sint_ack_notifier;

    HvSintStagedMessage *staged_msg;

    unsigned refcount;
};

static CPUState *hyperv_find_vcpu(uint32_t vp_index)
{
    CPUState *cs = qemu_get_cpu(vp_index);
    assert(hyperv_vp_index(cs) == vp_index);
    return cs;
}

/*
 * BH to complete the processing of a staged message.
 */
static void sint_msg_bh(void *opaque)
{
    HvSintRoute *sint_route = opaque;
    HvSintStagedMessage *staged_msg = sint_route->staged_msg;

    if (atomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) {
        /* status not ready yet (spurious ack from guest?), ignore */
        return;
    }

    staged_msg->cb(staged_msg->cb_data, staged_msg->status);
    staged_msg->status = 0;

    /* staged message processing finished, ready to start over */
    atomic_set(&staged_msg->state, HV_STAGED_MSG_FREE);
    /* drop the reference taken in hyperv_post_msg */
    hyperv_sint_route_unref(sint_route);
}
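
/*
 * For illustration only: the completion callback registered via
 * hyperv_sint_route_new() runs in the BH above with the posting status.  A
 * hypothetical originator callback could look like:
 *
 *     static void my_msg_cb(void *data, int status)
 *     {
 *         if (status == -EAGAIN) {
 *             // the target slot was busy; retry hyperv_post_msg() now that
 *             // the guest has acknowledged (EOM'd) the previous message
 *         }
 *     }
 */
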
/*
 * Worker to transfer the message from the staging area into the SynIC message
 * page in vcpu context.
 */
static void cpu_post_msg(CPUState *cs, run_on_cpu_data data)
{
    HvSintRoute *sint_route = data.host_ptr;
    HvSintStagedMessage *staged_msg = sint_route->staged_msg;
    SynICState *synic = sint_route->synic;
    struct hyperv_message *dst_msg;
    bool wait_for_sint_ack = false;

    assert(staged_msg->state == HV_STAGED_MSG_BUSY);

    if (!synic->enabled || !synic->msg_page_addr) {
        staged_msg->status = -ENXIO;
        goto posted;
    }

    dst_msg = &synic->msg_page->slot[sint_route->sint];

    if (dst_msg->header.message_type != HV_MESSAGE_NONE) {
        dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING;
        staged_msg->status = -EAGAIN;
        wait_for_sint_ack = true;
    } else {
        memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg));
        staged_msg->status = hyperv_sint_route_set_sint(sint_route);
    }

    memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page));

posted:
    atomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED);
    /*
     * Notify the msg originator of the progress made; if the slot was busy we
     * set the msg_pending flag in it, so the guest will do the EOM and
     * trigger the notification from KVM via sint_ack_notifier
     */
    if (!wait_for_sint_ack) {
        aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh,
                                sint_route);
    }
}

/*
 * Post a Hyper-V message to the staging area, for delivery to guest in the
 * vcpu thread.
 */
int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg)
{
    HvSintStagedMessage *staged_msg = sint_route->staged_msg;

    assert(staged_msg);

    /* grab the staging area */
    if (atomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE,
                       HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) {
        return -EAGAIN;
    }

    memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg));

    /* hold a reference on sint_route until the callback is finished */
    hyperv_sint_route_ref(sint_route);

    /* schedule message posting attempt in vcpu thread */
    async_run_on_cpu(sint_route->synic->cs, cpu_post_msg,
                     RUN_ON_CPU_HOST_PTR(sint_route));
    return 0;
}

static void sint_ack_handler(EventNotifier *notifier)
{
    HvSintRoute *sint_route = container_of(notifier, HvSintRoute,
                                           sint_ack_notifier);
    event_notifier_test_and_clear(notifier);

    /*
     * the guest consumed the previous message so complete the current one
     * with -EAGAIN and let the msg originator retry
     */
    aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route);
}
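
/*
 * Originator-side sketch (illustrative, hypothetical caller code): only one
 * message can be in flight per route at a time, so callers serialize their
 * posts around the completion callback, roughly:
 *
 *     if (hyperv_post_msg(sint_route, &msg) == -EAGAIN) {
 *         // staging area still busy with a previous message: back off and
 *         // repost once the earlier completion callback has run
 *     }
 */
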
/*
 * Set given event flag for a given sint on a given vcpu, and signal the sint.
 */
int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno)
{
    int ret;
    SynICState *synic = sint_route->synic;
    unsigned long *flags, set_mask;
    unsigned set_idx;

    if (eventno > HV_EVENT_FLAGS_COUNT) {
        return -EINVAL;
    }
    if (!synic->enabled || !synic->event_page_addr) {
        return -ENXIO;
    }

    set_idx = BIT_WORD(eventno);
    set_mask = BIT_MASK(eventno);
    flags = synic->event_page->slot[sint_route->sint].flags;

    if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) {
        memory_region_set_dirty(&synic->event_page_mr, 0,
                                sizeof(*synic->event_page));
        ret = hyperv_sint_route_set_sint(sint_route);
    } else {
        ret = 0;
    }
    return ret;
}

HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint,
                                   HvSintMsgCb cb, void *cb_data)
{
    HvSintRoute *sint_route;
    EventNotifier *ack_notifier;
    int r, gsi;
    CPUState *cs;
    SynICState *synic;

    cs = hyperv_find_vcpu(vp_index);
    if (!cs) {
        return NULL;
    }

    synic = get_synic(cs);
    if (!synic) {
        return NULL;
    }

    sint_route = g_new0(HvSintRoute, 1);
    r = event_notifier_init(&sint_route->sint_set_notifier, false);
    if (r) {
        goto err;
    }

    ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL;
    if (ack_notifier) {
        sint_route->staged_msg = g_new0(HvSintStagedMessage, 1);
        sint_route->staged_msg->cb = cb;
        sint_route->staged_msg->cb_data = cb_data;

        r = event_notifier_init(ack_notifier, false);
        if (r) {
            goto err_sint_set_notifier;
        }

        event_notifier_set_handler(ack_notifier, sint_ack_handler);
    }

    gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint);
    if (gsi < 0) {
        goto err_gsi;
    }

    r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
                                           &sint_route->sint_set_notifier,
                                           ack_notifier, gsi);
    if (r) {
        goto err_irqfd;
    }
    sint_route->gsi = gsi;
    sint_route->synic = synic;
    sint_route->sint = sint;
    sint_route->refcount = 1;

    return sint_route;

err_irqfd:
    kvm_irqchip_release_virq(kvm_state, gsi);
err_gsi:
    if (ack_notifier) {
        event_notifier_set_handler(ack_notifier, NULL);
        event_notifier_cleanup(ack_notifier);
        g_free(sint_route->staged_msg);
    }
err_sint_set_notifier:
    event_notifier_cleanup(&sint_route->sint_set_notifier);
err:
    g_free(sint_route);

    return NULL;
}

void hyperv_sint_route_ref(HvSintRoute *sint_route)
{
    sint_route->refcount++;
}

void hyperv_sint_route_unref(HvSintRoute *sint_route)
{
    if (!sint_route) {
        return;
    }

    assert(sint_route->refcount > 0);

    if (--sint_route->refcount) {
        return;
    }

    kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
                                          &sint_route->sint_set_notifier,
                                          sint_route->gsi);
    kvm_irqchip_release_virq(kvm_state, sint_route->gsi);
    if (sint_route->staged_msg) {
        event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL);
        event_notifier_cleanup(&sint_route->sint_ack_notifier);
        g_free(sint_route->staged_msg);
    }
    event_notifier_cleanup(&sint_route->sint_set_notifier);
    g_free(sint_route);
}

int hyperv_sint_route_set_sint(HvSintRoute *sint_route)
{
    return event_notifier_set(&sint_route->sint_set_notifier);
}
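
/*
 * Typical route lifecycle (illustrative sketch, assuming a vmbus-style user
 * of this API):
 *
 *     HvSintRoute *route = hyperv_sint_route_new(vp_index, sint,
 *                                                my_msg_cb, opaque);
 *     ...
 *     hyperv_post_msg(route, &msg);           // message-based signaling
 *     hyperv_set_event_flag(route, eventno);  // or event-flag signaling
 *     ...
 *     hyperv_sint_route_unref(route);         // drop the initial reference
 */
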
typedef struct MsgHandler {
    struct rcu_head rcu;
    QLIST_ENTRY(MsgHandler) link;
    uint32_t conn_id;
    HvMsgHandler handler;
    void *data;
} MsgHandler;

typedef struct EventFlagHandler {
    struct rcu_head rcu;
    QLIST_ENTRY(EventFlagHandler) link;
    uint32_t conn_id;
    EventNotifier *notifier;
} EventFlagHandler;

static QLIST_HEAD(, MsgHandler) msg_handlers;
static QLIST_HEAD(, EventFlagHandler) event_flag_handlers;
static QemuMutex handlers_mutex;

static void __attribute__((constructor)) hv_init(void)
{
    QLIST_INIT(&msg_handlers);
    QLIST_INIT(&event_flag_handlers);
    qemu_mutex_init(&handlers_mutex);
}

int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data)
{
    int ret;
    MsgHandler *mh;

    QEMU_LOCK_GUARD(&handlers_mutex);
    QLIST_FOREACH(mh, &msg_handlers, link) {
        if (mh->conn_id == conn_id) {
            if (handler) {
                ret = -EEXIST;
            } else {
                QLIST_REMOVE_RCU(mh, link);
                g_free_rcu(mh, rcu);
                ret = 0;
            }
            return ret;
        }
    }

    if (handler) {
        mh = g_new(MsgHandler, 1);
        mh->conn_id = conn_id;
        mh->handler = handler;
        mh->data = data;
        QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link);
        ret = 0;
    } else {
        ret = -ENOENT;
    }

    return ret;
}

uint16_t hyperv_hcall_post_message(uint64_t param, bool fast)
{
    uint16_t ret;
    hwaddr len;
    struct hyperv_post_message_input *msg;
    MsgHandler *mh;

    if (fast) {
        return HV_STATUS_INVALID_HYPERCALL_CODE;
    }
    if (param & (__alignof__(*msg) - 1)) {
        return HV_STATUS_INVALID_ALIGNMENT;
    }

    len = sizeof(*msg);
    msg = cpu_physical_memory_map(param, &len, 0);
    if (len < sizeof(*msg)) {
        ret = HV_STATUS_INSUFFICIENT_MEMORY;
        goto unmap;
    }
    if (msg->payload_size > sizeof(msg->payload)) {
        ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
        goto unmap;
    }

    ret = HV_STATUS_INVALID_CONNECTION_ID;
    WITH_RCU_READ_LOCK_GUARD() {
        QLIST_FOREACH_RCU(mh, &msg_handlers, link) {
            if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) {
                ret = mh->handler(msg, mh->data);
                break;
            }
        }
    }

unmap:
    cpu_physical_memory_unmap(msg, len, 0, 0);
    return ret;
}

static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
{
    int ret;
    EventFlagHandler *handler;

    QEMU_LOCK_GUARD(&handlers_mutex);
    QLIST_FOREACH(handler, &event_flag_handlers, link) {
        if (handler->conn_id == conn_id) {
            if (notifier) {
                ret = -EEXIST;
            } else {
                QLIST_REMOVE_RCU(handler, link);
                g_free_rcu(handler, rcu);
                ret = 0;
            }
            return ret;
        }
    }

    if (notifier) {
        handler = g_new(EventFlagHandler, 1);
        handler->conn_id = conn_id;
        handler->notifier = notifier;
        QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link);
        ret = 0;
    } else {
        ret = -ENOENT;
    }

    return ret;
}
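
/*
 * Host-side services listening on a connection id register via
 * hyperv_set_msg_handler() (above) and hyperv_set_event_flag_handler()
 * (below); the hypercall dispatchers then route guest posts/signals to them.
 * Illustrative sketch (hypothetical handler and notifier names):
 *
 *     hyperv_set_msg_handler(conn_id, my_post_handler, opaque);
 *     hyperv_set_event_flag_handler(conn_id, &my_event_notifier);
 *
 * Passing NULL for the handler/notifier unregisters the connection.
 */
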
static bool process_event_flags_userspace;

int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
{
    if (!process_event_flags_userspace &&
        !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) {
        process_event_flags_userspace = true;

        warn_report("Hyper-V event signaling is not supported by this kernel; "
                    "using slower userspace hypercall processing");
    }

    if (!process_event_flags_userspace) {
        struct kvm_hyperv_eventfd hvevfd = {
            .conn_id = conn_id,
            .fd = notifier ? event_notifier_get_fd(notifier) : -1,
            .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN,
        };

        return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd);
    }
    return set_event_flag_handler(conn_id, notifier);
}

uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast)
{
    EventFlagHandler *handler;

    if (unlikely(!fast)) {
        hwaddr addr = param;

        if (addr & (__alignof__(addr) - 1)) {
            return HV_STATUS_INVALID_ALIGNMENT;
        }

        param = ldq_phys(&address_space_memory, addr);
    }

    /*
     * Per spec, bits 32-47 contain the extra "flag number".  However, we
     * have no use for it, and in all known use cases it is zero, so just
     * report lookup failure if it isn't.
     */
    if (param & 0xffff00000000ULL) {
        return HV_STATUS_INVALID_PORT_ID;
    }
    /* remaining bits are reserved-zero */
    if (param & ~HV_CONNECTION_ID_MASK) {
        return HV_STATUS_INVALID_HYPERCALL_INPUT;
    }

    RCU_READ_LOCK_GUARD();
    QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) {
        if (handler->conn_id == param) {
            event_notifier_set(handler->notifier);
            return 0;
        }
    }
    return HV_STATUS_INVALID_CONNECTION_ID;
}