1 /* 2 * Hyper-V guest/hypervisor interaction 3 * 4 * Copyright (c) 2015-2018 Virtuozzo International GmbH. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 */ 9 10 #include "qemu/osdep.h" 11 #include "qemu/main-loop.h" 12 #include "qemu/module.h" 13 #include "qapi/error.h" 14 #include "exec/address-spaces.h" 15 #include "sysemu/kvm.h" 16 #include "qemu/bitops.h" 17 #include "qemu/error-report.h" 18 #include "qemu/lockable.h" 19 #include "qemu/queue.h" 20 #include "qemu/rcu.h" 21 #include "qemu/rcu_queue.h" 22 #include "hw/hyperv/hyperv.h" 23 #include "qom/object.h" 24 25 struct SynICState { 26 DeviceState parent_obj; 27 28 CPUState *cs; 29 30 bool enabled; 31 hwaddr msg_page_addr; 32 hwaddr event_page_addr; 33 MemoryRegion msg_page_mr; 34 MemoryRegion event_page_mr; 35 struct hyperv_message_page *msg_page; 36 struct hyperv_event_flags_page *event_page; 37 }; 38 typedef struct SynICState SynICState; 39 40 #define TYPE_SYNIC "hyperv-synic" 41 DECLARE_INSTANCE_CHECKER(SynICState, SYNIC, 42 TYPE_SYNIC) 43 44 static bool synic_enabled; 45 46 bool hyperv_is_synic_enabled(void) 47 { 48 return synic_enabled; 49 } 50 51 static SynICState *get_synic(CPUState *cs) 52 { 53 return SYNIC(object_resolve_path_component(OBJECT(cs), "synic")); 54 } 55 56 static void synic_update(SynICState *synic, bool enable, 57 hwaddr msg_page_addr, hwaddr event_page_addr) 58 { 59 60 synic->enabled = enable; 61 if (synic->msg_page_addr != msg_page_addr) { 62 if (synic->msg_page_addr) { 63 memory_region_del_subregion(get_system_memory(), 64 &synic->msg_page_mr); 65 } 66 if (msg_page_addr) { 67 memory_region_add_subregion(get_system_memory(), msg_page_addr, 68 &synic->msg_page_mr); 69 } 70 synic->msg_page_addr = msg_page_addr; 71 } 72 if (synic->event_page_addr != event_page_addr) { 73 if (synic->event_page_addr) { 74 memory_region_del_subregion(get_system_memory(), 75 &synic->event_page_mr); 76 } 77 if (event_page_addr) { 78 memory_region_add_subregion(get_system_memory(), event_page_addr, 79 &synic->event_page_mr); 80 } 81 synic->event_page_addr = event_page_addr; 82 } 83 } 84 85 void hyperv_synic_update(CPUState *cs, bool enable, 86 hwaddr msg_page_addr, hwaddr event_page_addr) 87 { 88 SynICState *synic = get_synic(cs); 89 90 if (!synic) { 91 return; 92 } 93 94 synic_update(synic, enable, msg_page_addr, event_page_addr); 95 } 96 97 static void synic_realize(DeviceState *dev, Error **errp) 98 { 99 Object *obj = OBJECT(dev); 100 SynICState *synic = SYNIC(dev); 101 char *msgp_name, *eventp_name; 102 uint32_t vp_index; 103 104 /* memory region names have to be globally unique */ 105 vp_index = hyperv_vp_index(synic->cs); 106 msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index); 107 eventp_name = g_strdup_printf("synic-%u-event-page", vp_index); 108 109 memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name, 110 sizeof(*synic->msg_page), &error_abort); 111 memory_region_init_ram(&synic->event_page_mr, obj, eventp_name, 112 sizeof(*synic->event_page), &error_abort); 113 synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr); 114 synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr); 115 116 g_free(msgp_name); 117 g_free(eventp_name); 118 } 119 static void synic_reset(DeviceState *dev) 120 { 121 SynICState *synic = SYNIC(dev); 122 memset(synic->msg_page, 0, sizeof(*synic->msg_page)); 123 memset(synic->event_page, 0, sizeof(*synic->event_page)); 124 synic_update(synic, false, 0, 0); 125 } 126 127 static void synic_class_init(ObjectClass *klass, void *data) 128 { 129 DeviceClass *dc = DEVICE_CLASS(klass); 130 131 dc->realize = synic_realize; 132 dc->reset = synic_reset; 133 dc->user_creatable = false; 134 } 135 136 void hyperv_synic_add(CPUState *cs) 137 { 138 Object *obj; 139 SynICState *synic; 140 141 obj = object_new(TYPE_SYNIC); 142 synic = SYNIC(obj); 143 synic->cs = cs; 144 object_property_add_child(OBJECT(cs), "synic", obj); 145 object_unref(obj); 146 qdev_realize(DEVICE(obj), NULL, &error_abort); 147 synic_enabled = true; 148 } 149 150 void hyperv_synic_reset(CPUState *cs) 151 { 152 SynICState *synic = get_synic(cs); 153 154 if (synic) { 155 device_legacy_reset(DEVICE(synic)); 156 } 157 } 158 159 static const TypeInfo synic_type_info = { 160 .name = TYPE_SYNIC, 161 .parent = TYPE_DEVICE, 162 .instance_size = sizeof(SynICState), 163 .class_init = synic_class_init, 164 }; 165 166 static void synic_register_types(void) 167 { 168 type_register_static(&synic_type_info); 169 } 170 171 type_init(synic_register_types) 172 173 /* 174 * KVM has its own message producers (SynIC timers). To guarantee 175 * serialization with both KVM vcpu and the guest cpu, the messages are first 176 * staged in an intermediate area and then posted to the SynIC message page in 177 * the vcpu thread. 178 */ 179 typedef struct HvSintStagedMessage { 180 /* message content staged by hyperv_post_msg */ 181 struct hyperv_message msg; 182 /* callback + data (r/o) to complete the processing in a BH */ 183 HvSintMsgCb cb; 184 void *cb_data; 185 /* message posting status filled by cpu_post_msg */ 186 int status; 187 /* passing the buck: */ 188 enum { 189 /* initial state */ 190 HV_STAGED_MSG_FREE, 191 /* 192 * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE -> 193 * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu 194 */ 195 HV_STAGED_MSG_BUSY, 196 /* 197 * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot, 198 * notify the guest, records the status, marks the posting done (BUSY 199 * -> POSTED), and schedules sint_msg_bh BH 200 */ 201 HV_STAGED_MSG_POSTED, 202 /* 203 * sint_msg_bh (BH) verifies that the posting is done, runs the 204 * callback, and starts over (POSTED -> FREE) 205 */ 206 } state; 207 } HvSintStagedMessage; 208 209 struct HvSintRoute { 210 uint32_t sint; 211 SynICState *synic; 212 int gsi; 213 EventNotifier sint_set_notifier; 214 EventNotifier sint_ack_notifier; 215 216 HvSintStagedMessage *staged_msg; 217 218 unsigned refcount; 219 }; 220 221 static CPUState *hyperv_find_vcpu(uint32_t vp_index) 222 { 223 CPUState *cs = qemu_get_cpu(vp_index); 224 assert(hyperv_vp_index(cs) == vp_index); 225 return cs; 226 } 227 228 /* 229 * BH to complete the processing of a staged message. 230 */ 231 static void sint_msg_bh(void *opaque) 232 { 233 HvSintRoute *sint_route = opaque; 234 HvSintStagedMessage *staged_msg = sint_route->staged_msg; 235 236 if (atomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) { 237 /* status nor ready yet (spurious ack from guest?), ignore */ 238 return; 239 } 240 241 staged_msg->cb(staged_msg->cb_data, staged_msg->status); 242 staged_msg->status = 0; 243 244 /* staged message processing finished, ready to start over */ 245 atomic_set(&staged_msg->state, HV_STAGED_MSG_FREE); 246 /* drop the reference taken in hyperv_post_msg */ 247 hyperv_sint_route_unref(sint_route); 248 } 249 250 /* 251 * Worker to transfer the message from the staging area into the SynIC message 252 * page in vcpu context. 253 */ 254 static void cpu_post_msg(CPUState *cs, run_on_cpu_data data) 255 { 256 HvSintRoute *sint_route = data.host_ptr; 257 HvSintStagedMessage *staged_msg = sint_route->staged_msg; 258 SynICState *synic = sint_route->synic; 259 struct hyperv_message *dst_msg; 260 bool wait_for_sint_ack = false; 261 262 assert(staged_msg->state == HV_STAGED_MSG_BUSY); 263 264 if (!synic->enabled || !synic->msg_page_addr) { 265 staged_msg->status = -ENXIO; 266 goto posted; 267 } 268 269 dst_msg = &synic->msg_page->slot[sint_route->sint]; 270 271 if (dst_msg->header.message_type != HV_MESSAGE_NONE) { 272 dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING; 273 staged_msg->status = -EAGAIN; 274 wait_for_sint_ack = true; 275 } else { 276 memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg)); 277 staged_msg->status = hyperv_sint_route_set_sint(sint_route); 278 } 279 280 memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page)); 281 282 posted: 283 atomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED); 284 /* 285 * Notify the msg originator of the progress made; if the slot was busy we 286 * set msg_pending flag in it so it will be the guest who will do EOM and 287 * trigger the notification from KVM via sint_ack_notifier 288 */ 289 if (!wait_for_sint_ack) { 290 aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, 291 sint_route); 292 } 293 } 294 295 /* 296 * Post a Hyper-V message to the staging area, for delivery to guest in the 297 * vcpu thread. 298 */ 299 int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg) 300 { 301 HvSintStagedMessage *staged_msg = sint_route->staged_msg; 302 303 assert(staged_msg); 304 305 /* grab the staging area */ 306 if (atomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE, 307 HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) { 308 return -EAGAIN; 309 } 310 311 memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg)); 312 313 /* hold a reference on sint_route until the callback is finished */ 314 hyperv_sint_route_ref(sint_route); 315 316 /* schedule message posting attempt in vcpu thread */ 317 async_run_on_cpu(sint_route->synic->cs, cpu_post_msg, 318 RUN_ON_CPU_HOST_PTR(sint_route)); 319 return 0; 320 } 321 322 static void sint_ack_handler(EventNotifier *notifier) 323 { 324 HvSintRoute *sint_route = container_of(notifier, HvSintRoute, 325 sint_ack_notifier); 326 event_notifier_test_and_clear(notifier); 327 328 /* 329 * the guest consumed the previous message so complete the current one with 330 * -EAGAIN and let the msg originator retry 331 */ 332 aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route); 333 } 334 335 /* 336 * Set given event flag for a given sint on a given vcpu, and signal the sint. 337 */ 338 int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno) 339 { 340 int ret; 341 SynICState *synic = sint_route->synic; 342 unsigned long *flags, set_mask; 343 unsigned set_idx; 344 345 if (eventno > HV_EVENT_FLAGS_COUNT) { 346 return -EINVAL; 347 } 348 if (!synic->enabled || !synic->event_page_addr) { 349 return -ENXIO; 350 } 351 352 set_idx = BIT_WORD(eventno); 353 set_mask = BIT_MASK(eventno); 354 flags = synic->event_page->slot[sint_route->sint].flags; 355 356 if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) { 357 memory_region_set_dirty(&synic->event_page_mr, 0, 358 sizeof(*synic->event_page)); 359 ret = hyperv_sint_route_set_sint(sint_route); 360 } else { 361 ret = 0; 362 } 363 return ret; 364 } 365 366 HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, 367 HvSintMsgCb cb, void *cb_data) 368 { 369 HvSintRoute *sint_route; 370 EventNotifier *ack_notifier; 371 int r, gsi; 372 CPUState *cs; 373 SynICState *synic; 374 375 cs = hyperv_find_vcpu(vp_index); 376 if (!cs) { 377 return NULL; 378 } 379 380 synic = get_synic(cs); 381 if (!synic) { 382 return NULL; 383 } 384 385 sint_route = g_new0(HvSintRoute, 1); 386 r = event_notifier_init(&sint_route->sint_set_notifier, false); 387 if (r) { 388 goto err; 389 } 390 391 392 ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL; 393 if (ack_notifier) { 394 sint_route->staged_msg = g_new0(HvSintStagedMessage, 1); 395 sint_route->staged_msg->cb = cb; 396 sint_route->staged_msg->cb_data = cb_data; 397 398 r = event_notifier_init(ack_notifier, false); 399 if (r) { 400 goto err_sint_set_notifier; 401 } 402 403 event_notifier_set_handler(ack_notifier, sint_ack_handler); 404 } 405 406 gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint); 407 if (gsi < 0) { 408 goto err_gsi; 409 } 410 411 r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, 412 &sint_route->sint_set_notifier, 413 ack_notifier, gsi); 414 if (r) { 415 goto err_irqfd; 416 } 417 sint_route->gsi = gsi; 418 sint_route->synic = synic; 419 sint_route->sint = sint; 420 sint_route->refcount = 1; 421 422 return sint_route; 423 424 err_irqfd: 425 kvm_irqchip_release_virq(kvm_state, gsi); 426 err_gsi: 427 if (ack_notifier) { 428 event_notifier_set_handler(ack_notifier, NULL); 429 event_notifier_cleanup(ack_notifier); 430 g_free(sint_route->staged_msg); 431 } 432 err_sint_set_notifier: 433 event_notifier_cleanup(&sint_route->sint_set_notifier); 434 err: 435 g_free(sint_route); 436 437 return NULL; 438 } 439 440 void hyperv_sint_route_ref(HvSintRoute *sint_route) 441 { 442 sint_route->refcount++; 443 } 444 445 void hyperv_sint_route_unref(HvSintRoute *sint_route) 446 { 447 if (!sint_route) { 448 return; 449 } 450 451 assert(sint_route->refcount > 0); 452 453 if (--sint_route->refcount) { 454 return; 455 } 456 457 kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, 458 &sint_route->sint_set_notifier, 459 sint_route->gsi); 460 kvm_irqchip_release_virq(kvm_state, sint_route->gsi); 461 if (sint_route->staged_msg) { 462 event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL); 463 event_notifier_cleanup(&sint_route->sint_ack_notifier); 464 g_free(sint_route->staged_msg); 465 } 466 event_notifier_cleanup(&sint_route->sint_set_notifier); 467 g_free(sint_route); 468 } 469 470 int hyperv_sint_route_set_sint(HvSintRoute *sint_route) 471 { 472 return event_notifier_set(&sint_route->sint_set_notifier); 473 } 474 475 typedef struct MsgHandler { 476 struct rcu_head rcu; 477 QLIST_ENTRY(MsgHandler) link; 478 uint32_t conn_id; 479 HvMsgHandler handler; 480 void *data; 481 } MsgHandler; 482 483 typedef struct EventFlagHandler { 484 struct rcu_head rcu; 485 QLIST_ENTRY(EventFlagHandler) link; 486 uint32_t conn_id; 487 EventNotifier *notifier; 488 } EventFlagHandler; 489 490 static QLIST_HEAD(, MsgHandler) msg_handlers; 491 static QLIST_HEAD(, EventFlagHandler) event_flag_handlers; 492 static QemuMutex handlers_mutex; 493 494 static void __attribute__((constructor)) hv_init(void) 495 { 496 QLIST_INIT(&msg_handlers); 497 QLIST_INIT(&event_flag_handlers); 498 qemu_mutex_init(&handlers_mutex); 499 } 500 501 int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data) 502 { 503 int ret; 504 MsgHandler *mh; 505 506 QEMU_LOCK_GUARD(&handlers_mutex); 507 QLIST_FOREACH(mh, &msg_handlers, link) { 508 if (mh->conn_id == conn_id) { 509 if (handler) { 510 ret = -EEXIST; 511 } else { 512 QLIST_REMOVE_RCU(mh, link); 513 g_free_rcu(mh, rcu); 514 ret = 0; 515 } 516 return ret; 517 } 518 } 519 520 if (handler) { 521 mh = g_new(MsgHandler, 1); 522 mh->conn_id = conn_id; 523 mh->handler = handler; 524 mh->data = data; 525 QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link); 526 ret = 0; 527 } else { 528 ret = -ENOENT; 529 } 530 531 return ret; 532 } 533 534 uint16_t hyperv_hcall_post_message(uint64_t param, bool fast) 535 { 536 uint16_t ret; 537 hwaddr len; 538 struct hyperv_post_message_input *msg; 539 MsgHandler *mh; 540 541 if (fast) { 542 return HV_STATUS_INVALID_HYPERCALL_CODE; 543 } 544 if (param & (__alignof__(*msg) - 1)) { 545 return HV_STATUS_INVALID_ALIGNMENT; 546 } 547 548 len = sizeof(*msg); 549 msg = cpu_physical_memory_map(param, &len, 0); 550 if (len < sizeof(*msg)) { 551 ret = HV_STATUS_INSUFFICIENT_MEMORY; 552 goto unmap; 553 } 554 if (msg->payload_size > sizeof(msg->payload)) { 555 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 556 goto unmap; 557 } 558 559 ret = HV_STATUS_INVALID_CONNECTION_ID; 560 WITH_RCU_READ_LOCK_GUARD() { 561 QLIST_FOREACH_RCU(mh, &msg_handlers, link) { 562 if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) { 563 ret = mh->handler(msg, mh->data); 564 break; 565 } 566 } 567 } 568 569 unmap: 570 cpu_physical_memory_unmap(msg, len, 0, 0); 571 return ret; 572 } 573 574 static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier) 575 { 576 int ret; 577 EventFlagHandler *handler; 578 579 QEMU_LOCK_GUARD(&handlers_mutex); 580 QLIST_FOREACH(handler, &event_flag_handlers, link) { 581 if (handler->conn_id == conn_id) { 582 if (notifier) { 583 ret = -EEXIST; 584 } else { 585 QLIST_REMOVE_RCU(handler, link); 586 g_free_rcu(handler, rcu); 587 ret = 0; 588 } 589 return ret; 590 } 591 } 592 593 if (notifier) { 594 handler = g_new(EventFlagHandler, 1); 595 handler->conn_id = conn_id; 596 handler->notifier = notifier; 597 QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link); 598 ret = 0; 599 } else { 600 ret = -ENOENT; 601 } 602 603 return ret; 604 } 605 606 static bool process_event_flags_userspace; 607 608 int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier) 609 { 610 if (!process_event_flags_userspace && 611 !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) { 612 process_event_flags_userspace = true; 613 614 warn_report("Hyper-V event signaling is not supported by this kernel; " 615 "using slower userspace hypercall processing"); 616 } 617 618 if (!process_event_flags_userspace) { 619 struct kvm_hyperv_eventfd hvevfd = { 620 .conn_id = conn_id, 621 .fd = notifier ? event_notifier_get_fd(notifier) : -1, 622 .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN, 623 }; 624 625 return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd); 626 } 627 return set_event_flag_handler(conn_id, notifier); 628 } 629 630 uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast) 631 { 632 EventFlagHandler *handler; 633 634 if (unlikely(!fast)) { 635 hwaddr addr = param; 636 637 if (addr & (__alignof__(addr) - 1)) { 638 return HV_STATUS_INVALID_ALIGNMENT; 639 } 640 641 param = ldq_phys(&address_space_memory, addr); 642 } 643 644 /* 645 * Per spec, bits 32-47 contain the extra "flag number". However, we 646 * have no use for it, and in all known usecases it is zero, so just 647 * report lookup failure if it isn't. 648 */ 649 if (param & 0xffff00000000ULL) { 650 return HV_STATUS_INVALID_PORT_ID; 651 } 652 /* remaining bits are reserved-zero */ 653 if (param & ~HV_CONNECTION_ID_MASK) { 654 return HV_STATUS_INVALID_HYPERCALL_INPUT; 655 } 656 657 RCU_READ_LOCK_GUARD(); 658 QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) { 659 if (handler->conn_id == param) { 660 event_notifier_set(handler->notifier); 661 return 0; 662 } 663 } 664 return HV_STATUS_INVALID_CONNECTION_ID; 665 } 666