1 /* 2 * Hyper-V guest/hypervisor interaction 3 * 4 * Copyright (c) 2015-2018 Virtuozzo International GmbH. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 */ 9 10 #include "qemu/osdep.h" 11 #include "qemu/main-loop.h" 12 #include "qemu/module.h" 13 #include "qapi/error.h" 14 #include "exec/address-spaces.h" 15 #include "sysemu/kvm.h" 16 #include "qemu/bitops.h" 17 #include "qemu/error-report.h" 18 #include "qemu/lockable.h" 19 #include "qemu/queue.h" 20 #include "qemu/rcu.h" 21 #include "qemu/rcu_queue.h" 22 #include "hw/hyperv/hyperv.h" 23 #include "qom/object.h" 24 25 struct SynICState { 26 DeviceState parent_obj; 27 28 CPUState *cs; 29 30 bool enabled; 31 hwaddr msg_page_addr; 32 hwaddr event_page_addr; 33 MemoryRegion msg_page_mr; 34 MemoryRegion event_page_mr; 35 struct hyperv_message_page *msg_page; 36 struct hyperv_event_flags_page *event_page; 37 }; 38 39 #define TYPE_SYNIC "hyperv-synic" 40 OBJECT_DECLARE_SIMPLE_TYPE(SynICState, SYNIC) 41 42 static bool synic_enabled; 43 44 bool hyperv_is_synic_enabled(void) 45 { 46 return synic_enabled; 47 } 48 49 static SynICState *get_synic(CPUState *cs) 50 { 51 return SYNIC(object_resolve_path_component(OBJECT(cs), "synic")); 52 } 53 54 static void synic_update(SynICState *synic, bool enable, 55 hwaddr msg_page_addr, hwaddr event_page_addr) 56 { 57 58 synic->enabled = enable; 59 if (synic->msg_page_addr != msg_page_addr) { 60 if (synic->msg_page_addr) { 61 memory_region_del_subregion(get_system_memory(), 62 &synic->msg_page_mr); 63 } 64 if (msg_page_addr) { 65 memory_region_add_subregion(get_system_memory(), msg_page_addr, 66 &synic->msg_page_mr); 67 } 68 synic->msg_page_addr = msg_page_addr; 69 } 70 if (synic->event_page_addr != event_page_addr) { 71 if (synic->event_page_addr) { 72 memory_region_del_subregion(get_system_memory(), 73 &synic->event_page_mr); 74 } 75 if (event_page_addr) { 76 memory_region_add_subregion(get_system_memory(), event_page_addr, 77 &synic->event_page_mr); 78 } 79 synic->event_page_addr = event_page_addr; 80 } 81 } 82 83 void hyperv_synic_update(CPUState *cs, bool enable, 84 hwaddr msg_page_addr, hwaddr event_page_addr) 85 { 86 SynICState *synic = get_synic(cs); 87 88 if (!synic) { 89 return; 90 } 91 92 synic_update(synic, enable, msg_page_addr, event_page_addr); 93 } 94 95 static void synic_realize(DeviceState *dev, Error **errp) 96 { 97 Object *obj = OBJECT(dev); 98 SynICState *synic = SYNIC(dev); 99 char *msgp_name, *eventp_name; 100 uint32_t vp_index; 101 102 /* memory region names have to be globally unique */ 103 vp_index = hyperv_vp_index(synic->cs); 104 msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index); 105 eventp_name = g_strdup_printf("synic-%u-event-page", vp_index); 106 107 memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name, 108 sizeof(*synic->msg_page), &error_abort); 109 memory_region_init_ram(&synic->event_page_mr, obj, eventp_name, 110 sizeof(*synic->event_page), &error_abort); 111 synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr); 112 synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr); 113 114 g_free(msgp_name); 115 g_free(eventp_name); 116 } 117 static void synic_reset(DeviceState *dev) 118 { 119 SynICState *synic = SYNIC(dev); 120 memset(synic->msg_page, 0, sizeof(*synic->msg_page)); 121 memset(synic->event_page, 0, sizeof(*synic->event_page)); 122 synic_update(synic, false, 0, 0); 123 } 124 125 static void synic_class_init(ObjectClass *klass, void *data) 126 { 127 DeviceClass *dc = DEVICE_CLASS(klass); 128 129 dc->realize = synic_realize; 130 dc->reset = synic_reset; 131 dc->user_creatable = false; 132 } 133 134 void hyperv_synic_add(CPUState *cs) 135 { 136 Object *obj; 137 SynICState *synic; 138 139 obj = object_new(TYPE_SYNIC); 140 synic = SYNIC(obj); 141 synic->cs = cs; 142 object_property_add_child(OBJECT(cs), "synic", obj); 143 object_unref(obj); 144 qdev_realize(DEVICE(obj), NULL, &error_abort); 145 synic_enabled = true; 146 } 147 148 void hyperv_synic_reset(CPUState *cs) 149 { 150 SynICState *synic = get_synic(cs); 151 152 if (synic) { 153 device_legacy_reset(DEVICE(synic)); 154 } 155 } 156 157 static const TypeInfo synic_type_info = { 158 .name = TYPE_SYNIC, 159 .parent = TYPE_DEVICE, 160 .instance_size = sizeof(SynICState), 161 .class_init = synic_class_init, 162 }; 163 164 static void synic_register_types(void) 165 { 166 type_register_static(&synic_type_info); 167 } 168 169 type_init(synic_register_types) 170 171 /* 172 * KVM has its own message producers (SynIC timers). To guarantee 173 * serialization with both KVM vcpu and the guest cpu, the messages are first 174 * staged in an intermediate area and then posted to the SynIC message page in 175 * the vcpu thread. 176 */ 177 typedef struct HvSintStagedMessage { 178 /* message content staged by hyperv_post_msg */ 179 struct hyperv_message msg; 180 /* callback + data (r/o) to complete the processing in a BH */ 181 HvSintMsgCb cb; 182 void *cb_data; 183 /* message posting status filled by cpu_post_msg */ 184 int status; 185 /* passing the buck: */ 186 enum { 187 /* initial state */ 188 HV_STAGED_MSG_FREE, 189 /* 190 * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE -> 191 * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu 192 */ 193 HV_STAGED_MSG_BUSY, 194 /* 195 * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot, 196 * notify the guest, records the status, marks the posting done (BUSY 197 * -> POSTED), and schedules sint_msg_bh BH 198 */ 199 HV_STAGED_MSG_POSTED, 200 /* 201 * sint_msg_bh (BH) verifies that the posting is done, runs the 202 * callback, and starts over (POSTED -> FREE) 203 */ 204 } state; 205 } HvSintStagedMessage; 206 207 struct HvSintRoute { 208 uint32_t sint; 209 SynICState *synic; 210 int gsi; 211 EventNotifier sint_set_notifier; 212 EventNotifier sint_ack_notifier; 213 214 HvSintStagedMessage *staged_msg; 215 216 unsigned refcount; 217 }; 218 219 static CPUState *hyperv_find_vcpu(uint32_t vp_index) 220 { 221 CPUState *cs = qemu_get_cpu(vp_index); 222 assert(hyperv_vp_index(cs) == vp_index); 223 return cs; 224 } 225 226 /* 227 * BH to complete the processing of a staged message. 228 */ 229 static void sint_msg_bh(void *opaque) 230 { 231 HvSintRoute *sint_route = opaque; 232 HvSintStagedMessage *staged_msg = sint_route->staged_msg; 233 234 if (atomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) { 235 /* status nor ready yet (spurious ack from guest?), ignore */ 236 return; 237 } 238 239 staged_msg->cb(staged_msg->cb_data, staged_msg->status); 240 staged_msg->status = 0; 241 242 /* staged message processing finished, ready to start over */ 243 atomic_set(&staged_msg->state, HV_STAGED_MSG_FREE); 244 /* drop the reference taken in hyperv_post_msg */ 245 hyperv_sint_route_unref(sint_route); 246 } 247 248 /* 249 * Worker to transfer the message from the staging area into the SynIC message 250 * page in vcpu context. 251 */ 252 static void cpu_post_msg(CPUState *cs, run_on_cpu_data data) 253 { 254 HvSintRoute *sint_route = data.host_ptr; 255 HvSintStagedMessage *staged_msg = sint_route->staged_msg; 256 SynICState *synic = sint_route->synic; 257 struct hyperv_message *dst_msg; 258 bool wait_for_sint_ack = false; 259 260 assert(staged_msg->state == HV_STAGED_MSG_BUSY); 261 262 if (!synic->enabled || !synic->msg_page_addr) { 263 staged_msg->status = -ENXIO; 264 goto posted; 265 } 266 267 dst_msg = &synic->msg_page->slot[sint_route->sint]; 268 269 if (dst_msg->header.message_type != HV_MESSAGE_NONE) { 270 dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING; 271 staged_msg->status = -EAGAIN; 272 wait_for_sint_ack = true; 273 } else { 274 memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg)); 275 staged_msg->status = hyperv_sint_route_set_sint(sint_route); 276 } 277 278 memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page)); 279 280 posted: 281 atomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED); 282 /* 283 * Notify the msg originator of the progress made; if the slot was busy we 284 * set msg_pending flag in it so it will be the guest who will do EOM and 285 * trigger the notification from KVM via sint_ack_notifier 286 */ 287 if (!wait_for_sint_ack) { 288 aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, 289 sint_route); 290 } 291 } 292 293 /* 294 * Post a Hyper-V message to the staging area, for delivery to guest in the 295 * vcpu thread. 296 */ 297 int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg) 298 { 299 HvSintStagedMessage *staged_msg = sint_route->staged_msg; 300 301 assert(staged_msg); 302 303 /* grab the staging area */ 304 if (atomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE, 305 HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) { 306 return -EAGAIN; 307 } 308 309 memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg)); 310 311 /* hold a reference on sint_route until the callback is finished */ 312 hyperv_sint_route_ref(sint_route); 313 314 /* schedule message posting attempt in vcpu thread */ 315 async_run_on_cpu(sint_route->synic->cs, cpu_post_msg, 316 RUN_ON_CPU_HOST_PTR(sint_route)); 317 return 0; 318 } 319 320 static void sint_ack_handler(EventNotifier *notifier) 321 { 322 HvSintRoute *sint_route = container_of(notifier, HvSintRoute, 323 sint_ack_notifier); 324 event_notifier_test_and_clear(notifier); 325 326 /* 327 * the guest consumed the previous message so complete the current one with 328 * -EAGAIN and let the msg originator retry 329 */ 330 aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route); 331 } 332 333 /* 334 * Set given event flag for a given sint on a given vcpu, and signal the sint. 335 */ 336 int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno) 337 { 338 int ret; 339 SynICState *synic = sint_route->synic; 340 unsigned long *flags, set_mask; 341 unsigned set_idx; 342 343 if (eventno > HV_EVENT_FLAGS_COUNT) { 344 return -EINVAL; 345 } 346 if (!synic->enabled || !synic->event_page_addr) { 347 return -ENXIO; 348 } 349 350 set_idx = BIT_WORD(eventno); 351 set_mask = BIT_MASK(eventno); 352 flags = synic->event_page->slot[sint_route->sint].flags; 353 354 if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) { 355 memory_region_set_dirty(&synic->event_page_mr, 0, 356 sizeof(*synic->event_page)); 357 ret = hyperv_sint_route_set_sint(sint_route); 358 } else { 359 ret = 0; 360 } 361 return ret; 362 } 363 364 HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, 365 HvSintMsgCb cb, void *cb_data) 366 { 367 HvSintRoute *sint_route; 368 EventNotifier *ack_notifier; 369 int r, gsi; 370 CPUState *cs; 371 SynICState *synic; 372 373 cs = hyperv_find_vcpu(vp_index); 374 if (!cs) { 375 return NULL; 376 } 377 378 synic = get_synic(cs); 379 if (!synic) { 380 return NULL; 381 } 382 383 sint_route = g_new0(HvSintRoute, 1); 384 r = event_notifier_init(&sint_route->sint_set_notifier, false); 385 if (r) { 386 goto err; 387 } 388 389 390 ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL; 391 if (ack_notifier) { 392 sint_route->staged_msg = g_new0(HvSintStagedMessage, 1); 393 sint_route->staged_msg->cb = cb; 394 sint_route->staged_msg->cb_data = cb_data; 395 396 r = event_notifier_init(ack_notifier, false); 397 if (r) { 398 goto err_sint_set_notifier; 399 } 400 401 event_notifier_set_handler(ack_notifier, sint_ack_handler); 402 } 403 404 gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint); 405 if (gsi < 0) { 406 goto err_gsi; 407 } 408 409 r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, 410 &sint_route->sint_set_notifier, 411 ack_notifier, gsi); 412 if (r) { 413 goto err_irqfd; 414 } 415 sint_route->gsi = gsi; 416 sint_route->synic = synic; 417 sint_route->sint = sint; 418 sint_route->refcount = 1; 419 420 return sint_route; 421 422 err_irqfd: 423 kvm_irqchip_release_virq(kvm_state, gsi); 424 err_gsi: 425 if (ack_notifier) { 426 event_notifier_set_handler(ack_notifier, NULL); 427 event_notifier_cleanup(ack_notifier); 428 g_free(sint_route->staged_msg); 429 } 430 err_sint_set_notifier: 431 event_notifier_cleanup(&sint_route->sint_set_notifier); 432 err: 433 g_free(sint_route); 434 435 return NULL; 436 } 437 438 void hyperv_sint_route_ref(HvSintRoute *sint_route) 439 { 440 sint_route->refcount++; 441 } 442 443 void hyperv_sint_route_unref(HvSintRoute *sint_route) 444 { 445 if (!sint_route) { 446 return; 447 } 448 449 assert(sint_route->refcount > 0); 450 451 if (--sint_route->refcount) { 452 return; 453 } 454 455 kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, 456 &sint_route->sint_set_notifier, 457 sint_route->gsi); 458 kvm_irqchip_release_virq(kvm_state, sint_route->gsi); 459 if (sint_route->staged_msg) { 460 event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL); 461 event_notifier_cleanup(&sint_route->sint_ack_notifier); 462 g_free(sint_route->staged_msg); 463 } 464 event_notifier_cleanup(&sint_route->sint_set_notifier); 465 g_free(sint_route); 466 } 467 468 int hyperv_sint_route_set_sint(HvSintRoute *sint_route) 469 { 470 return event_notifier_set(&sint_route->sint_set_notifier); 471 } 472 473 typedef struct MsgHandler { 474 struct rcu_head rcu; 475 QLIST_ENTRY(MsgHandler) link; 476 uint32_t conn_id; 477 HvMsgHandler handler; 478 void *data; 479 } MsgHandler; 480 481 typedef struct EventFlagHandler { 482 struct rcu_head rcu; 483 QLIST_ENTRY(EventFlagHandler) link; 484 uint32_t conn_id; 485 EventNotifier *notifier; 486 } EventFlagHandler; 487 488 static QLIST_HEAD(, MsgHandler) msg_handlers; 489 static QLIST_HEAD(, EventFlagHandler) event_flag_handlers; 490 static QemuMutex handlers_mutex; 491 492 static void __attribute__((constructor)) hv_init(void) 493 { 494 QLIST_INIT(&msg_handlers); 495 QLIST_INIT(&event_flag_handlers); 496 qemu_mutex_init(&handlers_mutex); 497 } 498 499 int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data) 500 { 501 int ret; 502 MsgHandler *mh; 503 504 QEMU_LOCK_GUARD(&handlers_mutex); 505 QLIST_FOREACH(mh, &msg_handlers, link) { 506 if (mh->conn_id == conn_id) { 507 if (handler) { 508 ret = -EEXIST; 509 } else { 510 QLIST_REMOVE_RCU(mh, link); 511 g_free_rcu(mh, rcu); 512 ret = 0; 513 } 514 return ret; 515 } 516 } 517 518 if (handler) { 519 mh = g_new(MsgHandler, 1); 520 mh->conn_id = conn_id; 521 mh->handler = handler; 522 mh->data = data; 523 QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link); 524 ret = 0; 525 } else { 526 ret = -ENOENT; 527 } 528 529 return ret; 530 } 531 532 uint16_t hyperv_hcall_post_message(uint64_t param, bool fast) 533 { 534 uint16_t ret; 535 hwaddr len; 536 struct hyperv_post_message_input *msg; 537 MsgHandler *mh; 538 539 if (fast) { 540 return HV_STATUS_INVALID_HYPERCALL_CODE; 541 } 542 if (param & (__alignof__(*msg) - 1)) { 543 return HV_STATUS_INVALID_ALIGNMENT; 544 } 545 546 len = sizeof(*msg); 547 msg = cpu_physical_memory_map(param, &len, 0); 548 if (len < sizeof(*msg)) { 549 ret = HV_STATUS_INSUFFICIENT_MEMORY; 550 goto unmap; 551 } 552 if (msg->payload_size > sizeof(msg->payload)) { 553 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 554 goto unmap; 555 } 556 557 ret = HV_STATUS_INVALID_CONNECTION_ID; 558 WITH_RCU_READ_LOCK_GUARD() { 559 QLIST_FOREACH_RCU(mh, &msg_handlers, link) { 560 if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) { 561 ret = mh->handler(msg, mh->data); 562 break; 563 } 564 } 565 } 566 567 unmap: 568 cpu_physical_memory_unmap(msg, len, 0, 0); 569 return ret; 570 } 571 572 static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier) 573 { 574 int ret; 575 EventFlagHandler *handler; 576 577 QEMU_LOCK_GUARD(&handlers_mutex); 578 QLIST_FOREACH(handler, &event_flag_handlers, link) { 579 if (handler->conn_id == conn_id) { 580 if (notifier) { 581 ret = -EEXIST; 582 } else { 583 QLIST_REMOVE_RCU(handler, link); 584 g_free_rcu(handler, rcu); 585 ret = 0; 586 } 587 return ret; 588 } 589 } 590 591 if (notifier) { 592 handler = g_new(EventFlagHandler, 1); 593 handler->conn_id = conn_id; 594 handler->notifier = notifier; 595 QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link); 596 ret = 0; 597 } else { 598 ret = -ENOENT; 599 } 600 601 return ret; 602 } 603 604 static bool process_event_flags_userspace; 605 606 int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier) 607 { 608 if (!process_event_flags_userspace && 609 !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) { 610 process_event_flags_userspace = true; 611 612 warn_report("Hyper-V event signaling is not supported by this kernel; " 613 "using slower userspace hypercall processing"); 614 } 615 616 if (!process_event_flags_userspace) { 617 struct kvm_hyperv_eventfd hvevfd = { 618 .conn_id = conn_id, 619 .fd = notifier ? event_notifier_get_fd(notifier) : -1, 620 .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN, 621 }; 622 623 return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd); 624 } 625 return set_event_flag_handler(conn_id, notifier); 626 } 627 628 uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast) 629 { 630 EventFlagHandler *handler; 631 632 if (unlikely(!fast)) { 633 hwaddr addr = param; 634 635 if (addr & (__alignof__(addr) - 1)) { 636 return HV_STATUS_INVALID_ALIGNMENT; 637 } 638 639 param = ldq_phys(&address_space_memory, addr); 640 } 641 642 /* 643 * Per spec, bits 32-47 contain the extra "flag number". However, we 644 * have no use for it, and in all known usecases it is zero, so just 645 * report lookup failure if it isn't. 646 */ 647 if (param & 0xffff00000000ULL) { 648 return HV_STATUS_INVALID_PORT_ID; 649 } 650 /* remaining bits are reserved-zero */ 651 if (param & ~HV_CONNECTION_ID_MASK) { 652 return HV_STATUS_INVALID_HYPERCALL_INPUT; 653 } 654 655 RCU_READ_LOCK_GUARD(); 656 QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) { 657 if (handler->conn_id == param) { 658 event_notifier_set(handler->notifier); 659 return 0; 660 } 661 } 662 return HV_STATUS_INVALID_CONNECTION_ID; 663 } 664