1 /* 2 * Hyper-V guest/hypervisor interaction 3 * 4 * Copyright (c) 2015-2018 Virtuozzo International GmbH. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 */ 9 10 #include "qemu/osdep.h" 11 #include "qemu/main-loop.h" 12 #include "qemu/module.h" 13 #include "qapi/error.h" 14 #include "exec/address-spaces.h" 15 #include "sysemu/kvm.h" 16 #include "qemu/bitops.h" 17 #include "qemu/error-report.h" 18 #include "qemu/lockable.h" 19 #include "qemu/queue.h" 20 #include "qemu/rcu.h" 21 #include "qemu/rcu_queue.h" 22 #include "hw/hyperv/hyperv.h" 23 24 typedef struct SynICState { 25 DeviceState parent_obj; 26 27 CPUState *cs; 28 29 bool enabled; 30 hwaddr msg_page_addr; 31 hwaddr event_page_addr; 32 MemoryRegion msg_page_mr; 33 MemoryRegion event_page_mr; 34 struct hyperv_message_page *msg_page; 35 struct hyperv_event_flags_page *event_page; 36 } SynICState; 37 38 #define TYPE_SYNIC "hyperv-synic" 39 #define SYNIC(obj) OBJECT_CHECK(SynICState, (obj), TYPE_SYNIC) 40 41 static SynICState *get_synic(CPUState *cs) 42 { 43 return SYNIC(object_resolve_path_component(OBJECT(cs), "synic")); 44 } 45 46 static void synic_update(SynICState *synic, bool enable, 47 hwaddr msg_page_addr, hwaddr event_page_addr) 48 { 49 50 synic->enabled = enable; 51 if (synic->msg_page_addr != msg_page_addr) { 52 if (synic->msg_page_addr) { 53 memory_region_del_subregion(get_system_memory(), 54 &synic->msg_page_mr); 55 } 56 if (msg_page_addr) { 57 memory_region_add_subregion(get_system_memory(), msg_page_addr, 58 &synic->msg_page_mr); 59 } 60 synic->msg_page_addr = msg_page_addr; 61 } 62 if (synic->event_page_addr != event_page_addr) { 63 if (synic->event_page_addr) { 64 memory_region_del_subregion(get_system_memory(), 65 &synic->event_page_mr); 66 } 67 if (event_page_addr) { 68 memory_region_add_subregion(get_system_memory(), event_page_addr, 69 &synic->event_page_mr); 70 } 71 synic->event_page_addr = event_page_addr; 72 } 73 } 74 75 void hyperv_synic_update(CPUState *cs, bool enable, 76 hwaddr msg_page_addr, hwaddr event_page_addr) 77 { 78 SynICState *synic = get_synic(cs); 79 80 if (!synic) { 81 return; 82 } 83 84 synic_update(synic, enable, msg_page_addr, event_page_addr); 85 } 86 87 static void synic_realize(DeviceState *dev, Error **errp) 88 { 89 Object *obj = OBJECT(dev); 90 SynICState *synic = SYNIC(dev); 91 char *msgp_name, *eventp_name; 92 uint32_t vp_index; 93 94 /* memory region names have to be globally unique */ 95 vp_index = hyperv_vp_index(synic->cs); 96 msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index); 97 eventp_name = g_strdup_printf("synic-%u-event-page", vp_index); 98 99 memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name, 100 sizeof(*synic->msg_page), &error_abort); 101 memory_region_init_ram(&synic->event_page_mr, obj, eventp_name, 102 sizeof(*synic->event_page), &error_abort); 103 synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr); 104 synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr); 105 106 g_free(msgp_name); 107 g_free(eventp_name); 108 } 109 static void synic_reset(DeviceState *dev) 110 { 111 SynICState *synic = SYNIC(dev); 112 memset(synic->msg_page, 0, sizeof(*synic->msg_page)); 113 memset(synic->event_page, 0, sizeof(*synic->event_page)); 114 synic_update(synic, false, 0, 0); 115 } 116 117 static void synic_class_init(ObjectClass *klass, void *data) 118 { 119 DeviceClass *dc = DEVICE_CLASS(klass); 120 121 dc->realize = synic_realize; 122 dc->reset = synic_reset; 123 dc->user_creatable = false; 124 } 125 126 void hyperv_synic_add(CPUState *cs) 127 { 128 Object *obj; 129 SynICState *synic; 130 131 obj = object_new(TYPE_SYNIC); 132 synic = SYNIC(obj); 133 synic->cs = cs; 134 object_property_add_child(OBJECT(cs), "synic", obj); 135 object_unref(obj); 136 object_property_set_bool(obj, true, "realized", &error_abort); 137 } 138 139 void hyperv_synic_reset(CPUState *cs) 140 { 141 SynICState *synic = get_synic(cs); 142 143 if (synic) { 144 device_legacy_reset(DEVICE(synic)); 145 } 146 } 147 148 static const TypeInfo synic_type_info = { 149 .name = TYPE_SYNIC, 150 .parent = TYPE_DEVICE, 151 .instance_size = sizeof(SynICState), 152 .class_init = synic_class_init, 153 }; 154 155 static void synic_register_types(void) 156 { 157 type_register_static(&synic_type_info); 158 } 159 160 type_init(synic_register_types) 161 162 /* 163 * KVM has its own message producers (SynIC timers). To guarantee 164 * serialization with both KVM vcpu and the guest cpu, the messages are first 165 * staged in an intermediate area and then posted to the SynIC message page in 166 * the vcpu thread. 167 */ 168 typedef struct HvSintStagedMessage { 169 /* message content staged by hyperv_post_msg */ 170 struct hyperv_message msg; 171 /* callback + data (r/o) to complete the processing in a BH */ 172 HvSintMsgCb cb; 173 void *cb_data; 174 /* message posting status filled by cpu_post_msg */ 175 int status; 176 /* passing the buck: */ 177 enum { 178 /* initial state */ 179 HV_STAGED_MSG_FREE, 180 /* 181 * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE -> 182 * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu 183 */ 184 HV_STAGED_MSG_BUSY, 185 /* 186 * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot, 187 * notify the guest, records the status, marks the posting done (BUSY 188 * -> POSTED), and schedules sint_msg_bh BH 189 */ 190 HV_STAGED_MSG_POSTED, 191 /* 192 * sint_msg_bh (BH) verifies that the posting is done, runs the 193 * callback, and starts over (POSTED -> FREE) 194 */ 195 } state; 196 } HvSintStagedMessage; 197 198 struct HvSintRoute { 199 uint32_t sint; 200 SynICState *synic; 201 int gsi; 202 EventNotifier sint_set_notifier; 203 EventNotifier sint_ack_notifier; 204 205 HvSintStagedMessage *staged_msg; 206 207 unsigned refcount; 208 }; 209 210 static CPUState *hyperv_find_vcpu(uint32_t vp_index) 211 { 212 CPUState *cs = qemu_get_cpu(vp_index); 213 assert(hyperv_vp_index(cs) == vp_index); 214 return cs; 215 } 216 217 /* 218 * BH to complete the processing of a staged message. 219 */ 220 static void sint_msg_bh(void *opaque) 221 { 222 HvSintRoute *sint_route = opaque; 223 HvSintStagedMessage *staged_msg = sint_route->staged_msg; 224 225 if (atomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) { 226 /* status nor ready yet (spurious ack from guest?), ignore */ 227 return; 228 } 229 230 staged_msg->cb(staged_msg->cb_data, staged_msg->status); 231 staged_msg->status = 0; 232 233 /* staged message processing finished, ready to start over */ 234 atomic_set(&staged_msg->state, HV_STAGED_MSG_FREE); 235 /* drop the reference taken in hyperv_post_msg */ 236 hyperv_sint_route_unref(sint_route); 237 } 238 239 /* 240 * Worker to transfer the message from the staging area into the SynIC message 241 * page in vcpu context. 242 */ 243 static void cpu_post_msg(CPUState *cs, run_on_cpu_data data) 244 { 245 HvSintRoute *sint_route = data.host_ptr; 246 HvSintStagedMessage *staged_msg = sint_route->staged_msg; 247 SynICState *synic = sint_route->synic; 248 struct hyperv_message *dst_msg; 249 bool wait_for_sint_ack = false; 250 251 assert(staged_msg->state == HV_STAGED_MSG_BUSY); 252 253 if (!synic->enabled || !synic->msg_page_addr) { 254 staged_msg->status = -ENXIO; 255 goto posted; 256 } 257 258 dst_msg = &synic->msg_page->slot[sint_route->sint]; 259 260 if (dst_msg->header.message_type != HV_MESSAGE_NONE) { 261 dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING; 262 staged_msg->status = -EAGAIN; 263 wait_for_sint_ack = true; 264 } else { 265 memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg)); 266 staged_msg->status = hyperv_sint_route_set_sint(sint_route); 267 } 268 269 memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page)); 270 271 posted: 272 atomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED); 273 /* 274 * Notify the msg originator of the progress made; if the slot was busy we 275 * set msg_pending flag in it so it will be the guest who will do EOM and 276 * trigger the notification from KVM via sint_ack_notifier 277 */ 278 if (!wait_for_sint_ack) { 279 aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, 280 sint_route); 281 } 282 } 283 284 /* 285 * Post a Hyper-V message to the staging area, for delivery to guest in the 286 * vcpu thread. 287 */ 288 int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg) 289 { 290 HvSintStagedMessage *staged_msg = sint_route->staged_msg; 291 292 assert(staged_msg); 293 294 /* grab the staging area */ 295 if (atomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE, 296 HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) { 297 return -EAGAIN; 298 } 299 300 memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg)); 301 302 /* hold a reference on sint_route until the callback is finished */ 303 hyperv_sint_route_ref(sint_route); 304 305 /* schedule message posting attempt in vcpu thread */ 306 async_run_on_cpu(sint_route->synic->cs, cpu_post_msg, 307 RUN_ON_CPU_HOST_PTR(sint_route)); 308 return 0; 309 } 310 311 static void sint_ack_handler(EventNotifier *notifier) 312 { 313 HvSintRoute *sint_route = container_of(notifier, HvSintRoute, 314 sint_ack_notifier); 315 event_notifier_test_and_clear(notifier); 316 317 /* 318 * the guest consumed the previous message so complete the current one with 319 * -EAGAIN and let the msg originator retry 320 */ 321 aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route); 322 } 323 324 /* 325 * Set given event flag for a given sint on a given vcpu, and signal the sint. 326 */ 327 int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno) 328 { 329 int ret; 330 SynICState *synic = sint_route->synic; 331 unsigned long *flags, set_mask; 332 unsigned set_idx; 333 334 if (eventno > HV_EVENT_FLAGS_COUNT) { 335 return -EINVAL; 336 } 337 if (!synic->enabled || !synic->event_page_addr) { 338 return -ENXIO; 339 } 340 341 set_idx = BIT_WORD(eventno); 342 set_mask = BIT_MASK(eventno); 343 flags = synic->event_page->slot[sint_route->sint].flags; 344 345 if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) { 346 memory_region_set_dirty(&synic->event_page_mr, 0, 347 sizeof(*synic->event_page)); 348 ret = hyperv_sint_route_set_sint(sint_route); 349 } else { 350 ret = 0; 351 } 352 return ret; 353 } 354 355 HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, 356 HvSintMsgCb cb, void *cb_data) 357 { 358 HvSintRoute *sint_route; 359 EventNotifier *ack_notifier; 360 int r, gsi; 361 CPUState *cs; 362 SynICState *synic; 363 364 cs = hyperv_find_vcpu(vp_index); 365 if (!cs) { 366 return NULL; 367 } 368 369 synic = get_synic(cs); 370 if (!synic) { 371 return NULL; 372 } 373 374 sint_route = g_new0(HvSintRoute, 1); 375 r = event_notifier_init(&sint_route->sint_set_notifier, false); 376 if (r) { 377 goto err; 378 } 379 380 381 ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL; 382 if (ack_notifier) { 383 sint_route->staged_msg = g_new0(HvSintStagedMessage, 1); 384 sint_route->staged_msg->cb = cb; 385 sint_route->staged_msg->cb_data = cb_data; 386 387 r = event_notifier_init(ack_notifier, false); 388 if (r) { 389 goto err_sint_set_notifier; 390 } 391 392 event_notifier_set_handler(ack_notifier, sint_ack_handler); 393 } 394 395 gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint); 396 if (gsi < 0) { 397 goto err_gsi; 398 } 399 400 r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, 401 &sint_route->sint_set_notifier, 402 ack_notifier, gsi); 403 if (r) { 404 goto err_irqfd; 405 } 406 sint_route->gsi = gsi; 407 sint_route->synic = synic; 408 sint_route->sint = sint; 409 sint_route->refcount = 1; 410 411 return sint_route; 412 413 err_irqfd: 414 kvm_irqchip_release_virq(kvm_state, gsi); 415 err_gsi: 416 if (ack_notifier) { 417 event_notifier_set_handler(ack_notifier, NULL); 418 event_notifier_cleanup(ack_notifier); 419 g_free(sint_route->staged_msg); 420 } 421 err_sint_set_notifier: 422 event_notifier_cleanup(&sint_route->sint_set_notifier); 423 err: 424 g_free(sint_route); 425 426 return NULL; 427 } 428 429 void hyperv_sint_route_ref(HvSintRoute *sint_route) 430 { 431 sint_route->refcount++; 432 } 433 434 void hyperv_sint_route_unref(HvSintRoute *sint_route) 435 { 436 if (!sint_route) { 437 return; 438 } 439 440 assert(sint_route->refcount > 0); 441 442 if (--sint_route->refcount) { 443 return; 444 } 445 446 kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, 447 &sint_route->sint_set_notifier, 448 sint_route->gsi); 449 kvm_irqchip_release_virq(kvm_state, sint_route->gsi); 450 if (sint_route->staged_msg) { 451 event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL); 452 event_notifier_cleanup(&sint_route->sint_ack_notifier); 453 g_free(sint_route->staged_msg); 454 } 455 event_notifier_cleanup(&sint_route->sint_set_notifier); 456 g_free(sint_route); 457 } 458 459 int hyperv_sint_route_set_sint(HvSintRoute *sint_route) 460 { 461 return event_notifier_set(&sint_route->sint_set_notifier); 462 } 463 464 typedef struct MsgHandler { 465 struct rcu_head rcu; 466 QLIST_ENTRY(MsgHandler) link; 467 uint32_t conn_id; 468 HvMsgHandler handler; 469 void *data; 470 } MsgHandler; 471 472 typedef struct EventFlagHandler { 473 struct rcu_head rcu; 474 QLIST_ENTRY(EventFlagHandler) link; 475 uint32_t conn_id; 476 EventNotifier *notifier; 477 } EventFlagHandler; 478 479 static QLIST_HEAD(, MsgHandler) msg_handlers; 480 static QLIST_HEAD(, EventFlagHandler) event_flag_handlers; 481 static QemuMutex handlers_mutex; 482 483 static void __attribute__((constructor)) hv_init(void) 484 { 485 QLIST_INIT(&msg_handlers); 486 QLIST_INIT(&event_flag_handlers); 487 qemu_mutex_init(&handlers_mutex); 488 } 489 490 int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data) 491 { 492 int ret; 493 MsgHandler *mh; 494 495 QEMU_LOCK_GUARD(&handlers_mutex); 496 QLIST_FOREACH(mh, &msg_handlers, link) { 497 if (mh->conn_id == conn_id) { 498 if (handler) { 499 ret = -EEXIST; 500 } else { 501 QLIST_REMOVE_RCU(mh, link); 502 g_free_rcu(mh, rcu); 503 ret = 0; 504 } 505 return ret; 506 } 507 } 508 509 if (handler) { 510 mh = g_new(MsgHandler, 1); 511 mh->conn_id = conn_id; 512 mh->handler = handler; 513 mh->data = data; 514 QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link); 515 ret = 0; 516 } else { 517 ret = -ENOENT; 518 } 519 520 return ret; 521 } 522 523 uint16_t hyperv_hcall_post_message(uint64_t param, bool fast) 524 { 525 uint16_t ret; 526 hwaddr len; 527 struct hyperv_post_message_input *msg; 528 MsgHandler *mh; 529 530 if (fast) { 531 return HV_STATUS_INVALID_HYPERCALL_CODE; 532 } 533 if (param & (__alignof__(*msg) - 1)) { 534 return HV_STATUS_INVALID_ALIGNMENT; 535 } 536 537 len = sizeof(*msg); 538 msg = cpu_physical_memory_map(param, &len, 0); 539 if (len < sizeof(*msg)) { 540 ret = HV_STATUS_INSUFFICIENT_MEMORY; 541 goto unmap; 542 } 543 if (msg->payload_size > sizeof(msg->payload)) { 544 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 545 goto unmap; 546 } 547 548 ret = HV_STATUS_INVALID_CONNECTION_ID; 549 WITH_RCU_READ_LOCK_GUARD() { 550 QLIST_FOREACH_RCU(mh, &msg_handlers, link) { 551 if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) { 552 ret = mh->handler(msg, mh->data); 553 break; 554 } 555 } 556 } 557 558 unmap: 559 cpu_physical_memory_unmap(msg, len, 0, 0); 560 return ret; 561 } 562 563 static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier) 564 { 565 int ret; 566 EventFlagHandler *handler; 567 568 QEMU_LOCK_GUARD(&handlers_mutex); 569 QLIST_FOREACH(handler, &event_flag_handlers, link) { 570 if (handler->conn_id == conn_id) { 571 if (notifier) { 572 ret = -EEXIST; 573 } else { 574 QLIST_REMOVE_RCU(handler, link); 575 g_free_rcu(handler, rcu); 576 ret = 0; 577 } 578 return ret; 579 } 580 } 581 582 if (notifier) { 583 handler = g_new(EventFlagHandler, 1); 584 handler->conn_id = conn_id; 585 handler->notifier = notifier; 586 QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link); 587 ret = 0; 588 } else { 589 ret = -ENOENT; 590 } 591 592 return ret; 593 } 594 595 static bool process_event_flags_userspace; 596 597 int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier) 598 { 599 if (!process_event_flags_userspace && 600 !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) { 601 process_event_flags_userspace = true; 602 603 warn_report("Hyper-V event signaling is not supported by this kernel; " 604 "using slower userspace hypercall processing"); 605 } 606 607 if (!process_event_flags_userspace) { 608 struct kvm_hyperv_eventfd hvevfd = { 609 .conn_id = conn_id, 610 .fd = notifier ? event_notifier_get_fd(notifier) : -1, 611 .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN, 612 }; 613 614 return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd); 615 } 616 return set_event_flag_handler(conn_id, notifier); 617 } 618 619 uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast) 620 { 621 EventFlagHandler *handler; 622 623 if (unlikely(!fast)) { 624 hwaddr addr = param; 625 626 if (addr & (__alignof__(addr) - 1)) { 627 return HV_STATUS_INVALID_ALIGNMENT; 628 } 629 630 param = ldq_phys(&address_space_memory, addr); 631 } 632 633 /* 634 * Per spec, bits 32-47 contain the extra "flag number". However, we 635 * have no use for it, and in all known usecases it is zero, so just 636 * report lookup failure if it isn't. 637 */ 638 if (param & 0xffff00000000ULL) { 639 return HV_STATUS_INVALID_PORT_ID; 640 } 641 /* remaining bits are reserved-zero */ 642 if (param & ~HV_CONNECTION_ID_MASK) { 643 return HV_STATUS_INVALID_HYPERCALL_INPUT; 644 } 645 646 RCU_READ_LOCK_GUARD(); 647 QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) { 648 if (handler->conn_id == param) { 649 event_notifier_set(handler->notifier); 650 return 0; 651 } 652 } 653 return HV_STATUS_INVALID_CONNECTION_ID; 654 } 655