1 /* 2 * Hyper-V guest/hypervisor interaction 3 * 4 * Copyright (c) 2015-2018 Virtuozzo International GmbH. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 */ 9 10 #include "qemu/osdep.h" 11 #include "qemu/main-loop.h" 12 #include "qemu/module.h" 13 #include "qapi/error.h" 14 #include "exec/address-spaces.h" 15 #include "sysemu/kvm.h" 16 #include "qemu/bitops.h" 17 #include "qemu/error-report.h" 18 #include "qemu/queue.h" 19 #include "qemu/rcu.h" 20 #include "qemu/rcu_queue.h" 21 #include "hw/hyperv/hyperv.h" 22 23 typedef struct SynICState { 24 DeviceState parent_obj; 25 26 CPUState *cs; 27 28 bool enabled; 29 hwaddr msg_page_addr; 30 hwaddr event_page_addr; 31 MemoryRegion msg_page_mr; 32 MemoryRegion event_page_mr; 33 struct hyperv_message_page *msg_page; 34 struct hyperv_event_flags_page *event_page; 35 } SynICState; 36 37 #define TYPE_SYNIC "hyperv-synic" 38 #define SYNIC(obj) OBJECT_CHECK(SynICState, (obj), TYPE_SYNIC) 39 40 static SynICState *get_synic(CPUState *cs) 41 { 42 return SYNIC(object_resolve_path_component(OBJECT(cs), "synic")); 43 } 44 45 static void synic_update(SynICState *synic, bool enable, 46 hwaddr msg_page_addr, hwaddr event_page_addr) 47 { 48 49 synic->enabled = enable; 50 if (synic->msg_page_addr != msg_page_addr) { 51 if (synic->msg_page_addr) { 52 memory_region_del_subregion(get_system_memory(), 53 &synic->msg_page_mr); 54 } 55 if (msg_page_addr) { 56 memory_region_add_subregion(get_system_memory(), msg_page_addr, 57 &synic->msg_page_mr); 58 } 59 synic->msg_page_addr = msg_page_addr; 60 } 61 if (synic->event_page_addr != event_page_addr) { 62 if (synic->event_page_addr) { 63 memory_region_del_subregion(get_system_memory(), 64 &synic->event_page_mr); 65 } 66 if (event_page_addr) { 67 memory_region_add_subregion(get_system_memory(), event_page_addr, 68 &synic->event_page_mr); 69 } 70 synic->event_page_addr = event_page_addr; 71 } 72 } 73 74 void hyperv_synic_update(CPUState *cs, bool enable, 75 hwaddr msg_page_addr, hwaddr event_page_addr) 76 { 77 SynICState *synic = get_synic(cs); 78 79 if (!synic) { 80 return; 81 } 82 83 synic_update(synic, enable, msg_page_addr, event_page_addr); 84 } 85 86 static void synic_realize(DeviceState *dev, Error **errp) 87 { 88 Object *obj = OBJECT(dev); 89 SynICState *synic = SYNIC(dev); 90 char *msgp_name, *eventp_name; 91 uint32_t vp_index; 92 93 /* memory region names have to be globally unique */ 94 vp_index = hyperv_vp_index(synic->cs); 95 msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index); 96 eventp_name = g_strdup_printf("synic-%u-event-page", vp_index); 97 98 memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name, 99 sizeof(*synic->msg_page), &error_abort); 100 memory_region_init_ram(&synic->event_page_mr, obj, eventp_name, 101 sizeof(*synic->event_page), &error_abort); 102 synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr); 103 synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr); 104 105 g_free(msgp_name); 106 g_free(eventp_name); 107 } 108 static void synic_reset(DeviceState *dev) 109 { 110 SynICState *synic = SYNIC(dev); 111 memset(synic->msg_page, 0, sizeof(*synic->msg_page)); 112 memset(synic->event_page, 0, sizeof(*synic->event_page)); 113 synic_update(synic, false, 0, 0); 114 } 115 116 static void synic_class_init(ObjectClass *klass, void *data) 117 { 118 DeviceClass *dc = DEVICE_CLASS(klass); 119 120 dc->realize = synic_realize; 121 dc->reset = synic_reset; 122 dc->user_creatable = false; 123 } 124 125 void hyperv_synic_add(CPUState *cs) 126 { 127 Object *obj; 128 SynICState *synic; 129 130 obj = object_new(TYPE_SYNIC); 131 synic = SYNIC(obj); 132 synic->cs = cs; 133 object_property_add_child(OBJECT(cs), "synic", obj, &error_abort); 134 object_unref(obj); 135 object_property_set_bool(obj, true, "realized", &error_abort); 136 } 137 138 void hyperv_synic_reset(CPUState *cs) 139 { 140 SynICState *synic = get_synic(cs); 141 142 if (synic) { 143 device_reset(DEVICE(synic)); 144 } 145 } 146 147 static const TypeInfo synic_type_info = { 148 .name = TYPE_SYNIC, 149 .parent = TYPE_DEVICE, 150 .instance_size = sizeof(SynICState), 151 .class_init = synic_class_init, 152 }; 153 154 static void synic_register_types(void) 155 { 156 type_register_static(&synic_type_info); 157 } 158 159 type_init(synic_register_types) 160 161 /* 162 * KVM has its own message producers (SynIC timers). To guarantee 163 * serialization with both KVM vcpu and the guest cpu, the messages are first 164 * staged in an intermediate area and then posted to the SynIC message page in 165 * the vcpu thread. 166 */ 167 typedef struct HvSintStagedMessage { 168 /* message content staged by hyperv_post_msg */ 169 struct hyperv_message msg; 170 /* callback + data (r/o) to complete the processing in a BH */ 171 HvSintMsgCb cb; 172 void *cb_data; 173 /* message posting status filled by cpu_post_msg */ 174 int status; 175 /* passing the buck: */ 176 enum { 177 /* initial state */ 178 HV_STAGED_MSG_FREE, 179 /* 180 * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE -> 181 * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu 182 */ 183 HV_STAGED_MSG_BUSY, 184 /* 185 * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot, 186 * notify the guest, records the status, marks the posting done (BUSY 187 * -> POSTED), and schedules sint_msg_bh BH 188 */ 189 HV_STAGED_MSG_POSTED, 190 /* 191 * sint_msg_bh (BH) verifies that the posting is done, runs the 192 * callback, and starts over (POSTED -> FREE) 193 */ 194 } state; 195 } HvSintStagedMessage; 196 197 struct HvSintRoute { 198 uint32_t sint; 199 SynICState *synic; 200 int gsi; 201 EventNotifier sint_set_notifier; 202 EventNotifier sint_ack_notifier; 203 204 HvSintStagedMessage *staged_msg; 205 206 unsigned refcount; 207 }; 208 209 static CPUState *hyperv_find_vcpu(uint32_t vp_index) 210 { 211 CPUState *cs = qemu_get_cpu(vp_index); 212 assert(hyperv_vp_index(cs) == vp_index); 213 return cs; 214 } 215 216 /* 217 * BH to complete the processing of a staged message. 218 */ 219 static void sint_msg_bh(void *opaque) 220 { 221 HvSintRoute *sint_route = opaque; 222 HvSintStagedMessage *staged_msg = sint_route->staged_msg; 223 224 if (atomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) { 225 /* status nor ready yet (spurious ack from guest?), ignore */ 226 return; 227 } 228 229 staged_msg->cb(staged_msg->cb_data, staged_msg->status); 230 staged_msg->status = 0; 231 232 /* staged message processing finished, ready to start over */ 233 atomic_set(&staged_msg->state, HV_STAGED_MSG_FREE); 234 /* drop the reference taken in hyperv_post_msg */ 235 hyperv_sint_route_unref(sint_route); 236 } 237 238 /* 239 * Worker to transfer the message from the staging area into the SynIC message 240 * page in vcpu context. 241 */ 242 static void cpu_post_msg(CPUState *cs, run_on_cpu_data data) 243 { 244 HvSintRoute *sint_route = data.host_ptr; 245 HvSintStagedMessage *staged_msg = sint_route->staged_msg; 246 SynICState *synic = sint_route->synic; 247 struct hyperv_message *dst_msg; 248 bool wait_for_sint_ack = false; 249 250 assert(staged_msg->state == HV_STAGED_MSG_BUSY); 251 252 if (!synic->enabled || !synic->msg_page_addr) { 253 staged_msg->status = -ENXIO; 254 goto posted; 255 } 256 257 dst_msg = &synic->msg_page->slot[sint_route->sint]; 258 259 if (dst_msg->header.message_type != HV_MESSAGE_NONE) { 260 dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING; 261 staged_msg->status = -EAGAIN; 262 wait_for_sint_ack = true; 263 } else { 264 memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg)); 265 staged_msg->status = hyperv_sint_route_set_sint(sint_route); 266 } 267 268 memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page)); 269 270 posted: 271 atomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED); 272 /* 273 * Notify the msg originator of the progress made; if the slot was busy we 274 * set msg_pending flag in it so it will be the guest who will do EOM and 275 * trigger the notification from KVM via sint_ack_notifier 276 */ 277 if (!wait_for_sint_ack) { 278 aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, 279 sint_route); 280 } 281 } 282 283 /* 284 * Post a Hyper-V message to the staging area, for delivery to guest in the 285 * vcpu thread. 286 */ 287 int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg) 288 { 289 HvSintStagedMessage *staged_msg = sint_route->staged_msg; 290 291 assert(staged_msg); 292 293 /* grab the staging area */ 294 if (atomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE, 295 HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) { 296 return -EAGAIN; 297 } 298 299 memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg)); 300 301 /* hold a reference on sint_route until the callback is finished */ 302 hyperv_sint_route_ref(sint_route); 303 304 /* schedule message posting attempt in vcpu thread */ 305 async_run_on_cpu(sint_route->synic->cs, cpu_post_msg, 306 RUN_ON_CPU_HOST_PTR(sint_route)); 307 return 0; 308 } 309 310 static void sint_ack_handler(EventNotifier *notifier) 311 { 312 HvSintRoute *sint_route = container_of(notifier, HvSintRoute, 313 sint_ack_notifier); 314 event_notifier_test_and_clear(notifier); 315 316 /* 317 * the guest consumed the previous message so complete the current one with 318 * -EAGAIN and let the msg originator retry 319 */ 320 aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route); 321 } 322 323 /* 324 * Set given event flag for a given sint on a given vcpu, and signal the sint. 325 */ 326 int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno) 327 { 328 int ret; 329 SynICState *synic = sint_route->synic; 330 unsigned long *flags, set_mask; 331 unsigned set_idx; 332 333 if (eventno > HV_EVENT_FLAGS_COUNT) { 334 return -EINVAL; 335 } 336 if (!synic->enabled || !synic->event_page_addr) { 337 return -ENXIO; 338 } 339 340 set_idx = BIT_WORD(eventno); 341 set_mask = BIT_MASK(eventno); 342 flags = synic->event_page->slot[sint_route->sint].flags; 343 344 if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) { 345 memory_region_set_dirty(&synic->event_page_mr, 0, 346 sizeof(*synic->event_page)); 347 ret = hyperv_sint_route_set_sint(sint_route); 348 } else { 349 ret = 0; 350 } 351 return ret; 352 } 353 354 HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, 355 HvSintMsgCb cb, void *cb_data) 356 { 357 HvSintRoute *sint_route; 358 EventNotifier *ack_notifier; 359 int r, gsi; 360 CPUState *cs; 361 SynICState *synic; 362 363 cs = hyperv_find_vcpu(vp_index); 364 if (!cs) { 365 return NULL; 366 } 367 368 synic = get_synic(cs); 369 if (!synic) { 370 return NULL; 371 } 372 373 sint_route = g_new0(HvSintRoute, 1); 374 r = event_notifier_init(&sint_route->sint_set_notifier, false); 375 if (r) { 376 goto err; 377 } 378 379 380 ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL; 381 if (ack_notifier) { 382 sint_route->staged_msg = g_new0(HvSintStagedMessage, 1); 383 sint_route->staged_msg->cb = cb; 384 sint_route->staged_msg->cb_data = cb_data; 385 386 r = event_notifier_init(ack_notifier, false); 387 if (r) { 388 goto err_sint_set_notifier; 389 } 390 391 event_notifier_set_handler(ack_notifier, sint_ack_handler); 392 } 393 394 gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint); 395 if (gsi < 0) { 396 goto err_gsi; 397 } 398 399 r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, 400 &sint_route->sint_set_notifier, 401 ack_notifier, gsi); 402 if (r) { 403 goto err_irqfd; 404 } 405 sint_route->gsi = gsi; 406 sint_route->synic = synic; 407 sint_route->sint = sint; 408 sint_route->refcount = 1; 409 410 return sint_route; 411 412 err_irqfd: 413 kvm_irqchip_release_virq(kvm_state, gsi); 414 err_gsi: 415 if (ack_notifier) { 416 event_notifier_set_handler(ack_notifier, NULL); 417 event_notifier_cleanup(ack_notifier); 418 g_free(sint_route->staged_msg); 419 } 420 err_sint_set_notifier: 421 event_notifier_cleanup(&sint_route->sint_set_notifier); 422 err: 423 g_free(sint_route); 424 425 return NULL; 426 } 427 428 void hyperv_sint_route_ref(HvSintRoute *sint_route) 429 { 430 sint_route->refcount++; 431 } 432 433 void hyperv_sint_route_unref(HvSintRoute *sint_route) 434 { 435 if (!sint_route) { 436 return; 437 } 438 439 assert(sint_route->refcount > 0); 440 441 if (--sint_route->refcount) { 442 return; 443 } 444 445 kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, 446 &sint_route->sint_set_notifier, 447 sint_route->gsi); 448 kvm_irqchip_release_virq(kvm_state, sint_route->gsi); 449 if (sint_route->staged_msg) { 450 event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL); 451 event_notifier_cleanup(&sint_route->sint_ack_notifier); 452 g_free(sint_route->staged_msg); 453 } 454 event_notifier_cleanup(&sint_route->sint_set_notifier); 455 g_free(sint_route); 456 } 457 458 int hyperv_sint_route_set_sint(HvSintRoute *sint_route) 459 { 460 return event_notifier_set(&sint_route->sint_set_notifier); 461 } 462 463 typedef struct MsgHandler { 464 struct rcu_head rcu; 465 QLIST_ENTRY(MsgHandler) link; 466 uint32_t conn_id; 467 HvMsgHandler handler; 468 void *data; 469 } MsgHandler; 470 471 typedef struct EventFlagHandler { 472 struct rcu_head rcu; 473 QLIST_ENTRY(EventFlagHandler) link; 474 uint32_t conn_id; 475 EventNotifier *notifier; 476 } EventFlagHandler; 477 478 static QLIST_HEAD(, MsgHandler) msg_handlers; 479 static QLIST_HEAD(, EventFlagHandler) event_flag_handlers; 480 static QemuMutex handlers_mutex; 481 482 static void __attribute__((constructor)) hv_init(void) 483 { 484 QLIST_INIT(&msg_handlers); 485 QLIST_INIT(&event_flag_handlers); 486 qemu_mutex_init(&handlers_mutex); 487 } 488 489 int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data) 490 { 491 int ret; 492 MsgHandler *mh; 493 494 qemu_mutex_lock(&handlers_mutex); 495 QLIST_FOREACH(mh, &msg_handlers, link) { 496 if (mh->conn_id == conn_id) { 497 if (handler) { 498 ret = -EEXIST; 499 } else { 500 QLIST_REMOVE_RCU(mh, link); 501 g_free_rcu(mh, rcu); 502 ret = 0; 503 } 504 goto unlock; 505 } 506 } 507 508 if (handler) { 509 mh = g_new(MsgHandler, 1); 510 mh->conn_id = conn_id; 511 mh->handler = handler; 512 mh->data = data; 513 QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link); 514 ret = 0; 515 } else { 516 ret = -ENOENT; 517 } 518 unlock: 519 qemu_mutex_unlock(&handlers_mutex); 520 return ret; 521 } 522 523 uint16_t hyperv_hcall_post_message(uint64_t param, bool fast) 524 { 525 uint16_t ret; 526 hwaddr len; 527 struct hyperv_post_message_input *msg; 528 MsgHandler *mh; 529 530 if (fast) { 531 return HV_STATUS_INVALID_HYPERCALL_CODE; 532 } 533 if (param & (__alignof__(*msg) - 1)) { 534 return HV_STATUS_INVALID_ALIGNMENT; 535 } 536 537 len = sizeof(*msg); 538 msg = cpu_physical_memory_map(param, &len, 0); 539 if (len < sizeof(*msg)) { 540 ret = HV_STATUS_INSUFFICIENT_MEMORY; 541 goto unmap; 542 } 543 if (msg->payload_size > sizeof(msg->payload)) { 544 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 545 goto unmap; 546 } 547 548 ret = HV_STATUS_INVALID_CONNECTION_ID; 549 rcu_read_lock(); 550 QLIST_FOREACH_RCU(mh, &msg_handlers, link) { 551 if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) { 552 ret = mh->handler(msg, mh->data); 553 break; 554 } 555 } 556 rcu_read_unlock(); 557 558 unmap: 559 cpu_physical_memory_unmap(msg, len, 0, 0); 560 return ret; 561 } 562 563 static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier) 564 { 565 int ret; 566 EventFlagHandler *handler; 567 568 qemu_mutex_lock(&handlers_mutex); 569 QLIST_FOREACH(handler, &event_flag_handlers, link) { 570 if (handler->conn_id == conn_id) { 571 if (notifier) { 572 ret = -EEXIST; 573 } else { 574 QLIST_REMOVE_RCU(handler, link); 575 g_free_rcu(handler, rcu); 576 ret = 0; 577 } 578 goto unlock; 579 } 580 } 581 582 if (notifier) { 583 handler = g_new(EventFlagHandler, 1); 584 handler->conn_id = conn_id; 585 handler->notifier = notifier; 586 QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link); 587 ret = 0; 588 } else { 589 ret = -ENOENT; 590 } 591 unlock: 592 qemu_mutex_unlock(&handlers_mutex); 593 return ret; 594 } 595 596 static bool process_event_flags_userspace; 597 598 int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier) 599 { 600 if (!process_event_flags_userspace && 601 !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) { 602 process_event_flags_userspace = true; 603 604 warn_report("Hyper-V event signaling is not supported by this kernel; " 605 "using slower userspace hypercall processing"); 606 } 607 608 if (!process_event_flags_userspace) { 609 struct kvm_hyperv_eventfd hvevfd = { 610 .conn_id = conn_id, 611 .fd = notifier ? event_notifier_get_fd(notifier) : -1, 612 .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN, 613 }; 614 615 return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd); 616 } 617 return set_event_flag_handler(conn_id, notifier); 618 } 619 620 uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast) 621 { 622 uint16_t ret; 623 EventFlagHandler *handler; 624 625 if (unlikely(!fast)) { 626 hwaddr addr = param; 627 628 if (addr & (__alignof__(addr) - 1)) { 629 return HV_STATUS_INVALID_ALIGNMENT; 630 } 631 632 param = ldq_phys(&address_space_memory, addr); 633 } 634 635 /* 636 * Per spec, bits 32-47 contain the extra "flag number". However, we 637 * have no use for it, and in all known usecases it is zero, so just 638 * report lookup failure if it isn't. 639 */ 640 if (param & 0xffff00000000ULL) { 641 return HV_STATUS_INVALID_PORT_ID; 642 } 643 /* remaining bits are reserved-zero */ 644 if (param & ~HV_CONNECTION_ID_MASK) { 645 return HV_STATUS_INVALID_HYPERCALL_INPUT; 646 } 647 648 ret = HV_STATUS_INVALID_CONNECTION_ID; 649 rcu_read_lock(); 650 QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) { 651 if (handler->conn_id == param) { 652 event_notifier_set(handler->notifier); 653 ret = 0; 654 break; 655 } 656 } 657 rcu_read_unlock(); 658 return ret; 659 } 660