1 /* 2 * Hyper-V guest/hypervisor interaction 3 * 4 * Copyright (c) 2015-2018 Virtuozzo International GmbH. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 */ 9 10 #include "qemu/osdep.h" 11 #include "qemu/main-loop.h" 12 #include "qapi/error.h" 13 #include "exec/address-spaces.h" 14 #include "sysemu/kvm.h" 15 #include "qemu/bitops.h" 16 #include "qemu/error-report.h" 17 #include "qemu/queue.h" 18 #include "qemu/rcu.h" 19 #include "qemu/rcu_queue.h" 20 #include "hw/hyperv/hyperv.h" 21 22 typedef struct SynICState { 23 DeviceState parent_obj; 24 25 CPUState *cs; 26 27 bool enabled; 28 hwaddr msg_page_addr; 29 hwaddr event_page_addr; 30 MemoryRegion msg_page_mr; 31 MemoryRegion event_page_mr; 32 struct hyperv_message_page *msg_page; 33 struct hyperv_event_flags_page *event_page; 34 } SynICState; 35 36 #define TYPE_SYNIC "hyperv-synic" 37 #define SYNIC(obj) OBJECT_CHECK(SynICState, (obj), TYPE_SYNIC) 38 39 static SynICState *get_synic(CPUState *cs) 40 { 41 return SYNIC(object_resolve_path_component(OBJECT(cs), "synic")); 42 } 43 44 static void synic_update(SynICState *synic, bool enable, 45 hwaddr msg_page_addr, hwaddr event_page_addr) 46 { 47 48 synic->enabled = enable; 49 if (synic->msg_page_addr != msg_page_addr) { 50 if (synic->msg_page_addr) { 51 memory_region_del_subregion(get_system_memory(), 52 &synic->msg_page_mr); 53 } 54 if (msg_page_addr) { 55 memory_region_add_subregion(get_system_memory(), msg_page_addr, 56 &synic->msg_page_mr); 57 } 58 synic->msg_page_addr = msg_page_addr; 59 } 60 if (synic->event_page_addr != event_page_addr) { 61 if (synic->event_page_addr) { 62 memory_region_del_subregion(get_system_memory(), 63 &synic->event_page_mr); 64 } 65 if (event_page_addr) { 66 memory_region_add_subregion(get_system_memory(), event_page_addr, 67 &synic->event_page_mr); 68 } 69 synic->event_page_addr = event_page_addr; 70 } 71 } 72 73 void hyperv_synic_update(CPUState *cs, bool enable, 74 hwaddr msg_page_addr, hwaddr event_page_addr) 75 { 76 SynICState *synic = get_synic(cs); 77 78 if (!synic) { 79 return; 80 } 81 82 synic_update(synic, enable, msg_page_addr, event_page_addr); 83 } 84 85 static void synic_realize(DeviceState *dev, Error **errp) 86 { 87 Object *obj = OBJECT(dev); 88 SynICState *synic = SYNIC(dev); 89 char *msgp_name, *eventp_name; 90 uint32_t vp_index; 91 92 /* memory region names have to be globally unique */ 93 vp_index = hyperv_vp_index(synic->cs); 94 msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index); 95 eventp_name = g_strdup_printf("synic-%u-event-page", vp_index); 96 97 memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name, 98 sizeof(*synic->msg_page), &error_abort); 99 memory_region_init_ram(&synic->event_page_mr, obj, eventp_name, 100 sizeof(*synic->event_page), &error_abort); 101 synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr); 102 synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr); 103 104 g_free(msgp_name); 105 g_free(eventp_name); 106 } 107 static void synic_reset(DeviceState *dev) 108 { 109 SynICState *synic = SYNIC(dev); 110 memset(synic->msg_page, 0, sizeof(*synic->msg_page)); 111 memset(synic->event_page, 0, sizeof(*synic->event_page)); 112 synic_update(synic, false, 0, 0); 113 } 114 115 static void synic_class_init(ObjectClass *klass, void *data) 116 { 117 DeviceClass *dc = DEVICE_CLASS(klass); 118 119 dc->realize = synic_realize; 120 dc->reset = synic_reset; 121 dc->user_creatable = false; 122 } 123 124 void hyperv_synic_add(CPUState *cs) 125 { 126 Object *obj; 127 SynICState *synic; 128 129 obj = object_new(TYPE_SYNIC); 130 synic = SYNIC(obj); 131 synic->cs = cs; 132 object_property_add_child(OBJECT(cs), "synic", obj, &error_abort); 133 object_unref(obj); 134 object_property_set_bool(obj, true, "realized", &error_abort); 135 } 136 137 void hyperv_synic_reset(CPUState *cs) 138 { 139 SynICState *synic = get_synic(cs); 140 141 if (synic) { 142 device_reset(DEVICE(synic)); 143 } 144 } 145 146 static const TypeInfo synic_type_info = { 147 .name = TYPE_SYNIC, 148 .parent = TYPE_DEVICE, 149 .instance_size = sizeof(SynICState), 150 .class_init = synic_class_init, 151 }; 152 153 static void synic_register_types(void) 154 { 155 type_register_static(&synic_type_info); 156 } 157 158 type_init(synic_register_types) 159 160 /* 161 * KVM has its own message producers (SynIC timers). To guarantee 162 * serialization with both KVM vcpu and the guest cpu, the messages are first 163 * staged in an intermediate area and then posted to the SynIC message page in 164 * the vcpu thread. 165 */ 166 typedef struct HvSintStagedMessage { 167 /* message content staged by hyperv_post_msg */ 168 struct hyperv_message msg; 169 /* callback + data (r/o) to complete the processing in a BH */ 170 HvSintMsgCb cb; 171 void *cb_data; 172 /* message posting status filled by cpu_post_msg */ 173 int status; 174 /* passing the buck: */ 175 enum { 176 /* initial state */ 177 HV_STAGED_MSG_FREE, 178 /* 179 * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE -> 180 * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu 181 */ 182 HV_STAGED_MSG_BUSY, 183 /* 184 * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot, 185 * notify the guest, records the status, marks the posting done (BUSY 186 * -> POSTED), and schedules sint_msg_bh BH 187 */ 188 HV_STAGED_MSG_POSTED, 189 /* 190 * sint_msg_bh (BH) verifies that the posting is done, runs the 191 * callback, and starts over (POSTED -> FREE) 192 */ 193 } state; 194 } HvSintStagedMessage; 195 196 struct HvSintRoute { 197 uint32_t sint; 198 SynICState *synic; 199 int gsi; 200 EventNotifier sint_set_notifier; 201 EventNotifier sint_ack_notifier; 202 203 HvSintStagedMessage *staged_msg; 204 205 unsigned refcount; 206 }; 207 208 static CPUState *hyperv_find_vcpu(uint32_t vp_index) 209 { 210 CPUState *cs = qemu_get_cpu(vp_index); 211 assert(hyperv_vp_index(cs) == vp_index); 212 return cs; 213 } 214 215 /* 216 * BH to complete the processing of a staged message. 217 */ 218 static void sint_msg_bh(void *opaque) 219 { 220 HvSintRoute *sint_route = opaque; 221 HvSintStagedMessage *staged_msg = sint_route->staged_msg; 222 223 if (atomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) { 224 /* status nor ready yet (spurious ack from guest?), ignore */ 225 return; 226 } 227 228 staged_msg->cb(staged_msg->cb_data, staged_msg->status); 229 staged_msg->status = 0; 230 231 /* staged message processing finished, ready to start over */ 232 atomic_set(&staged_msg->state, HV_STAGED_MSG_FREE); 233 /* drop the reference taken in hyperv_post_msg */ 234 hyperv_sint_route_unref(sint_route); 235 } 236 237 /* 238 * Worker to transfer the message from the staging area into the SynIC message 239 * page in vcpu context. 240 */ 241 static void cpu_post_msg(CPUState *cs, run_on_cpu_data data) 242 { 243 HvSintRoute *sint_route = data.host_ptr; 244 HvSintStagedMessage *staged_msg = sint_route->staged_msg; 245 SynICState *synic = sint_route->synic; 246 struct hyperv_message *dst_msg; 247 bool wait_for_sint_ack = false; 248 249 assert(staged_msg->state == HV_STAGED_MSG_BUSY); 250 251 if (!synic->enabled || !synic->msg_page_addr) { 252 staged_msg->status = -ENXIO; 253 goto posted; 254 } 255 256 dst_msg = &synic->msg_page->slot[sint_route->sint]; 257 258 if (dst_msg->header.message_type != HV_MESSAGE_NONE) { 259 dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING; 260 staged_msg->status = -EAGAIN; 261 wait_for_sint_ack = true; 262 } else { 263 memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg)); 264 staged_msg->status = hyperv_sint_route_set_sint(sint_route); 265 } 266 267 memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page)); 268 269 posted: 270 atomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED); 271 /* 272 * Notify the msg originator of the progress made; if the slot was busy we 273 * set msg_pending flag in it so it will be the guest who will do EOM and 274 * trigger the notification from KVM via sint_ack_notifier 275 */ 276 if (!wait_for_sint_ack) { 277 aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, 278 sint_route); 279 } 280 } 281 282 /* 283 * Post a Hyper-V message to the staging area, for delivery to guest in the 284 * vcpu thread. 285 */ 286 int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg) 287 { 288 HvSintStagedMessage *staged_msg = sint_route->staged_msg; 289 290 assert(staged_msg); 291 292 /* grab the staging area */ 293 if (atomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE, 294 HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) { 295 return -EAGAIN; 296 } 297 298 memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg)); 299 300 /* hold a reference on sint_route until the callback is finished */ 301 hyperv_sint_route_ref(sint_route); 302 303 /* schedule message posting attempt in vcpu thread */ 304 async_run_on_cpu(sint_route->synic->cs, cpu_post_msg, 305 RUN_ON_CPU_HOST_PTR(sint_route)); 306 return 0; 307 } 308 309 static void sint_ack_handler(EventNotifier *notifier) 310 { 311 HvSintRoute *sint_route = container_of(notifier, HvSintRoute, 312 sint_ack_notifier); 313 event_notifier_test_and_clear(notifier); 314 315 /* 316 * the guest consumed the previous message so complete the current one with 317 * -EAGAIN and let the msg originator retry 318 */ 319 aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route); 320 } 321 322 /* 323 * Set given event flag for a given sint on a given vcpu, and signal the sint. 324 */ 325 int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno) 326 { 327 int ret; 328 SynICState *synic = sint_route->synic; 329 unsigned long *flags, set_mask; 330 unsigned set_idx; 331 332 if (eventno > HV_EVENT_FLAGS_COUNT) { 333 return -EINVAL; 334 } 335 if (!synic->enabled || !synic->event_page_addr) { 336 return -ENXIO; 337 } 338 339 set_idx = BIT_WORD(eventno); 340 set_mask = BIT_MASK(eventno); 341 flags = synic->event_page->slot[sint_route->sint].flags; 342 343 if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) { 344 memory_region_set_dirty(&synic->event_page_mr, 0, 345 sizeof(*synic->event_page)); 346 ret = hyperv_sint_route_set_sint(sint_route); 347 } else { 348 ret = 0; 349 } 350 return ret; 351 } 352 353 HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, 354 HvSintMsgCb cb, void *cb_data) 355 { 356 HvSintRoute *sint_route; 357 EventNotifier *ack_notifier; 358 int r, gsi; 359 CPUState *cs; 360 SynICState *synic; 361 362 cs = hyperv_find_vcpu(vp_index); 363 if (!cs) { 364 return NULL; 365 } 366 367 synic = get_synic(cs); 368 if (!synic) { 369 return NULL; 370 } 371 372 sint_route = g_new0(HvSintRoute, 1); 373 r = event_notifier_init(&sint_route->sint_set_notifier, false); 374 if (r) { 375 goto err; 376 } 377 378 379 ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL; 380 if (ack_notifier) { 381 sint_route->staged_msg = g_new0(HvSintStagedMessage, 1); 382 sint_route->staged_msg->cb = cb; 383 sint_route->staged_msg->cb_data = cb_data; 384 385 r = event_notifier_init(ack_notifier, false); 386 if (r) { 387 goto err_sint_set_notifier; 388 } 389 390 event_notifier_set_handler(ack_notifier, sint_ack_handler); 391 } 392 393 gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint); 394 if (gsi < 0) { 395 goto err_gsi; 396 } 397 398 r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, 399 &sint_route->sint_set_notifier, 400 ack_notifier, gsi); 401 if (r) { 402 goto err_irqfd; 403 } 404 sint_route->gsi = gsi; 405 sint_route->synic = synic; 406 sint_route->sint = sint; 407 sint_route->refcount = 1; 408 409 return sint_route; 410 411 err_irqfd: 412 kvm_irqchip_release_virq(kvm_state, gsi); 413 err_gsi: 414 if (ack_notifier) { 415 event_notifier_set_handler(ack_notifier, NULL); 416 event_notifier_cleanup(ack_notifier); 417 g_free(sint_route->staged_msg); 418 } 419 err_sint_set_notifier: 420 event_notifier_cleanup(&sint_route->sint_set_notifier); 421 err: 422 g_free(sint_route); 423 424 return NULL; 425 } 426 427 void hyperv_sint_route_ref(HvSintRoute *sint_route) 428 { 429 sint_route->refcount++; 430 } 431 432 void hyperv_sint_route_unref(HvSintRoute *sint_route) 433 { 434 if (!sint_route) { 435 return; 436 } 437 438 assert(sint_route->refcount > 0); 439 440 if (--sint_route->refcount) { 441 return; 442 } 443 444 kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, 445 &sint_route->sint_set_notifier, 446 sint_route->gsi); 447 kvm_irqchip_release_virq(kvm_state, sint_route->gsi); 448 if (sint_route->staged_msg) { 449 event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL); 450 event_notifier_cleanup(&sint_route->sint_ack_notifier); 451 g_free(sint_route->staged_msg); 452 } 453 event_notifier_cleanup(&sint_route->sint_set_notifier); 454 g_free(sint_route); 455 } 456 457 int hyperv_sint_route_set_sint(HvSintRoute *sint_route) 458 { 459 return event_notifier_set(&sint_route->sint_set_notifier); 460 } 461 462 typedef struct MsgHandler { 463 struct rcu_head rcu; 464 QLIST_ENTRY(MsgHandler) link; 465 uint32_t conn_id; 466 HvMsgHandler handler; 467 void *data; 468 } MsgHandler; 469 470 typedef struct EventFlagHandler { 471 struct rcu_head rcu; 472 QLIST_ENTRY(EventFlagHandler) link; 473 uint32_t conn_id; 474 EventNotifier *notifier; 475 } EventFlagHandler; 476 477 static QLIST_HEAD(, MsgHandler) msg_handlers; 478 static QLIST_HEAD(, EventFlagHandler) event_flag_handlers; 479 static QemuMutex handlers_mutex; 480 481 static void __attribute__((constructor)) hv_init(void) 482 { 483 QLIST_INIT(&msg_handlers); 484 QLIST_INIT(&event_flag_handlers); 485 qemu_mutex_init(&handlers_mutex); 486 } 487 488 int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data) 489 { 490 int ret; 491 MsgHandler *mh; 492 493 qemu_mutex_lock(&handlers_mutex); 494 QLIST_FOREACH(mh, &msg_handlers, link) { 495 if (mh->conn_id == conn_id) { 496 if (handler) { 497 ret = -EEXIST; 498 } else { 499 QLIST_REMOVE_RCU(mh, link); 500 g_free_rcu(mh, rcu); 501 ret = 0; 502 } 503 goto unlock; 504 } 505 } 506 507 if (handler) { 508 mh = g_new(MsgHandler, 1); 509 mh->conn_id = conn_id; 510 mh->handler = handler; 511 mh->data = data; 512 QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link); 513 ret = 0; 514 } else { 515 ret = -ENOENT; 516 } 517 unlock: 518 qemu_mutex_unlock(&handlers_mutex); 519 return ret; 520 } 521 522 uint16_t hyperv_hcall_post_message(uint64_t param, bool fast) 523 { 524 uint16_t ret; 525 hwaddr len; 526 struct hyperv_post_message_input *msg; 527 MsgHandler *mh; 528 529 if (fast) { 530 return HV_STATUS_INVALID_HYPERCALL_CODE; 531 } 532 if (param & (__alignof__(*msg) - 1)) { 533 return HV_STATUS_INVALID_ALIGNMENT; 534 } 535 536 len = sizeof(*msg); 537 msg = cpu_physical_memory_map(param, &len, 0); 538 if (len < sizeof(*msg)) { 539 ret = HV_STATUS_INSUFFICIENT_MEMORY; 540 goto unmap; 541 } 542 if (msg->payload_size > sizeof(msg->payload)) { 543 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 544 goto unmap; 545 } 546 547 ret = HV_STATUS_INVALID_CONNECTION_ID; 548 rcu_read_lock(); 549 QLIST_FOREACH_RCU(mh, &msg_handlers, link) { 550 if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) { 551 ret = mh->handler(msg, mh->data); 552 break; 553 } 554 } 555 rcu_read_unlock(); 556 557 unmap: 558 cpu_physical_memory_unmap(msg, len, 0, 0); 559 return ret; 560 } 561 562 static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier) 563 { 564 int ret; 565 EventFlagHandler *handler; 566 567 qemu_mutex_lock(&handlers_mutex); 568 QLIST_FOREACH(handler, &event_flag_handlers, link) { 569 if (handler->conn_id == conn_id) { 570 if (notifier) { 571 ret = -EEXIST; 572 } else { 573 QLIST_REMOVE_RCU(handler, link); 574 g_free_rcu(handler, rcu); 575 ret = 0; 576 } 577 goto unlock; 578 } 579 } 580 581 if (notifier) { 582 handler = g_new(EventFlagHandler, 1); 583 handler->conn_id = conn_id; 584 handler->notifier = notifier; 585 QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link); 586 ret = 0; 587 } else { 588 ret = -ENOENT; 589 } 590 unlock: 591 qemu_mutex_unlock(&handlers_mutex); 592 return ret; 593 } 594 595 static bool process_event_flags_userspace; 596 597 int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier) 598 { 599 if (!process_event_flags_userspace && 600 !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) { 601 process_event_flags_userspace = true; 602 603 warn_report("Hyper-V event signaling is not supported by this kernel; " 604 "using slower userspace hypercall processing"); 605 } 606 607 if (!process_event_flags_userspace) { 608 struct kvm_hyperv_eventfd hvevfd = { 609 .conn_id = conn_id, 610 .fd = notifier ? event_notifier_get_fd(notifier) : -1, 611 .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN, 612 }; 613 614 return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd); 615 } 616 return set_event_flag_handler(conn_id, notifier); 617 } 618 619 uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast) 620 { 621 uint16_t ret; 622 EventFlagHandler *handler; 623 624 if (unlikely(!fast)) { 625 hwaddr addr = param; 626 627 if (addr & (__alignof__(addr) - 1)) { 628 return HV_STATUS_INVALID_ALIGNMENT; 629 } 630 631 param = ldq_phys(&address_space_memory, addr); 632 } 633 634 /* 635 * Per spec, bits 32-47 contain the extra "flag number". However, we 636 * have no use for it, and in all known usecases it is zero, so just 637 * report lookup failure if it isn't. 638 */ 639 if (param & 0xffff00000000ULL) { 640 return HV_STATUS_INVALID_PORT_ID; 641 } 642 /* remaining bits are reserved-zero */ 643 if (param & ~HV_CONNECTION_ID_MASK) { 644 return HV_STATUS_INVALID_HYPERCALL_INPUT; 645 } 646 647 ret = HV_STATUS_INVALID_CONNECTION_ID; 648 rcu_read_lock(); 649 QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) { 650 if (handler->conn_id == param) { 651 event_notifier_set(handler->notifier); 652 ret = 0; 653 break; 654 } 655 } 656 rcu_read_unlock(); 657 return ret; 658 } 659