xref: /openbmc/qemu/hw/hyperv/hyperv.c (revision 6e2e2e8a)
1 /*
2  * Hyper-V guest/hypervisor interaction
3  *
4  * Copyright (c) 2015-2018 Virtuozzo International GmbH.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/main-loop.h"
12 #include "qemu/module.h"
13 #include "qapi/error.h"
14 #include "exec/address-spaces.h"
15 #include "sysemu/kvm.h"
16 #include "qemu/bitops.h"
17 #include "qemu/error-report.h"
18 #include "qemu/lockable.h"
19 #include "qemu/queue.h"
20 #include "qemu/rcu.h"
21 #include "qemu/rcu_queue.h"
22 #include "hw/hyperv/hyperv.h"
23 #include "qom/object.h"
24 
/*
 * State of one synthetic interrupt controller (SynIC) device.  An instance is
 * created per vCPU by hyperv_synic_add() and attached to it as the "synic"
 * child object.
 */
struct SynICState {
    DeviceState parent_obj;

    /* vCPU this SynIC belongs to; assigned in hyperv_synic_add() */
    CPUState *cs;

    /* last 'enable' value handed to synic_update() */
    bool enabled;
    /*
     * Addresses at which the two pages are currently mapped into system
     * memory; 0 is treated as "not mapped" by synic_update().
     */
    hwaddr msg_page_addr;
    hwaddr event_page_addr;
    /* RAM regions backing the pages, initialized in synic_realize() */
    MemoryRegion msg_page_mr;
    MemoryRegion event_page_mr;
    /* host pointers to the contents of the regions above */
    struct hyperv_message_page *msg_page;
    struct hyperv_event_flags_page *event_page;
};
38 
/* QOM type name of the SynIC device and the matching SYNIC() cast helper */
#define TYPE_SYNIC "hyperv-synic"
OBJECT_DECLARE_SIMPLE_TYPE(SynICState, SYNIC)

/*
 * Set to true by hyperv_synic_add(); nothing in the code visible here ever
 * clears it again.
 */
static bool synic_enabled;
43 
/*
 * Report whether a SynIC has been added to at least one vCPU, i.e. whether
 * hyperv_synic_add() has been called.
 */
bool hyperv_is_synic_enabled(void)
{
    return synic_enabled;
}
48 
49 static SynICState *get_synic(CPUState *cs)
50 {
51     return SYNIC(object_resolve_path_component(OBJECT(cs), "synic"));
52 }
53 
54 static void synic_update(SynICState *synic, bool enable,
55                          hwaddr msg_page_addr, hwaddr event_page_addr)
56 {
57 
58     synic->enabled = enable;
59     if (synic->msg_page_addr != msg_page_addr) {
60         if (synic->msg_page_addr) {
61             memory_region_del_subregion(get_system_memory(),
62                                         &synic->msg_page_mr);
63         }
64         if (msg_page_addr) {
65             memory_region_add_subregion(get_system_memory(), msg_page_addr,
66                                         &synic->msg_page_mr);
67         }
68         synic->msg_page_addr = msg_page_addr;
69     }
70     if (synic->event_page_addr != event_page_addr) {
71         if (synic->event_page_addr) {
72             memory_region_del_subregion(get_system_memory(),
73                                         &synic->event_page_mr);
74         }
75         if (event_page_addr) {
76             memory_region_add_subregion(get_system_memory(), event_page_addr,
77                                         &synic->event_page_mr);
78         }
79         synic->event_page_addr = event_page_addr;
80     }
81 }
82 
83 void hyperv_synic_update(CPUState *cs, bool enable,
84                          hwaddr msg_page_addr, hwaddr event_page_addr)
85 {
86     SynICState *synic = get_synic(cs);
87 
88     if (!synic) {
89         return;
90     }
91 
92     synic_update(synic, enable, msg_page_addr, event_page_addr);
93 }
94 
95 static void synic_realize(DeviceState *dev, Error **errp)
96 {
97     Object *obj = OBJECT(dev);
98     SynICState *synic = SYNIC(dev);
99     char *msgp_name, *eventp_name;
100     uint32_t vp_index;
101 
102     /* memory region names have to be globally unique */
103     vp_index = hyperv_vp_index(synic->cs);
104     msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index);
105     eventp_name = g_strdup_printf("synic-%u-event-page", vp_index);
106 
107     memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name,
108                            sizeof(*synic->msg_page), &error_abort);
109     memory_region_init_ram(&synic->event_page_mr, obj, eventp_name,
110                            sizeof(*synic->event_page), &error_abort);
111     synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr);
112     synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr);
113 
114     g_free(msgp_name);
115     g_free(eventp_name);
116 }
117 static void synic_reset(DeviceState *dev)
118 {
119     SynICState *synic = SYNIC(dev);
120     memset(synic->msg_page, 0, sizeof(*synic->msg_page));
121     memset(synic->event_page, 0, sizeof(*synic->event_page));
122     synic_update(synic, false, 0, 0);
123 }
124 
125 static void synic_class_init(ObjectClass *klass, void *data)
126 {
127     DeviceClass *dc = DEVICE_CLASS(klass);
128 
129     dc->realize = synic_realize;
130     dc->reset = synic_reset;
131     dc->user_creatable = false;
132 }
133 
134 void hyperv_synic_add(CPUState *cs)
135 {
136     Object *obj;
137     SynICState *synic;
138 
139     obj = object_new(TYPE_SYNIC);
140     synic = SYNIC(obj);
141     synic->cs = cs;
142     object_property_add_child(OBJECT(cs), "synic", obj);
143     object_unref(obj);
144     qdev_realize(DEVICE(obj), NULL, &error_abort);
145     synic_enabled = true;
146 }
147 
148 void hyperv_synic_reset(CPUState *cs)
149 {
150     SynICState *synic = get_synic(cs);
151 
152     if (synic) {
153         device_legacy_reset(DEVICE(synic));
154     }
155 }
156 
/*
 * QOM type description of the SynIC device: a direct TYPE_DEVICE subclass
 * whose instances are SynICState structures.
 */
static const TypeInfo synic_type_info = {
    .name = TYPE_SYNIC,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(SynICState),
    .class_init = synic_class_init,
};
163 
/* Register the SynIC type with QOM; invoked through type_init() below. */
static void synic_register_types(void)
{
    type_register_static(&synic_type_info);
}

type_init(synic_register_types)
170 
/*
 * KVM has its own message producers (SynIC timers).  To guarantee
 * serialization with both KVM vcpu and the guest cpu, the messages are first
 * staged in an intermediate area and then posted to the SynIC message page in
 * the vcpu thread.
 *
 * The 'state' field below tracks which of hyperv_post_msg, cpu_post_msg and
 * sint_msg_bh currently owns the staging area.
 */
typedef struct HvSintStagedMessage {
    /* message content staged by hyperv_post_msg */
    struct hyperv_message msg;
    /* callback + data (r/o) to complete the processing in a BH */
    HvSintMsgCb cb;
    void *cb_data;
    /* message posting status filled by cpu_post_msg */
    int status;
    /* passing the buck: */
    enum {
        /* initial state */
        HV_STAGED_MSG_FREE,
        /*
         * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE ->
         * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu
         */
        HV_STAGED_MSG_BUSY,
        /*
         * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot,
         * notify the guest, records the status, marks the posting done (BUSY
         * -> POSTED), and schedules sint_msg_bh BH
         */
        HV_STAGED_MSG_POSTED,
        /*
         * sint_msg_bh (BH) verifies that the posting is done, runs the
         * callback, and starts over (POSTED -> FREE)
         */
    } state;
} HvSintStagedMessage;
206 
/*
 * A route for signaling one SINT of one vCPU's SynIC, created by
 * hyperv_sint_route_new() and reference counted.
 */
struct HvSintRoute {
    /* SINT number; also indexes the message/event slot in the SynIC pages */
    uint32_t sint;
    /* SynIC of the target vCPU */
    SynICState *synic;
    /* GSI returned by kvm_irqchip_add_hv_sint_route() */
    int gsi;
    /* set by hyperv_sint_route_set_sint() to raise the SINT */
    EventNotifier sint_set_notifier;
    /* handled by sint_ack_handler(); only initialized if a callback is set */
    EventNotifier sint_ack_notifier;

    /* staging area for message posting; NULL if created without a callback */
    HvSintStagedMessage *staged_msg;

    /* starts at 1; see hyperv_sint_route_ref() / hyperv_sint_route_unref() */
    unsigned refcount;
};
218 
219 static CPUState *hyperv_find_vcpu(uint32_t vp_index)
220 {
221     CPUState *cs = qemu_get_cpu(vp_index);
222     assert(hyperv_vp_index(cs) == vp_index);
223     return cs;
224 }
225 
226 /*
227  * BH to complete the processing of a staged message.
228  */
229 static void sint_msg_bh(void *opaque)
230 {
231     HvSintRoute *sint_route = opaque;
232     HvSintStagedMessage *staged_msg = sint_route->staged_msg;
233 
234     if (qatomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) {
235         /* status nor ready yet (spurious ack from guest?), ignore */
236         return;
237     }
238 
239     staged_msg->cb(staged_msg->cb_data, staged_msg->status);
240     staged_msg->status = 0;
241 
242     /* staged message processing finished, ready to start over */
243     qatomic_set(&staged_msg->state, HV_STAGED_MSG_FREE);
244     /* drop the reference taken in hyperv_post_msg */
245     hyperv_sint_route_unref(sint_route);
246 }
247 
248 /*
249  * Worker to transfer the message from the staging area into the SynIC message
250  * page in vcpu context.
251  */
252 static void cpu_post_msg(CPUState *cs, run_on_cpu_data data)
253 {
254     HvSintRoute *sint_route = data.host_ptr;
255     HvSintStagedMessage *staged_msg = sint_route->staged_msg;
256     SynICState *synic = sint_route->synic;
257     struct hyperv_message *dst_msg;
258     bool wait_for_sint_ack = false;
259 
260     assert(staged_msg->state == HV_STAGED_MSG_BUSY);
261 
262     if (!synic->enabled || !synic->msg_page_addr) {
263         staged_msg->status = -ENXIO;
264         goto posted;
265     }
266 
267     dst_msg = &synic->msg_page->slot[sint_route->sint];
268 
269     if (dst_msg->header.message_type != HV_MESSAGE_NONE) {
270         dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING;
271         staged_msg->status = -EAGAIN;
272         wait_for_sint_ack = true;
273     } else {
274         memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg));
275         staged_msg->status = hyperv_sint_route_set_sint(sint_route);
276     }
277 
278     memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page));
279 
280 posted:
281     qatomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED);
282     /*
283      * Notify the msg originator of the progress made; if the slot was busy we
284      * set msg_pending flag in it so it will be the guest who will do EOM and
285      * trigger the notification from KVM via sint_ack_notifier
286      */
287     if (!wait_for_sint_ack) {
288         aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh,
289                                 sint_route);
290     }
291 }
292 
293 /*
294  * Post a Hyper-V message to the staging area, for delivery to guest in the
295  * vcpu thread.
296  */
297 int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg)
298 {
299     HvSintStagedMessage *staged_msg = sint_route->staged_msg;
300 
301     assert(staged_msg);
302 
303     /* grab the staging area */
304     if (qatomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE,
305                        HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) {
306         return -EAGAIN;
307     }
308 
309     memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg));
310 
311     /* hold a reference on sint_route until the callback is finished */
312     hyperv_sint_route_ref(sint_route);
313 
314     /* schedule message posting attempt in vcpu thread */
315     async_run_on_cpu(sint_route->synic->cs, cpu_post_msg,
316                      RUN_ON_CPU_HOST_PTR(sint_route));
317     return 0;
318 }
319 
320 static void sint_ack_handler(EventNotifier *notifier)
321 {
322     HvSintRoute *sint_route = container_of(notifier, HvSintRoute,
323                                            sint_ack_notifier);
324     event_notifier_test_and_clear(notifier);
325 
326     /*
327      * the guest consumed the previous message so complete the current one with
328      * -EAGAIN and let the msg originator retry
329      */
330     aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route);
331 }
332 
333 /*
334  * Set given event flag for a given sint on a given vcpu, and signal the sint.
335  */
336 int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno)
337 {
338     int ret;
339     SynICState *synic = sint_route->synic;
340     unsigned long *flags, set_mask;
341     unsigned set_idx;
342 
343     if (eventno > HV_EVENT_FLAGS_COUNT) {
344         return -EINVAL;
345     }
346     if (!synic->enabled || !synic->event_page_addr) {
347         return -ENXIO;
348     }
349 
350     set_idx = BIT_WORD(eventno);
351     set_mask = BIT_MASK(eventno);
352     flags = synic->event_page->slot[sint_route->sint].flags;
353 
354     if ((qatomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) {
355         memory_region_set_dirty(&synic->event_page_mr, 0,
356                                 sizeof(*synic->event_page));
357         ret = hyperv_sint_route_set_sint(sint_route);
358     } else {
359         ret = 0;
360     }
361     return ret;
362 }
363 
364 HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint,
365                                    HvSintMsgCb cb, void *cb_data)
366 {
367     HvSintRoute *sint_route;
368     EventNotifier *ack_notifier;
369     int r, gsi;
370     CPUState *cs;
371     SynICState *synic;
372 
373     cs = hyperv_find_vcpu(vp_index);
374     if (!cs) {
375         return NULL;
376     }
377 
378     synic = get_synic(cs);
379     if (!synic) {
380         return NULL;
381     }
382 
383     sint_route = g_new0(HvSintRoute, 1);
384     r = event_notifier_init(&sint_route->sint_set_notifier, false);
385     if (r) {
386         goto err;
387     }
388 
389 
390     ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL;
391     if (ack_notifier) {
392         sint_route->staged_msg = g_new0(HvSintStagedMessage, 1);
393         sint_route->staged_msg->cb = cb;
394         sint_route->staged_msg->cb_data = cb_data;
395 
396         r = event_notifier_init(ack_notifier, false);
397         if (r) {
398             goto err_sint_set_notifier;
399         }
400 
401         event_notifier_set_handler(ack_notifier, sint_ack_handler);
402     }
403 
404     gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint);
405     if (gsi < 0) {
406         goto err_gsi;
407     }
408 
409     r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
410                                            &sint_route->sint_set_notifier,
411                                            ack_notifier, gsi);
412     if (r) {
413         goto err_irqfd;
414     }
415     sint_route->gsi = gsi;
416     sint_route->synic = synic;
417     sint_route->sint = sint;
418     sint_route->refcount = 1;
419 
420     return sint_route;
421 
422 err_irqfd:
423     kvm_irqchip_release_virq(kvm_state, gsi);
424 err_gsi:
425     if (ack_notifier) {
426         event_notifier_set_handler(ack_notifier, NULL);
427         event_notifier_cleanup(ack_notifier);
428         g_free(sint_route->staged_msg);
429     }
430 err_sint_set_notifier:
431     event_notifier_cleanup(&sint_route->sint_set_notifier);
432 err:
433     g_free(sint_route);
434 
435     return NULL;
436 }
437 
/*
 * Take an additional reference on @sint_route.  The counter is a plain
 * (non-atomic) increment.
 */
void hyperv_sint_route_ref(HvSintRoute *sint_route)
{
    sint_route->refcount++;
}
442 
443 void hyperv_sint_route_unref(HvSintRoute *sint_route)
444 {
445     if (!sint_route) {
446         return;
447     }
448 
449     assert(sint_route->refcount > 0);
450 
451     if (--sint_route->refcount) {
452         return;
453     }
454 
455     kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
456                                           &sint_route->sint_set_notifier,
457                                           sint_route->gsi);
458     kvm_irqchip_release_virq(kvm_state, sint_route->gsi);
459     if (sint_route->staged_msg) {
460         event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL);
461         event_notifier_cleanup(&sint_route->sint_ack_notifier);
462         g_free(sint_route->staged_msg);
463     }
464     event_notifier_cleanup(&sint_route->sint_set_notifier);
465     g_free(sint_route);
466 }
467 
/*
 * Signal the SINT of @sint_route by setting its sint_set_notifier.
 * Returns the result of event_notifier_set().
 */
int hyperv_sint_route_set_sint(HvSintRoute *sint_route)
{
    return event_notifier_set(&sint_route->sint_set_notifier);
}
472 
/*
 * Registered handler for messages posted by the guest to a connection id;
 * an element of the msg_handlers list.
 */
typedef struct MsgHandler {
    /* used by g_free_rcu() when the handler is removed */
    struct rcu_head rcu;
    QLIST_ENTRY(MsgHandler) link;
    /* connection id this handler is registered for */
    uint32_t conn_id;
    /* callback and its opaque argument, see hyperv_hcall_post_message() */
    HvMsgHandler handler;
    void *data;
} MsgHandler;
480 
/*
 * Registered notifier for events signaled by the guest on a connection id;
 * an element of the event_flag_handlers list.
 */
typedef struct EventFlagHandler {
    /* used by g_free_rcu() when the handler is removed */
    struct rcu_head rcu;
    QLIST_ENTRY(EventFlagHandler) link;
    /* connection id this notifier is registered for */
    uint32_t conn_id;
    /* set from hyperv_hcall_signal_event() on a matching connection id */
    EventNotifier *notifier;
} EventFlagHandler;
487 
/*
 * Handler lists.  They are modified with handlers_mutex held (see
 * hyperv_set_msg_handler() and set_event_flag_handler()) and traversed under
 * the RCU read lock by the hypercall handlers.
 */
static QLIST_HEAD(, MsgHandler) msg_handlers;
static QLIST_HEAD(, EventFlagHandler) event_flag_handlers;
static QemuMutex handlers_mutex;

/* Constructor: set up the handler lists and their mutex. */
static void __attribute__((constructor)) hv_init(void)
{
    QLIST_INIT(&msg_handlers);
    QLIST_INIT(&event_flag_handlers);
    qemu_mutex_init(&handlers_mutex);
}
498 
499 int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data)
500 {
501     int ret;
502     MsgHandler *mh;
503 
504     QEMU_LOCK_GUARD(&handlers_mutex);
505     QLIST_FOREACH(mh, &msg_handlers, link) {
506         if (mh->conn_id == conn_id) {
507             if (handler) {
508                 ret = -EEXIST;
509             } else {
510                 QLIST_REMOVE_RCU(mh, link);
511                 g_free_rcu(mh, rcu);
512                 ret = 0;
513             }
514             return ret;
515         }
516     }
517 
518     if (handler) {
519         mh = g_new(MsgHandler, 1);
520         mh->conn_id = conn_id;
521         mh->handler = handler;
522         mh->data = data;
523         QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link);
524         ret = 0;
525     } else {
526         ret = -ENOENT;
527     }
528 
529     return ret;
530 }
531 
532 uint16_t hyperv_hcall_post_message(uint64_t param, bool fast)
533 {
534     uint16_t ret;
535     hwaddr len;
536     struct hyperv_post_message_input *msg;
537     MsgHandler *mh;
538 
539     if (fast) {
540         return HV_STATUS_INVALID_HYPERCALL_CODE;
541     }
542     if (param & (__alignof__(*msg) - 1)) {
543         return HV_STATUS_INVALID_ALIGNMENT;
544     }
545 
546     len = sizeof(*msg);
547     msg = cpu_physical_memory_map(param, &len, 0);
548     if (len < sizeof(*msg)) {
549         ret = HV_STATUS_INSUFFICIENT_MEMORY;
550         goto unmap;
551     }
552     if (msg->payload_size > sizeof(msg->payload)) {
553         ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
554         goto unmap;
555     }
556 
557     ret = HV_STATUS_INVALID_CONNECTION_ID;
558     WITH_RCU_READ_LOCK_GUARD() {
559         QLIST_FOREACH_RCU(mh, &msg_handlers, link) {
560             if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) {
561                 ret = mh->handler(msg, mh->data);
562                 break;
563             }
564         }
565     }
566 
567 unmap:
568     cpu_physical_memory_unmap(msg, len, 0, 0);
569     return ret;
570 }
571 
572 static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
573 {
574     int ret;
575     EventFlagHandler *handler;
576 
577     QEMU_LOCK_GUARD(&handlers_mutex);
578     QLIST_FOREACH(handler, &event_flag_handlers, link) {
579         if (handler->conn_id == conn_id) {
580             if (notifier) {
581                 ret = -EEXIST;
582             } else {
583                 QLIST_REMOVE_RCU(handler, link);
584                 g_free_rcu(handler, rcu);
585                 ret = 0;
586             }
587             return ret;
588         }
589     }
590 
591     if (notifier) {
592         handler = g_new(EventFlagHandler, 1);
593         handler->conn_id = conn_id;
594         handler->notifier = notifier;
595         QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link);
596         ret = 0;
597     } else {
598         ret = -ENOENT;
599     }
600 
601     return ret;
602 }
603 
604 static bool process_event_flags_userspace;
605 
606 int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
607 {
608     if (!process_event_flags_userspace &&
609         !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) {
610         process_event_flags_userspace = true;
611 
612         warn_report("Hyper-V event signaling is not supported by this kernel; "
613                     "using slower userspace hypercall processing");
614     }
615 
616     if (!process_event_flags_userspace) {
617         struct kvm_hyperv_eventfd hvevfd = {
618             .conn_id = conn_id,
619             .fd = notifier ? event_notifier_get_fd(notifier) : -1,
620             .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN,
621         };
622 
623         return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd);
624     }
625     return set_event_flag_handler(conn_id, notifier);
626 }
627 
/*
 * Handle the "signal event" hypercall in userspace.
 *
 * With the fast calling convention @param carries the input value itself;
 * otherwise it is the guest-physical address the 64-bit input is loaded
 * from.
 *
 * Returns 0 after setting the notifier registered for the connection id, or
 * one of the HV_STATUS_* codes below if the input is rejected or no notifier
 * is registered.
 */
uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast)
{
    EventFlagHandler *handler;

    if (unlikely(!fast)) {
        hwaddr addr = param;

        if (addr & (__alignof__(addr) - 1)) {
            return HV_STATUS_INVALID_ALIGNMENT;
        }

        /* fetch the actual input value from guest memory */
        param = ldq_phys(&address_space_memory, addr);
    }

    /*
     * Per spec, bits 32-47 contain the extra "flag number".  However, we
     * have no use for it, and in all known usecases it is zero, so just
     * report lookup failure if it isn't.
     */
    if (param & 0xffff00000000ULL) {
        return HV_STATUS_INVALID_PORT_ID;
    }
    /* remaining bits are reserved-zero */
    if (param & ~HV_CONNECTION_ID_MASK) {
        return HV_STATUS_INVALID_HYPERCALL_INPUT;
    }

    /* the RCU read lock is held until the function returns */
    RCU_READ_LOCK_GUARD();
    QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) {
        if (handler->conn_id == param) {
            event_notifier_set(handler->notifier);
            return 0;
        }
    }
    return HV_STATUS_INVALID_CONNECTION_ID;
}
664