xref: /openbmc/qemu/hw/hyperv/hyperv.c (revision 93dd625f)
1 /*
2  * Hyper-V guest/hypervisor interaction
3  *
4  * Copyright (c) 2015-2018 Virtuozzo International GmbH.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/main-loop.h"
12 #include "qemu/module.h"
13 #include "qapi/error.h"
14 #include "exec/address-spaces.h"
15 #include "sysemu/kvm.h"
16 #include "qemu/bitops.h"
17 #include "qemu/error-report.h"
18 #include "qemu/lockable.h"
19 #include "qemu/queue.h"
20 #include "qemu/rcu.h"
21 #include "qemu/rcu_queue.h"
22 #include "hw/hyperv/hyperv.h"
23 
24 typedef struct SynICState {
25     DeviceState parent_obj;
26 
27     CPUState *cs;
28 
29     bool enabled;
30     hwaddr msg_page_addr;
31     hwaddr event_page_addr;
32     MemoryRegion msg_page_mr;
33     MemoryRegion event_page_mr;
34     struct hyperv_message_page *msg_page;
35     struct hyperv_event_flags_page *event_page;
36 } SynICState;
37 
/* QOM type name of the SynIC device and the matching checked cast */
#define TYPE_SYNIC "hyperv-synic"
#define SYNIC(obj) \
    OBJECT_CHECK(SynICState, (obj), TYPE_SYNIC)
40 
/* becomes true once hyperv_synic_add() has created a SynIC */
static bool synic_enabled;

/*
 * Tell whether hyperv_synic_add() has been called at least once.
 */
bool hyperv_is_synic_enabled(void)
{
    return synic_enabled;
}
47 
48 static SynICState *get_synic(CPUState *cs)
49 {
50     return SYNIC(object_resolve_path_component(OBJECT(cs), "synic"));
51 }
52 
53 static void synic_update(SynICState *synic, bool enable,
54                          hwaddr msg_page_addr, hwaddr event_page_addr)
55 {
56 
57     synic->enabled = enable;
58     if (synic->msg_page_addr != msg_page_addr) {
59         if (synic->msg_page_addr) {
60             memory_region_del_subregion(get_system_memory(),
61                                         &synic->msg_page_mr);
62         }
63         if (msg_page_addr) {
64             memory_region_add_subregion(get_system_memory(), msg_page_addr,
65                                         &synic->msg_page_mr);
66         }
67         synic->msg_page_addr = msg_page_addr;
68     }
69     if (synic->event_page_addr != event_page_addr) {
70         if (synic->event_page_addr) {
71             memory_region_del_subregion(get_system_memory(),
72                                         &synic->event_page_mr);
73         }
74         if (event_page_addr) {
75             memory_region_add_subregion(get_system_memory(), event_page_addr,
76                                         &synic->event_page_mr);
77         }
78         synic->event_page_addr = event_page_addr;
79     }
80 }
81 
82 void hyperv_synic_update(CPUState *cs, bool enable,
83                          hwaddr msg_page_addr, hwaddr event_page_addr)
84 {
85     SynICState *synic = get_synic(cs);
86 
87     if (!synic) {
88         return;
89     }
90 
91     synic_update(synic, enable, msg_page_addr, event_page_addr);
92 }
93 
94 static void synic_realize(DeviceState *dev, Error **errp)
95 {
96     Object *obj = OBJECT(dev);
97     SynICState *synic = SYNIC(dev);
98     char *msgp_name, *eventp_name;
99     uint32_t vp_index;
100 
101     /* memory region names have to be globally unique */
102     vp_index = hyperv_vp_index(synic->cs);
103     msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index);
104     eventp_name = g_strdup_printf("synic-%u-event-page", vp_index);
105 
106     memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name,
107                            sizeof(*synic->msg_page), &error_abort);
108     memory_region_init_ram(&synic->event_page_mr, obj, eventp_name,
109                            sizeof(*synic->event_page), &error_abort);
110     synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr);
111     synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr);
112 
113     g_free(msgp_name);
114     g_free(eventp_name);
115 }
116 static void synic_reset(DeviceState *dev)
117 {
118     SynICState *synic = SYNIC(dev);
119     memset(synic->msg_page, 0, sizeof(*synic->msg_page));
120     memset(synic->event_page, 0, sizeof(*synic->event_page));
121     synic_update(synic, false, 0, 0);
122 }
123 
124 static void synic_class_init(ObjectClass *klass, void *data)
125 {
126     DeviceClass *dc = DEVICE_CLASS(klass);
127 
128     dc->realize = synic_realize;
129     dc->reset = synic_reset;
130     dc->user_creatable = false;
131 }
132 
133 void hyperv_synic_add(CPUState *cs)
134 {
135     Object *obj;
136     SynICState *synic;
137 
138     obj = object_new(TYPE_SYNIC);
139     synic = SYNIC(obj);
140     synic->cs = cs;
141     object_property_add_child(OBJECT(cs), "synic", obj);
142     object_unref(obj);
143     qdev_realize(DEVICE(obj), NULL, &error_abort);
144     synic_enabled = true;
145 }
146 
147 void hyperv_synic_reset(CPUState *cs)
148 {
149     SynICState *synic = get_synic(cs);
150 
151     if (synic) {
152         device_legacy_reset(DEVICE(synic));
153     }
154 }
155 
156 static const TypeInfo synic_type_info = {
157     .name = TYPE_SYNIC,
158     .parent = TYPE_DEVICE,
159     .instance_size = sizeof(SynICState),
160     .class_init = synic_class_init,
161 };
162 
163 static void synic_register_types(void)
164 {
165     type_register_static(&synic_type_info);
166 }
167 
168 type_init(synic_register_types)
169 
170 /*
171  * KVM has its own message producers (SynIC timers).  To guarantee
172  * serialization with both KVM vcpu and the guest cpu, the messages are first
173  * staged in an intermediate area and then posted to the SynIC message page in
174  * the vcpu thread.
175  */
176 typedef struct HvSintStagedMessage {
177     /* message content staged by hyperv_post_msg */
178     struct hyperv_message msg;
179     /* callback + data (r/o) to complete the processing in a BH */
180     HvSintMsgCb cb;
181     void *cb_data;
182     /* message posting status filled by cpu_post_msg */
183     int status;
184     /* passing the buck: */
185     enum {
186         /* initial state */
187         HV_STAGED_MSG_FREE,
188         /*
189          * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE ->
190          * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu
191          */
192         HV_STAGED_MSG_BUSY,
193         /*
194          * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot,
195          * notify the guest, records the status, marks the posting done (BUSY
196          * -> POSTED), and schedules sint_msg_bh BH
197          */
198         HV_STAGED_MSG_POSTED,
199         /*
200          * sint_msg_bh (BH) verifies that the posting is done, runs the
201          * callback, and starts over (POSTED -> FREE)
202          */
203     } state;
204 } HvSintStagedMessage;
205 
206 struct HvSintRoute {
207     uint32_t sint;
208     SynICState *synic;
209     int gsi;
210     EventNotifier sint_set_notifier;
211     EventNotifier sint_ack_notifier;
212 
213     HvSintStagedMessage *staged_msg;
214 
215     unsigned refcount;
216 };
217 
218 static CPUState *hyperv_find_vcpu(uint32_t vp_index)
219 {
220     CPUState *cs = qemu_get_cpu(vp_index);
221     assert(hyperv_vp_index(cs) == vp_index);
222     return cs;
223 }
224 
225 /*
226  * BH to complete the processing of a staged message.
227  */
228 static void sint_msg_bh(void *opaque)
229 {
230     HvSintRoute *sint_route = opaque;
231     HvSintStagedMessage *staged_msg = sint_route->staged_msg;
232 
233     if (atomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) {
234         /* status nor ready yet (spurious ack from guest?), ignore */
235         return;
236     }
237 
238     staged_msg->cb(staged_msg->cb_data, staged_msg->status);
239     staged_msg->status = 0;
240 
241     /* staged message processing finished, ready to start over */
242     atomic_set(&staged_msg->state, HV_STAGED_MSG_FREE);
243     /* drop the reference taken in hyperv_post_msg */
244     hyperv_sint_route_unref(sint_route);
245 }
246 
247 /*
248  * Worker to transfer the message from the staging area into the SynIC message
249  * page in vcpu context.
250  */
251 static void cpu_post_msg(CPUState *cs, run_on_cpu_data data)
252 {
253     HvSintRoute *sint_route = data.host_ptr;
254     HvSintStagedMessage *staged_msg = sint_route->staged_msg;
255     SynICState *synic = sint_route->synic;
256     struct hyperv_message *dst_msg;
257     bool wait_for_sint_ack = false;
258 
259     assert(staged_msg->state == HV_STAGED_MSG_BUSY);
260 
261     if (!synic->enabled || !synic->msg_page_addr) {
262         staged_msg->status = -ENXIO;
263         goto posted;
264     }
265 
266     dst_msg = &synic->msg_page->slot[sint_route->sint];
267 
268     if (dst_msg->header.message_type != HV_MESSAGE_NONE) {
269         dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING;
270         staged_msg->status = -EAGAIN;
271         wait_for_sint_ack = true;
272     } else {
273         memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg));
274         staged_msg->status = hyperv_sint_route_set_sint(sint_route);
275     }
276 
277     memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page));
278 
279 posted:
280     atomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED);
281     /*
282      * Notify the msg originator of the progress made; if the slot was busy we
283      * set msg_pending flag in it so it will be the guest who will do EOM and
284      * trigger the notification from KVM via sint_ack_notifier
285      */
286     if (!wait_for_sint_ack) {
287         aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh,
288                                 sint_route);
289     }
290 }
291 
292 /*
293  * Post a Hyper-V message to the staging area, for delivery to guest in the
294  * vcpu thread.
295  */
296 int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg)
297 {
298     HvSintStagedMessage *staged_msg = sint_route->staged_msg;
299 
300     assert(staged_msg);
301 
302     /* grab the staging area */
303     if (atomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE,
304                        HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) {
305         return -EAGAIN;
306     }
307 
308     memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg));
309 
310     /* hold a reference on sint_route until the callback is finished */
311     hyperv_sint_route_ref(sint_route);
312 
313     /* schedule message posting attempt in vcpu thread */
314     async_run_on_cpu(sint_route->synic->cs, cpu_post_msg,
315                      RUN_ON_CPU_HOST_PTR(sint_route));
316     return 0;
317 }
318 
319 static void sint_ack_handler(EventNotifier *notifier)
320 {
321     HvSintRoute *sint_route = container_of(notifier, HvSintRoute,
322                                            sint_ack_notifier);
323     event_notifier_test_and_clear(notifier);
324 
325     /*
326      * the guest consumed the previous message so complete the current one with
327      * -EAGAIN and let the msg originator retry
328      */
329     aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route);
330 }
331 
332 /*
333  * Set given event flag for a given sint on a given vcpu, and signal the sint.
334  */
335 int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno)
336 {
337     int ret;
338     SynICState *synic = sint_route->synic;
339     unsigned long *flags, set_mask;
340     unsigned set_idx;
341 
342     if (eventno > HV_EVENT_FLAGS_COUNT) {
343         return -EINVAL;
344     }
345     if (!synic->enabled || !synic->event_page_addr) {
346         return -ENXIO;
347     }
348 
349     set_idx = BIT_WORD(eventno);
350     set_mask = BIT_MASK(eventno);
351     flags = synic->event_page->slot[sint_route->sint].flags;
352 
353     if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) {
354         memory_region_set_dirty(&synic->event_page_mr, 0,
355                                 sizeof(*synic->event_page));
356         ret = hyperv_sint_route_set_sint(sint_route);
357     } else {
358         ret = 0;
359     }
360     return ret;
361 }
362 
363 HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint,
364                                    HvSintMsgCb cb, void *cb_data)
365 {
366     HvSintRoute *sint_route;
367     EventNotifier *ack_notifier;
368     int r, gsi;
369     CPUState *cs;
370     SynICState *synic;
371 
372     cs = hyperv_find_vcpu(vp_index);
373     if (!cs) {
374         return NULL;
375     }
376 
377     synic = get_synic(cs);
378     if (!synic) {
379         return NULL;
380     }
381 
382     sint_route = g_new0(HvSintRoute, 1);
383     r = event_notifier_init(&sint_route->sint_set_notifier, false);
384     if (r) {
385         goto err;
386     }
387 
388 
389     ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL;
390     if (ack_notifier) {
391         sint_route->staged_msg = g_new0(HvSintStagedMessage, 1);
392         sint_route->staged_msg->cb = cb;
393         sint_route->staged_msg->cb_data = cb_data;
394 
395         r = event_notifier_init(ack_notifier, false);
396         if (r) {
397             goto err_sint_set_notifier;
398         }
399 
400         event_notifier_set_handler(ack_notifier, sint_ack_handler);
401     }
402 
403     gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint);
404     if (gsi < 0) {
405         goto err_gsi;
406     }
407 
408     r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
409                                            &sint_route->sint_set_notifier,
410                                            ack_notifier, gsi);
411     if (r) {
412         goto err_irqfd;
413     }
414     sint_route->gsi = gsi;
415     sint_route->synic = synic;
416     sint_route->sint = sint;
417     sint_route->refcount = 1;
418 
419     return sint_route;
420 
421 err_irqfd:
422     kvm_irqchip_release_virq(kvm_state, gsi);
423 err_gsi:
424     if (ack_notifier) {
425         event_notifier_set_handler(ack_notifier, NULL);
426         event_notifier_cleanup(ack_notifier);
427         g_free(sint_route->staged_msg);
428     }
429 err_sint_set_notifier:
430     event_notifier_cleanup(&sint_route->sint_set_notifier);
431 err:
432     g_free(sint_route);
433 
434     return NULL;
435 }
436 
437 void hyperv_sint_route_ref(HvSintRoute *sint_route)
438 {
439     sint_route->refcount++;
440 }
441 
442 void hyperv_sint_route_unref(HvSintRoute *sint_route)
443 {
444     if (!sint_route) {
445         return;
446     }
447 
448     assert(sint_route->refcount > 0);
449 
450     if (--sint_route->refcount) {
451         return;
452     }
453 
454     kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
455                                           &sint_route->sint_set_notifier,
456                                           sint_route->gsi);
457     kvm_irqchip_release_virq(kvm_state, sint_route->gsi);
458     if (sint_route->staged_msg) {
459         event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL);
460         event_notifier_cleanup(&sint_route->sint_ack_notifier);
461         g_free(sint_route->staged_msg);
462     }
463     event_notifier_cleanup(&sint_route->sint_set_notifier);
464     g_free(sint_route);
465 }
466 
467 int hyperv_sint_route_set_sint(HvSintRoute *sint_route)
468 {
469     return event_notifier_set(&sint_route->sint_set_notifier);
470 }
471 
472 typedef struct MsgHandler {
473     struct rcu_head rcu;
474     QLIST_ENTRY(MsgHandler) link;
475     uint32_t conn_id;
476     HvMsgHandler handler;
477     void *data;
478 } MsgHandler;
479 
480 typedef struct EventFlagHandler {
481     struct rcu_head rcu;
482     QLIST_ENTRY(EventFlagHandler) link;
483     uint32_t conn_id;
484     EventNotifier *notifier;
485 } EventFlagHandler;
486 
487 static QLIST_HEAD(, MsgHandler) msg_handlers;
488 static QLIST_HEAD(, EventFlagHandler) event_flag_handlers;
489 static QemuMutex handlers_mutex;
490 
491 static void __attribute__((constructor)) hv_init(void)
492 {
493     QLIST_INIT(&msg_handlers);
494     QLIST_INIT(&event_flag_handlers);
495     qemu_mutex_init(&handlers_mutex);
496 }
497 
498 int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data)
499 {
500     int ret;
501     MsgHandler *mh;
502 
503     QEMU_LOCK_GUARD(&handlers_mutex);
504     QLIST_FOREACH(mh, &msg_handlers, link) {
505         if (mh->conn_id == conn_id) {
506             if (handler) {
507                 ret = -EEXIST;
508             } else {
509                 QLIST_REMOVE_RCU(mh, link);
510                 g_free_rcu(mh, rcu);
511                 ret = 0;
512             }
513             return ret;
514         }
515     }
516 
517     if (handler) {
518         mh = g_new(MsgHandler, 1);
519         mh->conn_id = conn_id;
520         mh->handler = handler;
521         mh->data = data;
522         QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link);
523         ret = 0;
524     } else {
525         ret = -ENOENT;
526     }
527 
528     return ret;
529 }
530 
531 uint16_t hyperv_hcall_post_message(uint64_t param, bool fast)
532 {
533     uint16_t ret;
534     hwaddr len;
535     struct hyperv_post_message_input *msg;
536     MsgHandler *mh;
537 
538     if (fast) {
539         return HV_STATUS_INVALID_HYPERCALL_CODE;
540     }
541     if (param & (__alignof__(*msg) - 1)) {
542         return HV_STATUS_INVALID_ALIGNMENT;
543     }
544 
545     len = sizeof(*msg);
546     msg = cpu_physical_memory_map(param, &len, 0);
547     if (len < sizeof(*msg)) {
548         ret = HV_STATUS_INSUFFICIENT_MEMORY;
549         goto unmap;
550     }
551     if (msg->payload_size > sizeof(msg->payload)) {
552         ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
553         goto unmap;
554     }
555 
556     ret = HV_STATUS_INVALID_CONNECTION_ID;
557     WITH_RCU_READ_LOCK_GUARD() {
558         QLIST_FOREACH_RCU(mh, &msg_handlers, link) {
559             if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) {
560                 ret = mh->handler(msg, mh->data);
561                 break;
562             }
563         }
564     }
565 
566 unmap:
567     cpu_physical_memory_unmap(msg, len, 0, 0);
568     return ret;
569 }
570 
571 static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
572 {
573     int ret;
574     EventFlagHandler *handler;
575 
576     QEMU_LOCK_GUARD(&handlers_mutex);
577     QLIST_FOREACH(handler, &event_flag_handlers, link) {
578         if (handler->conn_id == conn_id) {
579             if (notifier) {
580                 ret = -EEXIST;
581             } else {
582                 QLIST_REMOVE_RCU(handler, link);
583                 g_free_rcu(handler, rcu);
584                 ret = 0;
585             }
586             return ret;
587         }
588     }
589 
590     if (notifier) {
591         handler = g_new(EventFlagHandler, 1);
592         handler->conn_id = conn_id;
593         handler->notifier = notifier;
594         QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link);
595         ret = 0;
596     } else {
597         ret = -ENOENT;
598     }
599 
600     return ret;
601 }
602 
603 static bool process_event_flags_userspace;
604 
605 int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
606 {
607     if (!process_event_flags_userspace &&
608         !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) {
609         process_event_flags_userspace = true;
610 
611         warn_report("Hyper-V event signaling is not supported by this kernel; "
612                     "using slower userspace hypercall processing");
613     }
614 
615     if (!process_event_flags_userspace) {
616         struct kvm_hyperv_eventfd hvevfd = {
617             .conn_id = conn_id,
618             .fd = notifier ? event_notifier_get_fd(notifier) : -1,
619             .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN,
620         };
621 
622         return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd);
623     }
624     return set_event_flag_handler(conn_id, notifier);
625 }
626 
627 uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast)
628 {
629     EventFlagHandler *handler;
630 
631     if (unlikely(!fast)) {
632         hwaddr addr = param;
633 
634         if (addr & (__alignof__(addr) - 1)) {
635             return HV_STATUS_INVALID_ALIGNMENT;
636         }
637 
638         param = ldq_phys(&address_space_memory, addr);
639     }
640 
641     /*
642      * Per spec, bits 32-47 contain the extra "flag number".  However, we
643      * have no use for it, and in all known usecases it is zero, so just
644      * report lookup failure if it isn't.
645      */
646     if (param & 0xffff00000000ULL) {
647         return HV_STATUS_INVALID_PORT_ID;
648     }
649     /* remaining bits are reserved-zero */
650     if (param & ~HV_CONNECTION_ID_MASK) {
651         return HV_STATUS_INVALID_HYPERCALL_INPUT;
652     }
653 
654     RCU_READ_LOCK_GUARD();
655     QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) {
656         if (handler->conn_id == param) {
657             event_notifier_set(handler->notifier);
658             return 0;
659         }
660     }
661     return HV_STATUS_INVALID_CONNECTION_ID;
662 }
663