xref: /openbmc/qemu/hw/hyperv/hyperv.c (revision 9543fdaf)
1 /*
2  * Hyper-V guest/hypervisor interaction
3  *
4  * Copyright (c) 2015-2018 Virtuozzo International GmbH.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/main-loop.h"
12 #include "qapi/error.h"
13 #include "exec/address-spaces.h"
14 #include "sysemu/kvm.h"
15 #include "qemu/bitops.h"
16 #include "qemu/error-report.h"
17 #include "qemu/queue.h"
18 #include "qemu/rcu.h"
19 #include "qemu/rcu_queue.h"
20 #include "hw/hyperv/hyperv.h"
21 
22 typedef struct SynICState {
23     DeviceState parent_obj;
24 
25     CPUState *cs;
26 
27     bool enabled;
28     hwaddr msg_page_addr;
29     hwaddr event_page_addr;
30     MemoryRegion msg_page_mr;
31     MemoryRegion event_page_mr;
32     struct hyperv_message_page *msg_page;
33     struct hyperv_event_flags_page *event_page;
34 } SynICState;
35 
36 #define TYPE_SYNIC "hyperv-synic"
37 #define SYNIC(obj) OBJECT_CHECK(SynICState, (obj), TYPE_SYNIC)
38 
39 static SynICState *get_synic(CPUState *cs)
40 {
41     return SYNIC(object_resolve_path_component(OBJECT(cs), "synic"));
42 }
43 
44 static void synic_update(SynICState *synic, bool enable,
45                          hwaddr msg_page_addr, hwaddr event_page_addr)
46 {
47 
48     synic->enabled = enable;
49     if (synic->msg_page_addr != msg_page_addr) {
50         if (synic->msg_page_addr) {
51             memory_region_del_subregion(get_system_memory(),
52                                         &synic->msg_page_mr);
53         }
54         if (msg_page_addr) {
55             memory_region_add_subregion(get_system_memory(), msg_page_addr,
56                                         &synic->msg_page_mr);
57         }
58         synic->msg_page_addr = msg_page_addr;
59     }
60     if (synic->event_page_addr != event_page_addr) {
61         if (synic->event_page_addr) {
62             memory_region_del_subregion(get_system_memory(),
63                                         &synic->event_page_mr);
64         }
65         if (event_page_addr) {
66             memory_region_add_subregion(get_system_memory(), event_page_addr,
67                                         &synic->event_page_mr);
68         }
69         synic->event_page_addr = event_page_addr;
70     }
71 }
72 
73 void hyperv_synic_update(CPUState *cs, bool enable,
74                          hwaddr msg_page_addr, hwaddr event_page_addr)
75 {
76     SynICState *synic = get_synic(cs);
77 
78     if (!synic) {
79         return;
80     }
81 
82     synic_update(synic, enable, msg_page_addr, event_page_addr);
83 }
84 
85 static void synic_realize(DeviceState *dev, Error **errp)
86 {
87     Object *obj = OBJECT(dev);
88     SynICState *synic = SYNIC(dev);
89     char *msgp_name, *eventp_name;
90     uint32_t vp_index;
91 
92     /* memory region names have to be globally unique */
93     vp_index = hyperv_vp_index(synic->cs);
94     msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index);
95     eventp_name = g_strdup_printf("synic-%u-event-page", vp_index);
96 
97     memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name,
98                            sizeof(*synic->msg_page), &error_abort);
99     memory_region_init_ram(&synic->event_page_mr, obj, eventp_name,
100                            sizeof(*synic->event_page), &error_abort);
101     synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr);
102     synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr);
103 
104     g_free(msgp_name);
105     g_free(eventp_name);
106 }
107 static void synic_reset(DeviceState *dev)
108 {
109     SynICState *synic = SYNIC(dev);
110     memset(synic->msg_page, 0, sizeof(*synic->msg_page));
111     memset(synic->event_page, 0, sizeof(*synic->event_page));
112     synic_update(synic, false, 0, 0);
113 }
114 
115 static void synic_class_init(ObjectClass *klass, void *data)
116 {
117     DeviceClass *dc = DEVICE_CLASS(klass);
118 
119     dc->realize = synic_realize;
120     dc->reset = synic_reset;
121     dc->user_creatable = false;
122 }
123 
124 void hyperv_synic_add(CPUState *cs)
125 {
126     Object *obj;
127     SynICState *synic;
128 
129     obj = object_new(TYPE_SYNIC);
130     synic = SYNIC(obj);
131     synic->cs = cs;
132     object_property_add_child(OBJECT(cs), "synic", obj, &error_abort);
133     object_unref(obj);
134     object_property_set_bool(obj, true, "realized", &error_abort);
135 }
136 
137 void hyperv_synic_reset(CPUState *cs)
138 {
139     SynICState *synic = get_synic(cs);
140 
141     if (synic) {
142         device_reset(DEVICE(synic));
143     }
144 }
145 
146 static const TypeInfo synic_type_info = {
147     .name = TYPE_SYNIC,
148     .parent = TYPE_DEVICE,
149     .instance_size = sizeof(SynICState),
150     .class_init = synic_class_init,
151 };
152 
153 static void synic_register_types(void)
154 {
155     type_register_static(&synic_type_info);
156 }
157 
158 type_init(synic_register_types)
159 
160 /*
161  * KVM has its own message producers (SynIC timers).  To guarantee
162  * serialization with both KVM vcpu and the guest cpu, the messages are first
163  * staged in an intermediate area and then posted to the SynIC message page in
164  * the vcpu thread.
165  */
166 typedef struct HvSintStagedMessage {
167     /* message content staged by hyperv_post_msg */
168     struct hyperv_message msg;
169     /* callback + data (r/o) to complete the processing in a BH */
170     HvSintMsgCb cb;
171     void *cb_data;
172     /* message posting status filled by cpu_post_msg */
173     int status;
174     /* passing the buck: */
175     enum {
176         /* initial state */
177         HV_STAGED_MSG_FREE,
178         /*
179          * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE ->
180          * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu
181          */
182         HV_STAGED_MSG_BUSY,
183         /*
184          * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot,
185          * notify the guest, records the status, marks the posting done (BUSY
186          * -> POSTED), and schedules sint_msg_bh BH
187          */
188         HV_STAGED_MSG_POSTED,
189         /*
190          * sint_msg_bh (BH) verifies that the posting is done, runs the
191          * callback, and starts over (POSTED -> FREE)
192          */
193     } state;
194 } HvSintStagedMessage;
195 
196 struct HvSintRoute {
197     uint32_t sint;
198     SynICState *synic;
199     int gsi;
200     EventNotifier sint_set_notifier;
201     EventNotifier sint_ack_notifier;
202 
203     HvSintStagedMessage *staged_msg;
204 
205     unsigned refcount;
206 };
207 
208 static CPUState *hyperv_find_vcpu(uint32_t vp_index)
209 {
210     CPUState *cs = qemu_get_cpu(vp_index);
211     assert(hyperv_vp_index(cs) == vp_index);
212     return cs;
213 }
214 
215 /*
216  * BH to complete the processing of a staged message.
217  */
218 static void sint_msg_bh(void *opaque)
219 {
220     HvSintRoute *sint_route = opaque;
221     HvSintStagedMessage *staged_msg = sint_route->staged_msg;
222 
223     if (atomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) {
224         /* status nor ready yet (spurious ack from guest?), ignore */
225         return;
226     }
227 
228     staged_msg->cb(staged_msg->cb_data, staged_msg->status);
229     staged_msg->status = 0;
230 
231     /* staged message processing finished, ready to start over */
232     atomic_set(&staged_msg->state, HV_STAGED_MSG_FREE);
233     /* drop the reference taken in hyperv_post_msg */
234     hyperv_sint_route_unref(sint_route);
235 }
236 
237 /*
238  * Worker to transfer the message from the staging area into the SynIC message
239  * page in vcpu context.
240  */
241 static void cpu_post_msg(CPUState *cs, run_on_cpu_data data)
242 {
243     HvSintRoute *sint_route = data.host_ptr;
244     HvSintStagedMessage *staged_msg = sint_route->staged_msg;
245     SynICState *synic = sint_route->synic;
246     struct hyperv_message *dst_msg;
247     bool wait_for_sint_ack = false;
248 
249     assert(staged_msg->state == HV_STAGED_MSG_BUSY);
250 
251     if (!synic->enabled || !synic->msg_page_addr) {
252         staged_msg->status = -ENXIO;
253         goto posted;
254     }
255 
256     dst_msg = &synic->msg_page->slot[sint_route->sint];
257 
258     if (dst_msg->header.message_type != HV_MESSAGE_NONE) {
259         dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING;
260         staged_msg->status = -EAGAIN;
261         wait_for_sint_ack = true;
262     } else {
263         memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg));
264         staged_msg->status = hyperv_sint_route_set_sint(sint_route);
265     }
266 
267     memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page));
268 
269 posted:
270     atomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED);
271     /*
272      * Notify the msg originator of the progress made; if the slot was busy we
273      * set msg_pending flag in it so it will be the guest who will do EOM and
274      * trigger the notification from KVM via sint_ack_notifier
275      */
276     if (!wait_for_sint_ack) {
277         aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh,
278                                 sint_route);
279     }
280 }
281 
282 /*
283  * Post a Hyper-V message to the staging area, for delivery to guest in the
284  * vcpu thread.
285  */
286 int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg)
287 {
288     HvSintStagedMessage *staged_msg = sint_route->staged_msg;
289 
290     assert(staged_msg);
291 
292     /* grab the staging area */
293     if (atomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE,
294                        HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) {
295         return -EAGAIN;
296     }
297 
298     memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg));
299 
300     /* hold a reference on sint_route until the callback is finished */
301     hyperv_sint_route_ref(sint_route);
302 
303     /* schedule message posting attempt in vcpu thread */
304     async_run_on_cpu(sint_route->synic->cs, cpu_post_msg,
305                      RUN_ON_CPU_HOST_PTR(sint_route));
306     return 0;
307 }
308 
309 static void sint_ack_handler(EventNotifier *notifier)
310 {
311     HvSintRoute *sint_route = container_of(notifier, HvSintRoute,
312                                            sint_ack_notifier);
313     event_notifier_test_and_clear(notifier);
314 
315     /*
316      * the guest consumed the previous message so complete the current one with
317      * -EAGAIN and let the msg originator retry
318      */
319     aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route);
320 }
321 
322 /*
323  * Set given event flag for a given sint on a given vcpu, and signal the sint.
324  */
325 int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno)
326 {
327     int ret;
328     SynICState *synic = sint_route->synic;
329     unsigned long *flags, set_mask;
330     unsigned set_idx;
331 
332     if (eventno > HV_EVENT_FLAGS_COUNT) {
333         return -EINVAL;
334     }
335     if (!synic->enabled || !synic->event_page_addr) {
336         return -ENXIO;
337     }
338 
339     set_idx = BIT_WORD(eventno);
340     set_mask = BIT_MASK(eventno);
341     flags = synic->event_page->slot[sint_route->sint].flags;
342 
343     if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) {
344         memory_region_set_dirty(&synic->event_page_mr, 0,
345                                 sizeof(*synic->event_page));
346         ret = hyperv_sint_route_set_sint(sint_route);
347     } else {
348         ret = 0;
349     }
350     return ret;
351 }
352 
353 HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint,
354                                    HvSintMsgCb cb, void *cb_data)
355 {
356     HvSintRoute *sint_route;
357     EventNotifier *ack_notifier;
358     int r, gsi;
359     CPUState *cs;
360     SynICState *synic;
361 
362     cs = hyperv_find_vcpu(vp_index);
363     if (!cs) {
364         return NULL;
365     }
366 
367     synic = get_synic(cs);
368     if (!synic) {
369         return NULL;
370     }
371 
372     sint_route = g_new0(HvSintRoute, 1);
373     r = event_notifier_init(&sint_route->sint_set_notifier, false);
374     if (r) {
375         goto err;
376     }
377 
378 
379     ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL;
380     if (ack_notifier) {
381         sint_route->staged_msg = g_new0(HvSintStagedMessage, 1);
382         sint_route->staged_msg->cb = cb;
383         sint_route->staged_msg->cb_data = cb_data;
384 
385         r = event_notifier_init(ack_notifier, false);
386         if (r) {
387             goto err_sint_set_notifier;
388         }
389 
390         event_notifier_set_handler(ack_notifier, sint_ack_handler);
391     }
392 
393     gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint);
394     if (gsi < 0) {
395         goto err_gsi;
396     }
397 
398     r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
399                                            &sint_route->sint_set_notifier,
400                                            ack_notifier, gsi);
401     if (r) {
402         goto err_irqfd;
403     }
404     sint_route->gsi = gsi;
405     sint_route->synic = synic;
406     sint_route->sint = sint;
407     sint_route->refcount = 1;
408 
409     return sint_route;
410 
411 err_irqfd:
412     kvm_irqchip_release_virq(kvm_state, gsi);
413 err_gsi:
414     if (ack_notifier) {
415         event_notifier_set_handler(ack_notifier, NULL);
416         event_notifier_cleanup(ack_notifier);
417         g_free(sint_route->staged_msg);
418     }
419 err_sint_set_notifier:
420     event_notifier_cleanup(&sint_route->sint_set_notifier);
421 err:
422     g_free(sint_route);
423 
424     return NULL;
425 }
426 
427 void hyperv_sint_route_ref(HvSintRoute *sint_route)
428 {
429     sint_route->refcount++;
430 }
431 
432 void hyperv_sint_route_unref(HvSintRoute *sint_route)
433 {
434     if (!sint_route) {
435         return;
436     }
437 
438     assert(sint_route->refcount > 0);
439 
440     if (--sint_route->refcount) {
441         return;
442     }
443 
444     kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
445                                           &sint_route->sint_set_notifier,
446                                           sint_route->gsi);
447     kvm_irqchip_release_virq(kvm_state, sint_route->gsi);
448     if (sint_route->staged_msg) {
449         event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL);
450         event_notifier_cleanup(&sint_route->sint_ack_notifier);
451         g_free(sint_route->staged_msg);
452     }
453     event_notifier_cleanup(&sint_route->sint_set_notifier);
454     g_free(sint_route);
455 }
456 
457 int hyperv_sint_route_set_sint(HvSintRoute *sint_route)
458 {
459     return event_notifier_set(&sint_route->sint_set_notifier);
460 }
461 
462 typedef struct MsgHandler {
463     struct rcu_head rcu;
464     QLIST_ENTRY(MsgHandler) link;
465     uint32_t conn_id;
466     HvMsgHandler handler;
467     void *data;
468 } MsgHandler;
469 
470 typedef struct EventFlagHandler {
471     struct rcu_head rcu;
472     QLIST_ENTRY(EventFlagHandler) link;
473     uint32_t conn_id;
474     EventNotifier *notifier;
475 } EventFlagHandler;
476 
477 static QLIST_HEAD(, MsgHandler) msg_handlers;
478 static QLIST_HEAD(, EventFlagHandler) event_flag_handlers;
479 static QemuMutex handlers_mutex;
480 
481 static void __attribute__((constructor)) hv_init(void)
482 {
483     QLIST_INIT(&msg_handlers);
484     QLIST_INIT(&event_flag_handlers);
485     qemu_mutex_init(&handlers_mutex);
486 }
487 
488 int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data)
489 {
490     int ret;
491     MsgHandler *mh;
492 
493     qemu_mutex_lock(&handlers_mutex);
494     QLIST_FOREACH(mh, &msg_handlers, link) {
495         if (mh->conn_id == conn_id) {
496             if (handler) {
497                 ret = -EEXIST;
498             } else {
499                 QLIST_REMOVE_RCU(mh, link);
500                 g_free_rcu(mh, rcu);
501                 ret = 0;
502             }
503             goto unlock;
504         }
505     }
506 
507     if (handler) {
508         mh = g_new(MsgHandler, 1);
509         mh->conn_id = conn_id;
510         mh->handler = handler;
511         mh->data = data;
512         QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link);
513         ret = 0;
514     } else {
515         ret = -ENOENT;
516     }
517 unlock:
518     qemu_mutex_unlock(&handlers_mutex);
519     return ret;
520 }
521 
522 uint16_t hyperv_hcall_post_message(uint64_t param, bool fast)
523 {
524     uint16_t ret;
525     hwaddr len;
526     struct hyperv_post_message_input *msg;
527     MsgHandler *mh;
528 
529     if (fast) {
530         return HV_STATUS_INVALID_HYPERCALL_CODE;
531     }
532     if (param & (__alignof__(*msg) - 1)) {
533         return HV_STATUS_INVALID_ALIGNMENT;
534     }
535 
536     len = sizeof(*msg);
537     msg = cpu_physical_memory_map(param, &len, 0);
538     if (len < sizeof(*msg)) {
539         ret = HV_STATUS_INSUFFICIENT_MEMORY;
540         goto unmap;
541     }
542     if (msg->payload_size > sizeof(msg->payload)) {
543         ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
544         goto unmap;
545     }
546 
547     ret = HV_STATUS_INVALID_CONNECTION_ID;
548     rcu_read_lock();
549     QLIST_FOREACH_RCU(mh, &msg_handlers, link) {
550         if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) {
551             ret = mh->handler(msg, mh->data);
552             break;
553         }
554     }
555     rcu_read_unlock();
556 
557 unmap:
558     cpu_physical_memory_unmap(msg, len, 0, 0);
559     return ret;
560 }
561 
562 static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
563 {
564     int ret;
565     EventFlagHandler *handler;
566 
567     qemu_mutex_lock(&handlers_mutex);
568     QLIST_FOREACH(handler, &event_flag_handlers, link) {
569         if (handler->conn_id == conn_id) {
570             if (notifier) {
571                 ret = -EEXIST;
572             } else {
573                 QLIST_REMOVE_RCU(handler, link);
574                 g_free_rcu(handler, rcu);
575                 ret = 0;
576             }
577             goto unlock;
578         }
579     }
580 
581     if (notifier) {
582         handler = g_new(EventFlagHandler, 1);
583         handler->conn_id = conn_id;
584         handler->notifier = notifier;
585         QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link);
586         ret = 0;
587     } else {
588         ret = -ENOENT;
589     }
590 unlock:
591     qemu_mutex_unlock(&handlers_mutex);
592     return ret;
593 }
594 
595 static bool process_event_flags_userspace;
596 
597 int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
598 {
599     if (!process_event_flags_userspace &&
600         !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) {
601         process_event_flags_userspace = true;
602 
603         warn_report("Hyper-V event signaling is not supported by this kernel; "
604                     "using slower userspace hypercall processing");
605     }
606 
607     if (!process_event_flags_userspace) {
608         struct kvm_hyperv_eventfd hvevfd = {
609             .conn_id = conn_id,
610             .fd = notifier ? event_notifier_get_fd(notifier) : -1,
611             .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN,
612         };
613 
614         return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd);
615     }
616     return set_event_flag_handler(conn_id, notifier);
617 }
618 
619 uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast)
620 {
621     uint16_t ret;
622     EventFlagHandler *handler;
623 
624     if (unlikely(!fast)) {
625         hwaddr addr = param;
626 
627         if (addr & (__alignof__(addr) - 1)) {
628             return HV_STATUS_INVALID_ALIGNMENT;
629         }
630 
631         param = ldq_phys(&address_space_memory, addr);
632     }
633 
634     /*
635      * Per spec, bits 32-47 contain the extra "flag number".  However, we
636      * have no use for it, and in all known usecases it is zero, so just
637      * report lookup failure if it isn't.
638      */
639     if (param & 0xffff00000000ULL) {
640         return HV_STATUS_INVALID_PORT_ID;
641     }
642     /* remaining bits are reserved-zero */
643     if (param & ~HV_CONNECTION_ID_MASK) {
644         return HV_STATUS_INVALID_HYPERCALL_INPUT;
645     }
646 
647     ret = HV_STATUS_INVALID_CONNECTION_ID;
648     rcu_read_lock();
649     QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) {
650         if (handler->conn_id == param) {
651             event_notifier_set(handler->notifier);
652             ret = 0;
653             break;
654         }
655     }
656     rcu_read_unlock();
657     return ret;
658 }
659