xref: /openbmc/qemu/hw/hyperv/hyperv.c (revision 64552b6b)
1 /*
2  * Hyper-V guest/hypervisor interaction
3  *
4  * Copyright (c) 2015-2018 Virtuozzo International GmbH.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/main-loop.h"
12 #include "qemu/module.h"
13 #include "qapi/error.h"
14 #include "exec/address-spaces.h"
15 #include "sysemu/kvm.h"
16 #include "qemu/bitops.h"
17 #include "qemu/error-report.h"
18 #include "qemu/queue.h"
19 #include "qemu/rcu.h"
20 #include "qemu/rcu_queue.h"
21 #include "hw/hyperv/hyperv.h"
22 
23 typedef struct SynICState {
24     DeviceState parent_obj;
25 
26     CPUState *cs;
27 
28     bool enabled;
29     hwaddr msg_page_addr;
30     hwaddr event_page_addr;
31     MemoryRegion msg_page_mr;
32     MemoryRegion event_page_mr;
33     struct hyperv_message_page *msg_page;
34     struct hyperv_event_flags_page *event_page;
35 } SynICState;
36 
/* QOM type name of the SynIC device and the matching checked cast */
#define TYPE_SYNIC "hyperv-synic"
#define SYNIC(obj) OBJECT_CHECK(SynICState, (obj), TYPE_SYNIC)
39 
40 static SynICState *get_synic(CPUState *cs)
41 {
42     return SYNIC(object_resolve_path_component(OBJECT(cs), "synic"));
43 }
44 
45 static void synic_update(SynICState *synic, bool enable,
46                          hwaddr msg_page_addr, hwaddr event_page_addr)
47 {
48 
49     synic->enabled = enable;
50     if (synic->msg_page_addr != msg_page_addr) {
51         if (synic->msg_page_addr) {
52             memory_region_del_subregion(get_system_memory(),
53                                         &synic->msg_page_mr);
54         }
55         if (msg_page_addr) {
56             memory_region_add_subregion(get_system_memory(), msg_page_addr,
57                                         &synic->msg_page_mr);
58         }
59         synic->msg_page_addr = msg_page_addr;
60     }
61     if (synic->event_page_addr != event_page_addr) {
62         if (synic->event_page_addr) {
63             memory_region_del_subregion(get_system_memory(),
64                                         &synic->event_page_mr);
65         }
66         if (event_page_addr) {
67             memory_region_add_subregion(get_system_memory(), event_page_addr,
68                                         &synic->event_page_mr);
69         }
70         synic->event_page_addr = event_page_addr;
71     }
72 }
73 
74 void hyperv_synic_update(CPUState *cs, bool enable,
75                          hwaddr msg_page_addr, hwaddr event_page_addr)
76 {
77     SynICState *synic = get_synic(cs);
78 
79     if (!synic) {
80         return;
81     }
82 
83     synic_update(synic, enable, msg_page_addr, event_page_addr);
84 }
85 
86 static void synic_realize(DeviceState *dev, Error **errp)
87 {
88     Object *obj = OBJECT(dev);
89     SynICState *synic = SYNIC(dev);
90     char *msgp_name, *eventp_name;
91     uint32_t vp_index;
92 
93     /* memory region names have to be globally unique */
94     vp_index = hyperv_vp_index(synic->cs);
95     msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index);
96     eventp_name = g_strdup_printf("synic-%u-event-page", vp_index);
97 
98     memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name,
99                            sizeof(*synic->msg_page), &error_abort);
100     memory_region_init_ram(&synic->event_page_mr, obj, eventp_name,
101                            sizeof(*synic->event_page), &error_abort);
102     synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr);
103     synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr);
104 
105     g_free(msgp_name);
106     g_free(eventp_name);
107 }
108 static void synic_reset(DeviceState *dev)
109 {
110     SynICState *synic = SYNIC(dev);
111     memset(synic->msg_page, 0, sizeof(*synic->msg_page));
112     memset(synic->event_page, 0, sizeof(*synic->event_page));
113     synic_update(synic, false, 0, 0);
114 }
115 
116 static void synic_class_init(ObjectClass *klass, void *data)
117 {
118     DeviceClass *dc = DEVICE_CLASS(klass);
119 
120     dc->realize = synic_realize;
121     dc->reset = synic_reset;
122     dc->user_creatable = false;
123 }
124 
125 void hyperv_synic_add(CPUState *cs)
126 {
127     Object *obj;
128     SynICState *synic;
129 
130     obj = object_new(TYPE_SYNIC);
131     synic = SYNIC(obj);
132     synic->cs = cs;
133     object_property_add_child(OBJECT(cs), "synic", obj, &error_abort);
134     object_unref(obj);
135     object_property_set_bool(obj, true, "realized", &error_abort);
136 }
137 
138 void hyperv_synic_reset(CPUState *cs)
139 {
140     SynICState *synic = get_synic(cs);
141 
142     if (synic) {
143         device_reset(DEVICE(synic));
144     }
145 }
146 
147 static const TypeInfo synic_type_info = {
148     .name = TYPE_SYNIC,
149     .parent = TYPE_DEVICE,
150     .instance_size = sizeof(SynICState),
151     .class_init = synic_class_init,
152 };
153 
154 static void synic_register_types(void)
155 {
156     type_register_static(&synic_type_info);
157 }
158 
159 type_init(synic_register_types)
160 
161 /*
162  * KVM has its own message producers (SynIC timers).  To guarantee
163  * serialization with both KVM vcpu and the guest cpu, the messages are first
164  * staged in an intermediate area and then posted to the SynIC message page in
165  * the vcpu thread.
166  */
167 typedef struct HvSintStagedMessage {
168     /* message content staged by hyperv_post_msg */
169     struct hyperv_message msg;
170     /* callback + data (r/o) to complete the processing in a BH */
171     HvSintMsgCb cb;
172     void *cb_data;
173     /* message posting status filled by cpu_post_msg */
174     int status;
175     /* passing the buck: */
176     enum {
177         /* initial state */
178         HV_STAGED_MSG_FREE,
179         /*
180          * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE ->
181          * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu
182          */
183         HV_STAGED_MSG_BUSY,
184         /*
185          * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot,
186          * notify the guest, records the status, marks the posting done (BUSY
187          * -> POSTED), and schedules sint_msg_bh BH
188          */
189         HV_STAGED_MSG_POSTED,
190         /*
191          * sint_msg_bh (BH) verifies that the posting is done, runs the
192          * callback, and starts over (POSTED -> FREE)
193          */
194     } state;
195 } HvSintStagedMessage;
196 
197 struct HvSintRoute {
198     uint32_t sint;
199     SynICState *synic;
200     int gsi;
201     EventNotifier sint_set_notifier;
202     EventNotifier sint_ack_notifier;
203 
204     HvSintStagedMessage *staged_msg;
205 
206     unsigned refcount;
207 };
208 
209 static CPUState *hyperv_find_vcpu(uint32_t vp_index)
210 {
211     CPUState *cs = qemu_get_cpu(vp_index);
212     assert(hyperv_vp_index(cs) == vp_index);
213     return cs;
214 }
215 
216 /*
217  * BH to complete the processing of a staged message.
218  */
219 static void sint_msg_bh(void *opaque)
220 {
221     HvSintRoute *sint_route = opaque;
222     HvSintStagedMessage *staged_msg = sint_route->staged_msg;
223 
224     if (atomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) {
225         /* status nor ready yet (spurious ack from guest?), ignore */
226         return;
227     }
228 
229     staged_msg->cb(staged_msg->cb_data, staged_msg->status);
230     staged_msg->status = 0;
231 
232     /* staged message processing finished, ready to start over */
233     atomic_set(&staged_msg->state, HV_STAGED_MSG_FREE);
234     /* drop the reference taken in hyperv_post_msg */
235     hyperv_sint_route_unref(sint_route);
236 }
237 
238 /*
239  * Worker to transfer the message from the staging area into the SynIC message
240  * page in vcpu context.
241  */
242 static void cpu_post_msg(CPUState *cs, run_on_cpu_data data)
243 {
244     HvSintRoute *sint_route = data.host_ptr;
245     HvSintStagedMessage *staged_msg = sint_route->staged_msg;
246     SynICState *synic = sint_route->synic;
247     struct hyperv_message *dst_msg;
248     bool wait_for_sint_ack = false;
249 
250     assert(staged_msg->state == HV_STAGED_MSG_BUSY);
251 
252     if (!synic->enabled || !synic->msg_page_addr) {
253         staged_msg->status = -ENXIO;
254         goto posted;
255     }
256 
257     dst_msg = &synic->msg_page->slot[sint_route->sint];
258 
259     if (dst_msg->header.message_type != HV_MESSAGE_NONE) {
260         dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING;
261         staged_msg->status = -EAGAIN;
262         wait_for_sint_ack = true;
263     } else {
264         memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg));
265         staged_msg->status = hyperv_sint_route_set_sint(sint_route);
266     }
267 
268     memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page));
269 
270 posted:
271     atomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED);
272     /*
273      * Notify the msg originator of the progress made; if the slot was busy we
274      * set msg_pending flag in it so it will be the guest who will do EOM and
275      * trigger the notification from KVM via sint_ack_notifier
276      */
277     if (!wait_for_sint_ack) {
278         aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh,
279                                 sint_route);
280     }
281 }
282 
283 /*
284  * Post a Hyper-V message to the staging area, for delivery to guest in the
285  * vcpu thread.
286  */
287 int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg)
288 {
289     HvSintStagedMessage *staged_msg = sint_route->staged_msg;
290 
291     assert(staged_msg);
292 
293     /* grab the staging area */
294     if (atomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE,
295                        HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) {
296         return -EAGAIN;
297     }
298 
299     memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg));
300 
301     /* hold a reference on sint_route until the callback is finished */
302     hyperv_sint_route_ref(sint_route);
303 
304     /* schedule message posting attempt in vcpu thread */
305     async_run_on_cpu(sint_route->synic->cs, cpu_post_msg,
306                      RUN_ON_CPU_HOST_PTR(sint_route));
307     return 0;
308 }
309 
310 static void sint_ack_handler(EventNotifier *notifier)
311 {
312     HvSintRoute *sint_route = container_of(notifier, HvSintRoute,
313                                            sint_ack_notifier);
314     event_notifier_test_and_clear(notifier);
315 
316     /*
317      * the guest consumed the previous message so complete the current one with
318      * -EAGAIN and let the msg originator retry
319      */
320     aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route);
321 }
322 
323 /*
324  * Set given event flag for a given sint on a given vcpu, and signal the sint.
325  */
326 int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno)
327 {
328     int ret;
329     SynICState *synic = sint_route->synic;
330     unsigned long *flags, set_mask;
331     unsigned set_idx;
332 
333     if (eventno > HV_EVENT_FLAGS_COUNT) {
334         return -EINVAL;
335     }
336     if (!synic->enabled || !synic->event_page_addr) {
337         return -ENXIO;
338     }
339 
340     set_idx = BIT_WORD(eventno);
341     set_mask = BIT_MASK(eventno);
342     flags = synic->event_page->slot[sint_route->sint].flags;
343 
344     if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) {
345         memory_region_set_dirty(&synic->event_page_mr, 0,
346                                 sizeof(*synic->event_page));
347         ret = hyperv_sint_route_set_sint(sint_route);
348     } else {
349         ret = 0;
350     }
351     return ret;
352 }
353 
354 HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint,
355                                    HvSintMsgCb cb, void *cb_data)
356 {
357     HvSintRoute *sint_route;
358     EventNotifier *ack_notifier;
359     int r, gsi;
360     CPUState *cs;
361     SynICState *synic;
362 
363     cs = hyperv_find_vcpu(vp_index);
364     if (!cs) {
365         return NULL;
366     }
367 
368     synic = get_synic(cs);
369     if (!synic) {
370         return NULL;
371     }
372 
373     sint_route = g_new0(HvSintRoute, 1);
374     r = event_notifier_init(&sint_route->sint_set_notifier, false);
375     if (r) {
376         goto err;
377     }
378 
379 
380     ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL;
381     if (ack_notifier) {
382         sint_route->staged_msg = g_new0(HvSintStagedMessage, 1);
383         sint_route->staged_msg->cb = cb;
384         sint_route->staged_msg->cb_data = cb_data;
385 
386         r = event_notifier_init(ack_notifier, false);
387         if (r) {
388             goto err_sint_set_notifier;
389         }
390 
391         event_notifier_set_handler(ack_notifier, sint_ack_handler);
392     }
393 
394     gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint);
395     if (gsi < 0) {
396         goto err_gsi;
397     }
398 
399     r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
400                                            &sint_route->sint_set_notifier,
401                                            ack_notifier, gsi);
402     if (r) {
403         goto err_irqfd;
404     }
405     sint_route->gsi = gsi;
406     sint_route->synic = synic;
407     sint_route->sint = sint;
408     sint_route->refcount = 1;
409 
410     return sint_route;
411 
412 err_irqfd:
413     kvm_irqchip_release_virq(kvm_state, gsi);
414 err_gsi:
415     if (ack_notifier) {
416         event_notifier_set_handler(ack_notifier, NULL);
417         event_notifier_cleanup(ack_notifier);
418         g_free(sint_route->staged_msg);
419     }
420 err_sint_set_notifier:
421     event_notifier_cleanup(&sint_route->sint_set_notifier);
422 err:
423     g_free(sint_route);
424 
425     return NULL;
426 }
427 
428 void hyperv_sint_route_ref(HvSintRoute *sint_route)
429 {
430     sint_route->refcount++;
431 }
432 
433 void hyperv_sint_route_unref(HvSintRoute *sint_route)
434 {
435     if (!sint_route) {
436         return;
437     }
438 
439     assert(sint_route->refcount > 0);
440 
441     if (--sint_route->refcount) {
442         return;
443     }
444 
445     kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
446                                           &sint_route->sint_set_notifier,
447                                           sint_route->gsi);
448     kvm_irqchip_release_virq(kvm_state, sint_route->gsi);
449     if (sint_route->staged_msg) {
450         event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL);
451         event_notifier_cleanup(&sint_route->sint_ack_notifier);
452         g_free(sint_route->staged_msg);
453     }
454     event_notifier_cleanup(&sint_route->sint_set_notifier);
455     g_free(sint_route);
456 }
457 
458 int hyperv_sint_route_set_sint(HvSintRoute *sint_route)
459 {
460     return event_notifier_set(&sint_route->sint_set_notifier);
461 }
462 
463 typedef struct MsgHandler {
464     struct rcu_head rcu;
465     QLIST_ENTRY(MsgHandler) link;
466     uint32_t conn_id;
467     HvMsgHandler handler;
468     void *data;
469 } MsgHandler;
470 
471 typedef struct EventFlagHandler {
472     struct rcu_head rcu;
473     QLIST_ENTRY(EventFlagHandler) link;
474     uint32_t conn_id;
475     EventNotifier *notifier;
476 } EventFlagHandler;
477 
478 static QLIST_HEAD(, MsgHandler) msg_handlers;
479 static QLIST_HEAD(, EventFlagHandler) event_flag_handlers;
480 static QemuMutex handlers_mutex;
481 
482 static void __attribute__((constructor)) hv_init(void)
483 {
484     QLIST_INIT(&msg_handlers);
485     QLIST_INIT(&event_flag_handlers);
486     qemu_mutex_init(&handlers_mutex);
487 }
488 
489 int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data)
490 {
491     int ret;
492     MsgHandler *mh;
493 
494     qemu_mutex_lock(&handlers_mutex);
495     QLIST_FOREACH(mh, &msg_handlers, link) {
496         if (mh->conn_id == conn_id) {
497             if (handler) {
498                 ret = -EEXIST;
499             } else {
500                 QLIST_REMOVE_RCU(mh, link);
501                 g_free_rcu(mh, rcu);
502                 ret = 0;
503             }
504             goto unlock;
505         }
506     }
507 
508     if (handler) {
509         mh = g_new(MsgHandler, 1);
510         mh->conn_id = conn_id;
511         mh->handler = handler;
512         mh->data = data;
513         QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link);
514         ret = 0;
515     } else {
516         ret = -ENOENT;
517     }
518 unlock:
519     qemu_mutex_unlock(&handlers_mutex);
520     return ret;
521 }
522 
523 uint16_t hyperv_hcall_post_message(uint64_t param, bool fast)
524 {
525     uint16_t ret;
526     hwaddr len;
527     struct hyperv_post_message_input *msg;
528     MsgHandler *mh;
529 
530     if (fast) {
531         return HV_STATUS_INVALID_HYPERCALL_CODE;
532     }
533     if (param & (__alignof__(*msg) - 1)) {
534         return HV_STATUS_INVALID_ALIGNMENT;
535     }
536 
537     len = sizeof(*msg);
538     msg = cpu_physical_memory_map(param, &len, 0);
539     if (len < sizeof(*msg)) {
540         ret = HV_STATUS_INSUFFICIENT_MEMORY;
541         goto unmap;
542     }
543     if (msg->payload_size > sizeof(msg->payload)) {
544         ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
545         goto unmap;
546     }
547 
548     ret = HV_STATUS_INVALID_CONNECTION_ID;
549     rcu_read_lock();
550     QLIST_FOREACH_RCU(mh, &msg_handlers, link) {
551         if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) {
552             ret = mh->handler(msg, mh->data);
553             break;
554         }
555     }
556     rcu_read_unlock();
557 
558 unmap:
559     cpu_physical_memory_unmap(msg, len, 0, 0);
560     return ret;
561 }
562 
563 static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
564 {
565     int ret;
566     EventFlagHandler *handler;
567 
568     qemu_mutex_lock(&handlers_mutex);
569     QLIST_FOREACH(handler, &event_flag_handlers, link) {
570         if (handler->conn_id == conn_id) {
571             if (notifier) {
572                 ret = -EEXIST;
573             } else {
574                 QLIST_REMOVE_RCU(handler, link);
575                 g_free_rcu(handler, rcu);
576                 ret = 0;
577             }
578             goto unlock;
579         }
580     }
581 
582     if (notifier) {
583         handler = g_new(EventFlagHandler, 1);
584         handler->conn_id = conn_id;
585         handler->notifier = notifier;
586         QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link);
587         ret = 0;
588     } else {
589         ret = -ENOENT;
590     }
591 unlock:
592     qemu_mutex_unlock(&handlers_mutex);
593     return ret;
594 }
595 
596 static bool process_event_flags_userspace;
597 
598 int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
599 {
600     if (!process_event_flags_userspace &&
601         !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) {
602         process_event_flags_userspace = true;
603 
604         warn_report("Hyper-V event signaling is not supported by this kernel; "
605                     "using slower userspace hypercall processing");
606     }
607 
608     if (!process_event_flags_userspace) {
609         struct kvm_hyperv_eventfd hvevfd = {
610             .conn_id = conn_id,
611             .fd = notifier ? event_notifier_get_fd(notifier) : -1,
612             .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN,
613         };
614 
615         return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd);
616     }
617     return set_event_flag_handler(conn_id, notifier);
618 }
619 
620 uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast)
621 {
622     uint16_t ret;
623     EventFlagHandler *handler;
624 
625     if (unlikely(!fast)) {
626         hwaddr addr = param;
627 
628         if (addr & (__alignof__(addr) - 1)) {
629             return HV_STATUS_INVALID_ALIGNMENT;
630         }
631 
632         param = ldq_phys(&address_space_memory, addr);
633     }
634 
635     /*
636      * Per spec, bits 32-47 contain the extra "flag number".  However, we
637      * have no use for it, and in all known usecases it is zero, so just
638      * report lookup failure if it isn't.
639      */
640     if (param & 0xffff00000000ULL) {
641         return HV_STATUS_INVALID_PORT_ID;
642     }
643     /* remaining bits are reserved-zero */
644     if (param & ~HV_CONNECTION_ID_MASK) {
645         return HV_STATUS_INVALID_HYPERCALL_INPUT;
646     }
647 
648     ret = HV_STATUS_INVALID_CONNECTION_ID;
649     rcu_read_lock();
650     QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) {
651         if (handler->conn_id == param) {
652             event_notifier_set(handler->notifier);
653             ret = 0;
654             break;
655         }
656     }
657     rcu_read_unlock();
658     return ret;
659 }
660