xref: /openbmc/qemu/hw/hyperv/vmbus.c (revision 173c427eb5705064da7dc8db22553c8df34f7f58)
1 /*
2  * QEMU Hyper-V VMBus
3  *
4  * Copyright (c) 2017-2018 Virtuozzo International GmbH.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/error-report.h"
12 #include "qemu/main-loop.h"
13 #include "qapi/error.h"
14 #include "migration/vmstate.h"
15 #include "hw/qdev-properties.h"
16 #include "hw/qdev-properties-system.h"
17 #include "hw/hyperv/hyperv.h"
18 #include "hw/hyperv/vmbus.h"
19 #include "hw/hyperv/vmbus-bridge.h"
20 #include "hw/sysbus.h"
21 #include "cpu.h"
22 #include "trace.h"
23 
/*
 * GPADL lifecycle states.
 * NOTE(review): presumably the values kept in VMBusGpadl.state -- confirm.
 */
enum {
    VMGPADL_INIT = 0,
    VMGPADL_ALIVE = 1,
    VMGPADL_TEARINGDOWN = 2,
    VMGPADL_TORNDOWN = 3,
};
30 
31 struct VMBusGpadl {
32     /* GPADL id */
33     uint32_t id;
34     /* associated channel id (rudimentary?) */
35     uint32_t child_relid;
36 
37     /* number of pages in the GPADL as declared in GPADL_HEADER message */
38     uint32_t num_gfns;
39     /*
40      * Due to limited message size, GPADL may not fit fully in a single
41      * GPADL_HEADER message, and is further popluated using GPADL_BODY
42      * messages.  @seen_gfns is the number of pages seen so far; once it
43      * reaches @num_gfns, the GPADL is ready to use.
44      */
45     uint32_t seen_gfns;
46     /* array of GFNs (of size @num_gfns once allocated) */
47     uint64_t *gfns;
48 
49     uint8_t state;
50 
51     QTAILQ_ENTRY(VMBusGpadl) link;
52     VMBus *vmbus;
53     unsigned refcount;
54 };
55 
56 /*
57  * Wrap sequential read from / write to GPADL.
58  */
59 typedef struct GpadlIter {
60     VMBusGpadl *gpadl;
61     AddressSpace *as;
62     DMADirection dir;
63     /* offset into GPADL where the next i/o will be performed */
64     uint32_t off;
65     /*
66      * Cached mapping of the currently accessed page, up to page boundary.
67      * Updated lazily on i/o.
68      * Note: MemoryRegionCache can not be used here because pages in the GPADL
69      * are non-contiguous and may belong to different memory regions.
70      */
71     void *map;
72     /* offset after last i/o (i.e. not affected by seek) */
73     uint32_t last_off;
74     /*
75      * Indicator that the iterator is active and may have a cached mapping.
76      * Allows to enforce bracketing of all i/o (which may create cached
77      * mappings) and thus exclude mapping leaks.
78      */
79     bool active;
80 } GpadlIter;
81 
82 /*
83  * Ring buffer.  There are two of them, sitting in the same GPADL, for each
84  * channel.
85  * Each ring buffer consists of a set of pages, with the first page containing
86  * the ring buffer header, and the remaining pages being for data packets.
87  */
88 typedef struct VMBusRingBufCommon {
89     AddressSpace *as;
90     /* GPA of the ring buffer header */
91     dma_addr_t rb_addr;
92     /* start and length of the ring buffer data area within GPADL */
93     uint32_t base;
94     uint32_t len;
95 
96     GpadlIter iter;
97 } VMBusRingBufCommon;
98 
99 typedef struct VMBusSendRingBuf {
100     VMBusRingBufCommon common;
101     /* current write index, to be committed at the end of send */
102     uint32_t wr_idx;
103     /* write index at the start of send */
104     uint32_t last_wr_idx;
105     /* space to be requested from the guest */
106     uint32_t wanted;
107     /* space reserved for planned sends */
108     uint32_t reserved;
109     /* last seen read index */
110     uint32_t last_seen_rd_idx;
111 } VMBusSendRingBuf;
112 
113 typedef struct VMBusRecvRingBuf {
114     VMBusRingBufCommon common;
115     /* current read index, to be committed at the end of receive */
116     uint32_t rd_idx;
117     /* read index at the start of receive */
118     uint32_t last_rd_idx;
119     /* last seen write index */
120     uint32_t last_seen_wr_idx;
121 } VMBusRecvRingBuf;
122 
123 
/*
 * Channel offer states.
 * NOTE(review): presumably the values kept in VMBusChannel.offer_state --
 * confirm.
 */
enum {
    VMOFFER_INIT = 0,
    VMOFFER_SENDING = 1,
    VMOFFER_SENT = 2,
};
129 
/*
 * Channel open states.
 * NOTE(review): presumably the values kept in VMBusChannel.state -- confirm.
 */
enum {
    VMCHAN_INIT = 0,
    VMCHAN_OPENING = 1,
    VMCHAN_OPEN = 2,
};
135 
136 struct VMBusChannel {
137     VMBusDevice *dev;
138 
139     /* channel id */
140     uint32_t id;
141     /*
142      * subchannel index within the device; subchannel #0 is "primary" and
143      * always exists
144      */
145     uint16_t subchan_idx;
146     uint32_t open_id;
147     /* VP_INDEX of the vCPU to notify with (synthetic) interrupts */
148     uint32_t target_vp;
149     /* GPADL id to use for the ring buffers */
150     uint32_t ringbuf_gpadl;
151     /* start (in pages) of the send ring buffer within @ringbuf_gpadl */
152     uint32_t ringbuf_send_offset;
153 
154     uint8_t offer_state;
155     uint8_t state;
156     bool is_open;
157 
158     /* main device worker; copied from the device class */
159     VMBusChannelNotifyCb notify_cb;
160     /*
161      * guest->host notifications, either sent directly or dispatched via
162      * interrupt page (older VMBus)
163      */
164     EventNotifier notifier;
165 
166     VMBus *vmbus;
167     /*
168      * SINT route to signal with host->guest notifications; may be shared with
169      * the main VMBus SINT route
170      */
171     HvSintRoute *notify_route;
172     VMBusGpadl *gpadl;
173 
174     VMBusSendRingBuf send_ringbuf;
175     VMBusRecvRingBuf recv_ringbuf;
176 
177     QTAILQ_ENTRY(VMBusChannel) link;
178 };
179 
/*
 * According to the Hyper-V spec each message port has 16 buffers, i.e. the
 * guest may post that many messages without blocking, so incoming messages
 * have to be queued.
 * No queue exists for outgoing (host->guest) messages: the VMBus simply does
 * not move on to a new state until the message is known to have landed in
 * the corresponding SynIC message slot.
 */
#define HV_MSG_QUEUE_LEN     16

/* Channel #0 is never used by Hyper-V devices.  Must be something special. */
#define VMBUS_FIRST_CHANID      1
/* A channel takes one bit within a single event page sint slot. */
#define VMBUS_CHANID_COUNT      (HV_EVENT_FLAGS_COUNT - VMBUS_FIRST_CHANID)
/* Keep a few connection numbers free for other purposes. */
#define VMBUS_CHAN_CONNECTION_OFFSET     16
196 
/*
 * Whether a message was sent successfully is only reported asynchronously,
 * so the VMBus state machine effectively has two entry points: vmbus_run and
 * vmbus_msg_cb (invoked once the host->guest message delivery status is
 * known).  Both run as oneshot BHs on the main aio context, which serializes
 * them.
 */
enum {
    VMBUS_LISTEN = 0,
    VMBUS_HANDSHAKE,
    VMBUS_OFFER,
    VMBUS_CREATE_GPADL,
    VMBUS_TEARDOWN_GPADL,
    VMBUS_OPEN_CHANNEL,
    VMBUS_UNLOAD,
    /* number of states; not a state itself */
    VMBUS_STATE_MAX
};
214 
215 struct VMBus {
216     BusState parent;
217 
218     uint8_t state;
219     /* protection against recursive aio_poll (see vmbus_run) */
220     bool in_progress;
221     /* whether there's a message being delivered to the guest */
222     bool msg_in_progress;
223     uint32_t version;
224     /* VP_INDEX of the vCPU to send messages and interrupts to */
225     uint32_t target_vp;
226     HvSintRoute *sint_route;
227     /*
228      * interrupt page for older protocol versions; newer ones use SynIC event
229      * flags directly
230      */
231     hwaddr int_page_gpa;
232 
233     DECLARE_BITMAP(chanid_bitmap, VMBUS_CHANID_COUNT);
234 
235     /* incoming message queue */
236     struct hyperv_post_message_input rx_queue[HV_MSG_QUEUE_LEN];
237     uint8_t rx_queue_head;
238     uint8_t rx_queue_size;
239     QemuMutex rx_queue_lock;
240 
241     QTAILQ_HEAD(, VMBusGpadl) gpadl_list;
242     QTAILQ_HEAD(, VMBusChannel) channel_list;
243 
244     /*
245      * guest->host notifications for older VMBus, to be dispatched via
246      * interrupt page
247      */
248     EventNotifier notifier;
249 };
250 
/* Tell whether every page declared for @gpadl has been received. */
static bool gpadl_full(VMBusGpadl *gpadl)
{
    uint32_t declared = gpadl->num_gfns;

    return declared == gpadl->seen_gfns;
}
255 
create_gpadl(VMBus * vmbus,uint32_t id,uint32_t child_relid,uint32_t num_gfns)256 static VMBusGpadl *create_gpadl(VMBus *vmbus, uint32_t id,
257                                 uint32_t child_relid, uint32_t num_gfns)
258 {
259     VMBusGpadl *gpadl = g_new0(VMBusGpadl, 1);
260 
261     gpadl->id = id;
262     gpadl->child_relid = child_relid;
263     gpadl->num_gfns = num_gfns;
264     gpadl->gfns = g_new(uint64_t, num_gfns);
265     QTAILQ_INSERT_HEAD(&vmbus->gpadl_list, gpadl, link);
266     gpadl->vmbus = vmbus;
267     gpadl->refcount = 1;
268     return gpadl;
269 }
270 
/* Take @gpadl off its bus' GPADL list and release its memory. */
static void free_gpadl(VMBusGpadl *gpadl)
{
    VMBus *bus = gpadl->vmbus;

    QTAILQ_REMOVE(&bus->gpadl_list, gpadl, link);
    g_free(gpadl->gfns);
    g_free(gpadl);
}
277 
find_gpadl(VMBus * vmbus,uint32_t gpadl_id)278 static VMBusGpadl *find_gpadl(VMBus *vmbus, uint32_t gpadl_id)
279 {
280     VMBusGpadl *gpadl;
281     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
282         if (gpadl->id == gpadl_id) {
283             return gpadl;
284         }
285     }
286     return NULL;
287 }
288 
vmbus_get_gpadl(VMBusChannel * chan,uint32_t gpadl_id)289 VMBusGpadl *vmbus_get_gpadl(VMBusChannel *chan, uint32_t gpadl_id)
290 {
291     VMBusGpadl *gpadl = find_gpadl(chan->vmbus, gpadl_id);
292     if (!gpadl || !gpadl_full(gpadl)) {
293         return NULL;
294     }
295     gpadl->refcount++;
296     return gpadl;
297 }
298 
/*
 * Drop a reference to @gpadl, freeing it when that was the last one.
 * A NULL @gpadl is accepted and ignored.
 */
void vmbus_put_gpadl(VMBusGpadl *gpadl)
{
    if (gpadl && --gpadl->refcount == 0) {
        free_gpadl(gpadl);
    }
}
309 
/* Size of @gpadl in bytes: its declared page count times the page size. */
uint32_t vmbus_gpadl_len(VMBusGpadl *gpadl)
{
    uint32_t pages = gpadl->num_gfns;

    return pages * TARGET_PAGE_SIZE;
}
314 
/*
 * Bind @iter to @gpadl, to be accessed through @as in direction @dir.
 * The iterator starts out inactive; the offsets and the cached mapping are
 * not touched here.
 */
static void gpadl_iter_init(GpadlIter *iter, VMBusGpadl *gpadl,
                            AddressSpace *as, DMADirection dir)
{
    iter->active = false;
    iter->dir = dir;
    iter->as = as;
    iter->gpadl = gpadl;
}
323 
/*
 * Release the cached mapping of @iter.
 * The mapping runs from its in-page start to the end of the page; the part
 * reported as accessed ends where the last i/o ended (@last_off).
 * The caller is responsible for resetting @iter->map.
 */
static inline void gpadl_iter_cache_unmap(GpadlIter *iter)
{
    uint32_t first = (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
    uint32_t end = ((iter->last_off - 1) & ~TARGET_PAGE_MASK) + 1;
    dma_addr_t mapped_len;
    dma_addr_t accessed_len;

    /* a mapping only exists if a non-zero amount of i/o was done */
    assert(iter->last_off > 0);
    assert(first < end);

    mapped_len = TARGET_PAGE_SIZE - first;
    accessed_len = end - first;
    dma_memory_unmap(iter->as, iter->map, mapped_len, iter->dir,
                     accessed_len);
}
336 
337 /*
338  * Copy exactly @len bytes between the GPADL pointed to by @iter and @buf.
339  * The direction of the copy is determined by @iter->dir.
340  * The caller must ensure the operation overflows neither @buf nor the GPADL
341  * (there's an assert for the latter).
342  * Reuse the currently mapped page in the GPADL if possible.
343  */
gpadl_iter_io(GpadlIter * iter,void * buf,uint32_t len)344 static ssize_t gpadl_iter_io(GpadlIter *iter, void *buf, uint32_t len)
345 {
346     ssize_t ret = len;
347 
348     assert(iter->active);
349 
350     while (len) {
351         uint32_t off_in_page = iter->off & ~TARGET_PAGE_MASK;
352         uint32_t pgleft = TARGET_PAGE_SIZE - off_in_page;
353         uint32_t cplen = MIN(pgleft, len);
354         void *p;
355 
356         /* try to reuse the cached mapping */
357         if (iter->map) {
358             uint32_t map_start_in_page =
359                 (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
360             uint32_t off_base = iter->off & ~TARGET_PAGE_MASK;
361             uint32_t mapped_base = (iter->last_off - 1) & ~TARGET_PAGE_MASK;
362             if (off_base != mapped_base || off_in_page < map_start_in_page) {
363                 gpadl_iter_cache_unmap(iter);
364                 iter->map = NULL;
365             }
366         }
367 
368         if (!iter->map) {
369             dma_addr_t maddr;
370             dma_addr_t mlen = pgleft;
371             uint32_t idx = iter->off >> TARGET_PAGE_BITS;
372             assert(idx < iter->gpadl->num_gfns);
373 
374             maddr = (iter->gpadl->gfns[idx] << TARGET_PAGE_BITS) | off_in_page;
375 
376             iter->map = dma_memory_map(iter->as, maddr, &mlen, iter->dir,
377                                        MEMTXATTRS_UNSPECIFIED);
378             if (mlen != pgleft) {
379                 dma_memory_unmap(iter->as, iter->map, mlen, iter->dir, 0);
380                 iter->map = NULL;
381                 return -EFAULT;
382             }
383         }
384 
385         p = (void *)(uintptr_t)(((uintptr_t)iter->map & TARGET_PAGE_MASK) |
386                 off_in_page);
387         if (iter->dir == DMA_DIRECTION_FROM_DEVICE) {
388             memcpy(p, buf, cplen);
389         } else {
390             memcpy(buf, p, cplen);
391         }
392 
393         buf += cplen;
394         len -= cplen;
395         iter->off += cplen;
396         iter->last_off = iter->off;
397     }
398 
399     return ret;
400 }
401 
402 /*
403  * Position the iterator @iter at new offset @new_off.
404  * If this results in the cached mapping being unusable with the new offset,
405  * unmap it.
406  */
gpadl_iter_seek(GpadlIter * iter,uint32_t new_off)407 static inline void gpadl_iter_seek(GpadlIter *iter, uint32_t new_off)
408 {
409     assert(iter->active);
410     iter->off = new_off;
411 }
412 
413 /*
414  * Start a series of i/o on the GPADL.
415  * After this i/o and seek operations on @iter become legal.
416  */
gpadl_iter_start_io(GpadlIter * iter)417 static inline void gpadl_iter_start_io(GpadlIter *iter)
418 {
419     assert(!iter->active);
420     /* mapping is cached lazily on i/o */
421     iter->map = NULL;
422     iter->active = true;
423 }
424 
425 /*
426  * End the eariler started series of i/o on the GPADL and release the cached
427  * mapping if any.
428  */
gpadl_iter_end_io(GpadlIter * iter)429 static inline void gpadl_iter_end_io(GpadlIter *iter)
430 {
431     assert(iter->active);
432 
433     if (iter->map) {
434         gpadl_iter_cache_unmap(iter);
435     }
436 
437     iter->active = false;
438 }
439 
440 static void vmbus_resched(VMBus *vmbus);
441 static void vmbus_msg_cb(void *data, int status);
442 
/*
 * Copy the contents of the @iov_cnt buffers in @iov, one after another, into
 * @gpadl starting at offset @off, using the DMA address space of the device
 * of @chan.
 * Returns a negative errno as soon as copying an element fails; otherwise the
 * result of copying the last element (its length), or 0 when @iov_cnt is 0.
 */
ssize_t vmbus_iov_to_gpadl(VMBusChannel *chan, VMBusGpadl *gpadl, uint32_t off,
                           const struct iovec *iov, size_t iov_cnt)
{
    GpadlIter cursor;
    ssize_t ret = 0;
    size_t n;

    gpadl_iter_init(&cursor, gpadl, chan->dev->dma_as,
                    DMA_DIRECTION_FROM_DEVICE);
    gpadl_iter_start_io(&cursor);
    gpadl_iter_seek(&cursor, off);

    /* stop at the first element that fails */
    for (n = 0; n < iov_cnt && ret >= 0; n++) {
        ret = gpadl_iter_io(&cursor, iov[n].iov_base, iov[n].iov_len);
    }

    gpadl_iter_end_io(&cursor);
    return ret;
}
464 
/*
 * Map @len bytes of the scatter-gather list of @req, beginning @off bytes
 * into it, and describe the host mappings in @iov (at most @iov_cnt entries).
 * A single scatter-gather entry may take several @iov entries if it cannot
 * be mapped in one piece.
 * Returns the number of @iov entries filled in; -ENOBUFS if @iov is too
 * small, or -EFAULT if a piece could not be mapped.  On failure everything
 * mapped so far is unmapped again.
 */
int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
                  unsigned iov_cnt, size_t len, size_t off)
{
    QEMUSGList *sgl = &req->sgl;
    ScatterGatherEntry *sg = sgl->sg;
    int mapped = 0;
    int err;
    unsigned ent = 0;

    /* skip the entries that lie entirely before @off */
    while (ent < sgl->nsg && sg[ent].len <= off) {
        off -= sg[ent].len;
        ent++;
    }

    while (len && ent < sgl->nsg) {
        dma_addr_t chunk = MIN(sg[ent].len - off, len);
        dma_addr_t addr = sg[ent].base + off;

        len -= chunk;
        /* only the first entry used is entered at a non-zero offset */
        off = 0;

        while (chunk) {
            dma_addr_t piece = chunk;

            if (mapped == iov_cnt) {
                err = -ENOBUFS;
                goto fail;
            }

            iov[mapped].iov_base = dma_memory_map(sgl->as, addr, &piece, dir,
                                                  MEMTXATTRS_UNSPECIFIED);
            if (!piece) {
                err = -EFAULT;
                goto fail;
            }
            iov[mapped].iov_len = piece;
            addr += piece;
            chunk -= piece;
            mapped++;
        }
        ent++;
    }

    return mapped;

fail:
    vmbus_unmap_sgl(req, dir, iov, mapped, 0);
    return err;
}
511 
/*
 * Unmap the first @iov_cnt entries of @iov, which were mapped from the
 * scatter-gather list of @req.
 * @accessed is the number of bytes actually accessed, counted from the start
 * of @iov; it is spread over the entries in order.
 */
void vmbus_unmap_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
                     unsigned iov_cnt, size_t accessed)
{
    QEMUSGList *sgl = &req->sgl;
    unsigned n;

    for (n = 0; n < iov_cnt; n++) {
        struct iovec *vec = &iov[n];
        size_t done = MIN(accessed, vec->iov_len);

        dma_memory_unmap(sgl->as, vec->iov_base, vec->iov_len, dir, done);
        accessed -= done;
    }
}
524 
525 static const VMStateDescription vmstate_gpadl = {
526     .name = "vmbus/gpadl",
527     .version_id = 0,
528     .minimum_version_id = 0,
529     .fields = (const VMStateField[]) {
530         VMSTATE_UINT32(id, VMBusGpadl),
531         VMSTATE_UINT32(child_relid, VMBusGpadl),
532         VMSTATE_UINT32(num_gfns, VMBusGpadl),
533         VMSTATE_UINT32(seen_gfns, VMBusGpadl),
534         VMSTATE_VARRAY_UINT32_ALLOC(gfns, VMBusGpadl, num_gfns, 0,
535                                     vmstate_info_uint64, uint64_t),
536         VMSTATE_UINT8(state, VMBusGpadl),
537         VMSTATE_END_OF_LIST()
538     }
539 };
540 
/*
 * Fold the index @idx back into a ring buffer that is @len bytes long.
 * Only one wraparound is handled: @idx is assumed not to exceed twice the
 * size of the ring buffer.
 */
static inline uint32_t rb_idx_wrap(uint32_t idx, uint32_t len)
{
    return idx < len ? idx : idx - len;
}
553 
/*
 * Circular distance from @idx1 to @idx2 in a ring buffer of @len bytes.
 * @allow_catchup tells whether @idx1 is allowed to catch up with @idx2 (the
 * read index may catch up with the write index, but not the other way
 * round); when it is not, the distance is one less.
 */
static inline uint32_t rb_idx_delta(uint32_t idx1, uint32_t idx2, uint32_t len,
                                    bool allow_catchup)
{
    uint32_t gap = allow_catchup ? 0 : 1;

    return rb_idx_wrap(idx2 + len - idx1 - gap, len);
}
564 
ringbuf_map_hdr(VMBusRingBufCommon * ringbuf)565 static vmbus_ring_buffer *ringbuf_map_hdr(VMBusRingBufCommon *ringbuf)
566 {
567     vmbus_ring_buffer *rb;
568     dma_addr_t mlen = sizeof(*rb);
569 
570     rb = dma_memory_map(ringbuf->as, ringbuf->rb_addr, &mlen,
571                         DMA_DIRECTION_FROM_DEVICE, MEMTXATTRS_UNSPECIFIED);
572     if (mlen != sizeof(*rb)) {
573         dma_memory_unmap(ringbuf->as, rb, mlen,
574                          DMA_DIRECTION_FROM_DEVICE, 0);
575         return NULL;
576     }
577     return rb;
578 }
579 
/*
 * Unmap the header @rb of @ringbuf.
 * With @dirty set the whole header is reported as accessed, otherwise
 * nothing is.
 */
static void ringbuf_unmap_hdr(VMBusRingBufCommon *ringbuf,
                              vmbus_ring_buffer *rb, bool dirty)
{
    dma_addr_t accessed = dirty ? sizeof(*rb) : 0;

    assert(rb);

    dma_memory_unmap(ringbuf->as, rb, sizeof(*rb), DMA_DIRECTION_FROM_DEVICE,
                     accessed);
}
588 
/*
 * Set up @ringbuf over pages [@begin, @end) of @gpadl: page @begin holds the
 * header, the pages after it form the data area.
 * The caller must make sure that @begin is a valid page index and that
 * @end > @begin.
 */
static void ringbuf_init_common(VMBusRingBufCommon *ringbuf, VMBusGpadl *gpadl,
                                AddressSpace *as, DMADirection dir,
                                uint32_t begin, uint32_t end)
{
    uint32_t first_data_page = begin + 1;
    uint32_t data_pages = end - begin - 1;

    ringbuf->as = as;
    ringbuf->rb_addr = gpadl->gfns[begin] << TARGET_PAGE_BITS;
    ringbuf->base = first_data_page << TARGET_PAGE_BITS;
    ringbuf->len = data_pages << TARGET_PAGE_BITS;
    gpadl_iter_init(&ringbuf->iter, gpadl, as, dir);
}
599 
/*
 * Set up both ring buffers of @chan inside its GPADL: the receive ring takes
 * pages [0, ringbuf_send_offset), the send ring takes the rest.
 * The initial indices are taken from the ring buffer headers, and the
 * VMBUS_RING_BUFFER_FEAT_PENDING_SZ bit is set in the send ring header.
 *
 * Returns 0 on success; -EINVAL if either ring would not have a header page
 * plus at least one data page; -EFAULT if a header cannot be mapped;
 * -EOVERFLOW if an initial index lies outside its data area.
 */
static int ringbufs_init(VMBusChannel *chan)
{
    VMBusRecvRingBuf *rx = &chan->recv_ringbuf;
    VMBusSendRingBuf *tx = &chan->send_ringbuf;
    uint32_t split = chan->ringbuf_send_offset;
    uint32_t npages;
    AddressSpace *as;
    vmbus_ring_buffer *hdr;

    if (split <= 1) {
        return -EINVAL;
    }
    npages = chan->gpadl->num_gfns;
    if (npages <= split + 1) {
        return -EINVAL;
    }

    as = chan->dev->dma_as;
    ringbuf_init_common(&rx->common, chan->gpadl, as,
                        DMA_DIRECTION_TO_DEVICE, 0, split);
    ringbuf_init_common(&tx->common, chan->gpadl, as,
                        DMA_DIRECTION_FROM_DEVICE, split, npages);
    tx->wanted = 0;
    tx->reserved = 0;

    hdr = ringbuf_map_hdr(&rx->common);
    if (!hdr) {
        return -EFAULT;
    }
    /* read the guest value once and use it for both fields */
    rx->last_rd_idx = hdr->read_index;
    rx->rd_idx = rx->last_rd_idx;
    ringbuf_unmap_hdr(&rx->common, hdr, false);

    hdr = ringbuf_map_hdr(&tx->common);
    if (!hdr) {
        return -EFAULT;
    }
    /* same here: a single read of the guest value */
    tx->last_wr_idx = hdr->write_index;
    tx->wr_idx = tx->last_wr_idx;
    tx->last_seen_rd_idx = hdr->read_index;
    hdr->feature_bits |= VMBUS_RING_BUFFER_FEAT_PENDING_SZ;
    ringbuf_unmap_hdr(&tx->common, hdr, true);

    /* the indices come from the guest: make sure they are within bounds */
    if (rx->rd_idx >= rx->common.len || tx->wr_idx >= tx->common.len) {
        return -EOVERFLOW;
    }

    return 0;
}
642 
643 /*
644  * Perform io between the GPADL-backed ringbuffer @ringbuf and @buf, wrapping
645  * around if needed.
646  * @len is assumed not to exceed the size of the ringbuffer, so only single
647  * wraparound is considered.
648  */
ringbuf_io(VMBusRingBufCommon * ringbuf,void * buf,uint32_t len)649 static ssize_t ringbuf_io(VMBusRingBufCommon *ringbuf, void *buf, uint32_t len)
650 {
651     ssize_t ret1 = 0, ret2 = 0;
652     uint32_t remain = ringbuf->len + ringbuf->base - ringbuf->iter.off;
653 
654     if (len >= remain) {
655         ret1 = gpadl_iter_io(&ringbuf->iter, buf, remain);
656         if (ret1 < 0) {
657             return ret1;
658         }
659         gpadl_iter_seek(&ringbuf->iter, ringbuf->base);
660         buf += remain;
661         len -= remain;
662     }
663     ret2 = gpadl_iter_io(&ringbuf->iter, buf, len);
664     if (ret2 < 0) {
665         return ret2;
666     }
667     return ret1 + ret2;
668 }
669 
670 /*
671  * Position the circular iterator within @ringbuf to offset @new_off, wrapping
672  * around if needed.
673  * @new_off is assumed not to exceed twice the size of the ringbuffer, so only
674  * single wraparound is considered.
675  */
ringbuf_seek(VMBusRingBufCommon * ringbuf,uint32_t new_off)676 static inline void ringbuf_seek(VMBusRingBufCommon *ringbuf, uint32_t new_off)
677 {
678     gpadl_iter_seek(&ringbuf->iter,
679                     ringbuf->base + rb_idx_wrap(new_off, ringbuf->len));
680 }
681 
/* Current position of the iterator of @ringbuf, relative to the data area. */
static inline uint32_t ringbuf_tell(VMBusRingBufCommon *ringbuf)
{
    uint32_t gpadl_off = ringbuf->iter.off;

    return gpadl_off - ringbuf->base;
}
686 
/* Open a series of i/o operations on the iterator of @ringbuf. */
static inline void ringbuf_start_io(VMBusRingBufCommon *ringbuf)
{
    GpadlIter *it = &ringbuf->iter;

    gpadl_iter_start_io(it);
}
691 
/* Close the series of i/o operations on the iterator of @ringbuf. */
static inline void ringbuf_end_io(VMBusRingBufCommon *ringbuf)
{
    GpadlIter *it = &ringbuf->iter;

    gpadl_iter_end_io(it);
}
696 
vmbus_channel_device(VMBusChannel * chan)697 VMBusDevice *vmbus_channel_device(VMBusChannel *chan)
698 {
699     return chan->dev;
700 }
701 
vmbus_device_channel(VMBusDevice * dev,uint32_t chan_idx)702 VMBusChannel *vmbus_device_channel(VMBusDevice *dev, uint32_t chan_idx)
703 {
704     if (chan_idx >= dev->num_channels) {
705         return NULL;
706     }
707     return &dev->channels[chan_idx];
708 }
709 
/* Return the index of @chan in the channel array of its device. */
uint32_t vmbus_channel_idx(VMBusChannel *chan)
{
    VMBusChannel *first = chan->dev->channels;

    return chan - first;
}
714 
/* Signal the event notifier of @chan. */
void vmbus_channel_notify_host(VMBusChannel *chan)
{
    EventNotifier *evt = &chan->notifier;

    event_notifier_set(evt);
}
719 
/* Report the is_open flag of @chan. */
bool vmbus_channel_is_open(VMBusChannel *chan)
{
    bool open = chan->is_open;

    return open;
}
724 
725 /*
726  * Notify the guest side about the data to work on in the channel ring buffer.
727  * The notification is done by signaling a dedicated per-channel SynIC event
728  * flag (more recent guests) or setting a bit in the interrupt page and firing
729  * the VMBus SINT (older guests).
730  */
vmbus_channel_notify_guest(VMBusChannel * chan)731 static int vmbus_channel_notify_guest(VMBusChannel *chan)
732 {
733     int res = 0;
734     unsigned long *int_map, mask;
735     unsigned idx;
736     hwaddr addr = chan->vmbus->int_page_gpa;
737     hwaddr len = TARGET_PAGE_SIZE / 2, dirty = 0;
738 
739     trace_vmbus_channel_notify_guest(chan->id);
740 
741     if (!addr) {
742         return hyperv_set_event_flag(chan->notify_route, chan->id);
743     }
744 
745     int_map = cpu_physical_memory_map(addr, &len, 1);
746     if (len != TARGET_PAGE_SIZE / 2) {
747         res = -ENXIO;
748         goto unmap;
749     }
750 
751     idx = BIT_WORD(chan->id);
752     mask = BIT_MASK(chan->id);
753     if ((qatomic_fetch_or(&int_map[idx], mask) & mask) != mask) {
754         res = hyperv_sint_route_set_sint(chan->notify_route);
755         dirty = len;
756     }
757 
758 unmap:
759     cpu_physical_memory_unmap(int_map, len, 1, dirty);
760     return res;
761 }
762 
763 #define VMBUS_PKT_TRAILER      sizeof(uint64_t)
764 
vmbus_pkt_hdr_set_offsets(vmbus_packet_hdr * hdr,uint32_t desclen,uint32_t msglen)765 static uint32_t vmbus_pkt_hdr_set_offsets(vmbus_packet_hdr *hdr,
766                                           uint32_t desclen, uint32_t msglen)
767 {
768     hdr->offset_qwords = sizeof(*hdr) / sizeof(uint64_t) +
769         DIV_ROUND_UP(desclen, sizeof(uint64_t));
770     hdr->len_qwords = hdr->offset_qwords +
771         DIV_ROUND_UP(msglen, sizeof(uint64_t));
772     return hdr->len_qwords * sizeof(uint64_t) + VMBUS_PKT_TRAILER;
773 }
774 
775 /*
776  * Simplified ring buffer operation with paired barriers annotations in the
777  * producer and consumer loops:
778  *
779  * producer                           * consumer
780  * ~~~~~~~~                           * ~~~~~~~~
781  * write pending_send_sz              * read write_index
782  * smp_mb                       [A]   * smp_mb                       [C]
783  * read read_index                    * read packet
784  * smp_mb                       [B]   * read/write out-of-band data
785  * read/write out-of-band data        * smp_mb                       [B]
786  * write packet                       * write read_index
787  * smp_mb                       [C]   * smp_mb                       [A]
788  * write write_index                  * read pending_send_sz
789  * smp_wmb                      [D]   * smp_rmb                      [D]
790  * write pending_send_sz              * read write_index
791  * ...                                * ...
792  */
793 
/*
 * Number of bytes that can be written to @ringbuf, judging by the cached
 * indices only (guest memory is not accessed).
 * The write index must not catch up with the read index, so one byte always
 * stays unused.  An out-of-range cached read index yields 0.
 */
static inline uint32_t ringbuf_send_avail(VMBusSendRingBuf *ringbuf)
{
    uint32_t size = ringbuf->common.len;
    uint32_t rd_idx = ringbuf->last_seen_rd_idx;

    /* the read index comes from the guest and is not to be trusted */
    if (rd_idx < size) {
        return rb_idx_delta(ringbuf->wr_idx, rd_idx, size, false);
    }
    return 0;
}
803 
/*
 * Commit the data written to the send ring buffer of @chan since the last
 * commit: publish the new write index to the guest, adjust the reservation
 * and the pending_send_sz bookkeeping, and notify the guest unless it has
 * masked interrupts or has not caught up with the previous state yet.
 *
 * Returns the number of bytes committed (0 if there was nothing to commit,
 * in which case guest memory is not touched), or -EFAULT if the ring buffer
 * header cannot be mapped.
 */
static ssize_t ringbuf_send_update_idx(VMBusChannel *chan)
{
    VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
    vmbus_ring_buffer *rb;
    uint32_t written;

    written = rb_idx_delta(ringbuf->last_wr_idx, ringbuf->wr_idx,
                           ringbuf->common.len, true);
    if (!written) {
        return 0;
    }

    rb = ringbuf_map_hdr(&ringbuf->common);
    if (!rb) {
        return -EFAULT;
    }

    ringbuf->reserved -= written;

    /* prevent reorder with the data operation and packet write */
    smp_mb();                   /* barrier pair [C] */
    rb->write_index = ringbuf->wr_idx;

    /*
     * If the producer earlier indicated that it wants to be notified when the
     * consumer frees certain amount of space in the ring buffer, that amount
     * is reduced by the size of the completed write.
     */
    if (ringbuf->wanted) {
        /*
         * @wanted was set by a failed reservation to the space reserved at
         * that time plus the size of the packet that did not fit, and what
         * has just been written came out of that reserved space; so @wanted
         * is strictly bigger and the subtraction below cannot wrap.
         */
        assert(ringbuf->wanted > written);
        ringbuf->wanted -= written;
        /* prevent reorder with write_index write */
        smp_wmb();              /* barrier pair [D] */
        rb->pending_send_sz = ringbuf->wanted;
    }

    /* prevent reorder with write_index or pending_send_sz write */
    smp_mb();                   /* barrier pair [A] */
    ringbuf->last_seen_rd_idx = rb->read_index;

    /*
     * The consumer may have missed the reduction of pending_send_sz and skip
     * notification, so re-check the blocking condition, and, if it's no longer
     * true, ensure processing another iteration by simulating consumer's
     * notification.
     */
    if (ringbuf_send_avail(ringbuf) >= ringbuf->wanted) {
        vmbus_channel_notify_host(chan);
    }

    /* skip notification by consumer's request */
    if (rb->interrupt_mask) {
        goto out;
    }

    /*
     * The consumer hasn't caught up with the producer's previous state so it's
     * not blocked.
     * (last_seen_rd_idx comes from the guest but it's safe to use w/o
     * validation here as it only affects notification.)
     */
    if (rb_idx_delta(ringbuf->last_seen_rd_idx, ringbuf->wr_idx,
                     ringbuf->common.len, true) > written) {
        goto out;
    }

    vmbus_channel_notify_guest(chan);
out:
    ringbuf_unmap_hdr(&ringbuf->common, rb, true);
    ringbuf->last_wr_idx = ringbuf->wr_idx;
    return written;
}
877 
/*
 * Reserve space in the send ring buffer of @chan for a packet with @desclen
 * bytes of descriptor and @msglen bytes of message, on top of what is
 * reserved already.
 *
 * If the space is not available, the guest is asked (via pending_send_sz in
 * the ring buffer header) to notify the host once it is.
 *
 * Returns 0 on success, -ENOSPC if there is not enough free space at the
 * moment, or -EFAULT if the ring buffer header cannot be mapped.
 */
int vmbus_channel_reserve(VMBusChannel *chan,
                          uint32_t desclen, uint32_t msglen)
{
    VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
    vmbus_ring_buffer *rb = NULL;
    vmbus_packet_hdr hdr;
    /* whether the mapped header was written to during this call */
    bool hdr_written = false;
    uint32_t needed = ringbuf->reserved +
        vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);

    /* avoid touching the guest memory if possible */
    if (likely(needed <= ringbuf_send_avail(ringbuf))) {
        goto success;
    }

    rb = ringbuf_map_hdr(&ringbuf->common);
    if (!rb) {
        return -EFAULT;
    }

    /* fetch read index from guest memory and try again */
    ringbuf->last_seen_rd_idx = rb->read_index;

    if (likely(needed <= ringbuf_send_avail(ringbuf))) {
        goto success;
    }

    rb->pending_send_sz = needed;
    hdr_written = true;

    /*
     * The consumer may have made progress and freed up some space before
     * seeing updated pending_send_sz, so re-read read_index (preventing
     * reorder with the pending_send_sz write) and try again.
     */
    smp_mb();                   /* barrier pair [A] */
    ringbuf->last_seen_rd_idx = rb->read_index;

    if (needed > ringbuf_send_avail(ringbuf)) {
        goto out;
    }

success:
    ringbuf->reserved = needed;
    needed = 0;

    /*
     * Clear pending_send_sz if it was set, be it by an earlier failed
     * reservation (@wanted) or just above in this call.
     */
    if (ringbuf->wanted || hdr_written) {
        if (!rb) {
            rb = ringbuf_map_hdr(&ringbuf->common);
            if (!rb) {
                /* failure to clear pending_send_sz is non-fatal */
                goto out;
            }
        }

        rb->pending_send_sz = 0;
        hdr_written = true;
    }

    /* prevent reorder of the following data operation with read_index read */
    smp_mb();                   /* barrier pair [B] */

out:
    if (rb) {
        /* report the header as accessed only if it was actually modified */
        ringbuf_unmap_hdr(&ringbuf->common, rb, hdr_written);
    }
    ringbuf->wanted = needed;
    return needed ? -ENOSPC : 0;
}
945 
/*
 * Write one packet into the send ring buffer of @chan and publish it.
 *
 * The packet consists of a header, an optional descriptor (@desc, @desclen)
 * and the message (@msg, @msglen).  The space must have been reserved
 * beforehand: the total packet length is asserted not to exceed ->reserved.
 *
 * Returns -EINVAL if the channel is not open, a negative value from
 * ringbuf_io() if writing fails, and otherwise whatever
 * ringbuf_send_update_idx() returns.
 */
ssize_t vmbus_channel_send(VMBusChannel *chan, uint16_t pkt_type,
                           void *desc, uint32_t desclen,
                           void *msg, uint32_t msglen,
                           bool need_comp, uint64_t transaction_id)
{
    VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
    vmbus_packet_hdr hdr;
    uint32_t totlen;
    ssize_t ret;

    if (!vmbus_channel_is_open(chan)) {
        return -EINVAL;
    }

    /* fill in the packet header */
    totlen = vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);
    hdr.transaction_id = transaction_id;
    hdr.flags = need_comp ? VMBUS_PACKET_FLAG_REQUEST_COMPLETION : 0;
    hdr.type = pkt_type;

    assert(totlen <= ringbuf->reserved);

    ringbuf_start_io(&ringbuf->common);
    ringbuf_seek(&ringbuf->common, ringbuf->wr_idx);

    /* header first, then the optional descriptor, then the message */
    ret = ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr));
    if (ret >= 0 && desclen) {
        assert(desc);
        ret = ringbuf_io(&ringbuf->common, desc, desclen);
        if (ret >= 0) {
            /* the message starts at the offset recorded in the header */
            ringbuf_seek(&ringbuf->common,
                         ringbuf->wr_idx + hdr.offset_qwords * sizeof(uint64_t));
        }
    }
    if (ret >= 0) {
        ret = ringbuf_io(&ringbuf->common, msg, msglen);
    }
    if (ret >= 0) {
        /* move past the whole packet and remember the new write position */
        ringbuf_seek(&ringbuf->common, ringbuf->wr_idx + totlen);
        ringbuf->wr_idx = ringbuf_tell(&ringbuf->common);
        ret = 0;
    }

    ringbuf_end_io(&ringbuf->common);
    if (ret) {
        return ret;
    }
    return ringbuf_send_update_idx(chan);
}
996 
/*
 * Send a completion packet for @req on the channel it arrived on, carrying
 * @msglen bytes from @msg and the transaction id of the request.  The request
 * must have asked for a completion.  Returns the result of
 * vmbus_channel_send().
 */
ssize_t vmbus_channel_send_completion(VMBusChanReq *req,
                                      void *msg, uint32_t msglen)
{
    ssize_t ret;

    assert(req->need_comp);

    /* completions have no descriptor and do not request a completion */
    ret = vmbus_channel_send(req->chan, VMBUS_PACKET_COMP,
                             NULL, 0,
                             msg, msglen,
                             false, req->transaction_id);
    return ret;
}
1004 
/*
 * Build the scatter-gather list @sgl from a GPA-direct descriptor read from
 * the current position of @ringbuf.  @len is the number of descriptor bytes
 * available; everything read from the ring buffer is checked against it.
 *
 * Adjacent page fragments are merged into a single SGL entry.
 *
 * Returns 0 on success, in which case @sgl is initialized and owned by the
 * caller.  Returns -EIO on a malformed descriptor, or the negative value
 * returned by ringbuf_io().  On failure @sgl has either not been initialized
 * or has already been destroyed.
 */
static int sgl_from_gpa_ranges(QEMUSGList *sgl, VMBusDevice *dev,
                               VMBusRingBufCommon *ringbuf, uint32_t len)
{
    int ret;
    vmbus_pkt_gpa_direct hdr;
    hwaddr curaddr = 0;
    hwaddr curlen = 0;
    int num;

    if (len < sizeof(hdr)) {
        return -EIO;
    }
    ret = ringbuf_io(ringbuf, &hdr, sizeof(hdr));
    if (ret < 0) {
        return ret;
    }
    len -= sizeof(hdr);

    /*
     * The range count comes from the ring buffer contents.  Make sure the
     * range headers alone fit in what is left of the descriptor before
     * subtracting: the subtraction is done in an unsigned type, so without
     * this check it could wrap around and yield a huge allocation hint.
     */
    if (hdr.rangecount > len / sizeof(vmbus_gpa_range)) {
        return -EIO;
    }
    /* upper bound on the number of page entries, used as allocation hint */
    num = (len - hdr.rangecount * sizeof(vmbus_gpa_range)) / sizeof(uint64_t);
    qemu_sglist_init(sgl, DEVICE(dev), num, ringbuf->as);

    for (; hdr.rangecount; hdr.rangecount--) {
        vmbus_gpa_range range;

        if (len < sizeof(range)) {
            goto eio;
        }
        ret = ringbuf_io(ringbuf, &range, sizeof(range));
        if (ret < 0) {
            goto err;
        }
        len -= sizeof(range);

        /* the offset must lie within the first page of the range */
        if (range.byte_offset & TARGET_PAGE_MASK) {
            goto eio;
        }

        /* one page frame number per page; only the first has an offset */
        for (; range.byte_count; range.byte_offset = 0) {
            uint64_t paddr;
            uint32_t plen = MIN(range.byte_count,
                                TARGET_PAGE_SIZE - range.byte_offset);

            if (len < sizeof(uint64_t)) {
                goto eio;
            }
            ret = ringbuf_io(ringbuf, &paddr, sizeof(paddr));
            if (ret < 0) {
                goto err;
            }
            len -= sizeof(uint64_t);
            /* turn the page frame number into a byte address */
            paddr <<= TARGET_PAGE_BITS;
            paddr |= range.byte_offset;
            range.byte_count -= plen;

            if (curaddr + curlen == paddr) {
                /* consecutive fragments - join */
                curlen += plen;
            } else {
                if (curlen) {
                    qemu_sglist_add(sgl, curaddr, curlen);
                }

                curaddr = paddr;
                curlen = plen;
            }
        }
    }

    /* flush the last accumulated fragment */
    if (curlen) {
        qemu_sglist_add(sgl, curaddr, curlen);
    }

    return 0;
eio:
    ret = -EIO;
err:
    qemu_sglist_destroy(sgl);
    return ret;
}
1087 
vmbus_alloc_req(VMBusChannel * chan,uint32_t size,uint16_t pkt_type,uint32_t msglen,uint64_t transaction_id,bool need_comp)1088 static VMBusChanReq *vmbus_alloc_req(VMBusChannel *chan,
1089                                      uint32_t size, uint16_t pkt_type,
1090                                      uint32_t msglen, uint64_t transaction_id,
1091                                      bool need_comp)
1092 {
1093     VMBusChanReq *req;
1094     uint32_t msgoff = QEMU_ALIGN_UP(size, __alignof__(*req->msg));
1095     uint32_t totlen = msgoff + msglen;
1096 
1097     req = g_malloc0(totlen);
1098     req->chan = chan;
1099     req->pkt_type = pkt_type;
1100     req->msg = (void *)req + msgoff;
1101     req->msglen = msglen;
1102     req->transaction_id = transaction_id;
1103     req->need_comp = need_comp;
1104     return req;
1105 }
1106 
/*
 * Begin a receive pass on @chan: snapshot the write index from the receive
 * ring buffer header into ->last_seen_wr_idx.
 *
 * Returns 0 on success, -EFAULT if the header cannot be mapped, or
 * -EOVERFLOW if the write index found in the header is not smaller than the
 * ring buffer length.
 */
int vmbus_channel_recv_start(VMBusChannel *chan)
{
    VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
    vmbus_ring_buffer *rb;

    rb = ringbuf_map_hdr(&ringbuf->common);
    if (!rb) {
        return -EFAULT;
    }
    ringbuf->last_seen_wr_idx = rb->write_index;
    /* the header was only read, so it is unmapped with the flag cleared */
    ringbuf_unmap_hdr(&ringbuf->common, rb, false);

    /* the index comes from the header contents, so validate before use */
    if (ringbuf->last_seen_wr_idx >= ringbuf->common.len) {
        return -EOVERFLOW;
    }

    /* prevent reorder of the following data operation with write_index read */
    smp_mb();                   /* barrier pair [C] */
    return 0;
}
1127 
/*
 * Read the next packet from the receive ring buffer of @chan into a freshly
 * allocated request, without advancing ->rd_idx.
 *
 * @size is the allocation size of the request structure and must be at least
 * sizeof(VMBusChanReq); the message payload is stored behind it.
 *
 * Returns the request, or NULL if no complete packet is available, the
 * packet is malformed or of an unexpected type, or reading it fails.
 */
void *vmbus_channel_recv_peek(VMBusChannel *chan, uint32_t size)
{
    VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
    vmbus_packet_hdr hdr = {};
    VMBusChanReq *req;
    uint32_t avail, pktlen, totlen, msgoff, msglen;

    assert(size >= sizeof(*req));

    /* safe as last_seen_wr_idx is validated in vmbus_channel_recv_start */
    avail = rb_idx_delta(ringbuf->rd_idx, ringbuf->last_seen_wr_idx,
                         ringbuf->common.len, true);
    if (avail < sizeof(hdr)) {
        return NULL;
    }

    ringbuf_seek(&ringbuf->common, ringbuf->rd_idx);
    if (ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr)) < 0) {
        return NULL;
    }

    /* the whole packet, trailer included, has to be available */
    pktlen = hdr.len_qwords * sizeof(uint64_t);
    totlen = pktlen + VMBUS_PKT_TRAILER;
    if (totlen > avail) {
        return NULL;
    }

    /* the message must start after the header and within the packet */
    msgoff = hdr.offset_qwords * sizeof(uint64_t);
    if (msgoff < sizeof(hdr) || msgoff > pktlen) {
        error_report("%s: malformed packet: %u %u", __func__, msgoff, pktlen);
        return NULL;
    }
    msglen = pktlen - msgoff;

    req = vmbus_alloc_req(chan, size, hdr.type, msglen, hdr.transaction_id,
                          hdr.flags & VMBUS_PACKET_FLAG_REQUEST_COMPLETION);

    if (hdr.type == VMBUS_PACKET_DATA_USING_GPA_DIRECT) {
        /* the descriptor occupies the space between header and message */
        if (sgl_from_gpa_ranges(&req->sgl, chan->dev, &ringbuf->common,
                                msgoff - sizeof(hdr)) < 0) {
            error_report("%s: failed to convert GPA ranges to SGL", __func__);
            goto drop_req;
        }
    } else if (hdr.type != VMBUS_PACKET_DATA_INBAND &&
               hdr.type != VMBUS_PACKET_COMP) {
        error_report("%s: unexpected msg type: %x", __func__, hdr.type);
        goto drop_req;
    }

    ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + msgoff);
    if (ringbuf_io(&ringbuf->common, req->msg, msglen) >= 0) {
        /* leave the i/o position right behind this packet */
        ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + totlen);
        return req;
    }

drop_req:
    vmbus_free_req(req);
    return NULL;
}
1195 
/*
 * Consume the packet last read from the receive ring buffer of @chan by
 * moving ->rd_idx to the ring buffer's current i/o position.
 */
void vmbus_channel_recv_pop(VMBusChannel *chan)
{
    chan->recv_ringbuf.rd_idx = ringbuf_tell(&chan->recv_ringbuf.common);
}
1201 
/*
 * Finish a receive pass on @chan: publish the new read index in the receive
 * ring buffer header and notify the guest if the checks below call for it.
 *
 * Returns the distance between ->last_rd_idx and ->rd_idx as computed by
 * rb_idx_delta() (0 means nothing was consumed and the header is left
 * untouched), or -EFAULT if the header cannot be mapped.
 */
ssize_t vmbus_channel_recv_done(VMBusChannel *chan)
{
    VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
    vmbus_ring_buffer *rb;
    uint32_t read;

    read = rb_idx_delta(ringbuf->last_rd_idx, ringbuf->rd_idx,
                        ringbuf->common.len, true);
    if (!read) {
        return 0;
    }

    rb = ringbuf_map_hdr(&ringbuf->common);
    if (!rb) {
        return -EFAULT;
    }

    /* prevent reorder with the data operation and packet read */
    smp_mb();                   /* barrier pair [B] */
    rb->read_index = ringbuf->rd_idx;

    /* prevent reorder of the following pending_send_sz read */
    smp_mb();                   /* barrier pair [A] */

    /* the other side asked not to be notified */
    if (rb->interrupt_mask) {
        goto out;
    }

    if (rb->feature_bits & VMBUS_RING_BUFFER_FEAT_PENDING_SZ) {
        uint32_t wr_idx, wr_avail;
        uint32_t wanted = rb->pending_send_sz;

        /* nothing is recorded as pending, so no notification is needed */
        if (!wanted) {
            goto out;
        }

        /* prevent reorder with pending_send_sz read */
        smp_rmb();              /* barrier pair [D] */
        wr_idx = rb->write_index;

        wr_avail = rb_idx_delta(wr_idx, ringbuf->rd_idx, ringbuf->common.len,
                                true);

        /* the producer wasn't blocked on the consumer state */
        if (wr_avail >= read + wanted) {
            goto out;
        }
        /* there's not enough space for the producer to make progress */
        if (wr_avail < wanted) {
            goto out;
        }
    }

    vmbus_channel_notify_guest(chan);
out:
    /* read_index was written above, so unmap with the flag set */
    ringbuf_unmap_hdr(&ringbuf->common, rb, true);
    ringbuf->last_rd_idx = ringbuf->rd_idx;
    return read;
}
1261 
vmbus_free_req(void * req)1262 void vmbus_free_req(void *req)
1263 {
1264     VMBusChanReq *r = req;
1265 
1266     if (!req) {
1267         return;
1268     }
1269 
1270     if (r->sgl.dev) {
1271         qemu_sglist_destroy(&r->sgl);
1272     }
1273     g_free(req);
1274 }
1275 
/*
 * Event notifier handler of a channel: if the notifier was signalled, run
 * the channel's notify callback.
 */
static void channel_event_cb(EventNotifier *e)
{
    VMBusChannel *chan = container_of(e, VMBusChannel, notifier);
    VMBusRecvRingBuf *ringbuf;

    if (!event_notifier_test_and_clear(e)) {
        return;
    }

    /*
     * All receives are supposed to happen within the device worker, so
     * bracket it with ringbuf_start/end_io on the receive ringbuffer, and
     * potentially reuse the cached mapping throughout the worker.
     * Can't do this for sends as they may happen outside the device
     * worker.
     */
    ringbuf = &chan->recv_ringbuf;
    ringbuf_start_io(&ringbuf->common);
    chan->notify_cb(chan);
    ringbuf_end_io(&ringbuf->common);
}
1294 
/*
 * Pick the lowest channel id not marked in the bus bitmap.  The id is not
 * marked as used here.  Returns the id, or -ENOMEM if all ids are taken.
 */
static int alloc_chan_id(VMBus *vmbus)
{
    int slot = find_next_zero_bit(vmbus->chanid_bitmap, VMBUS_CHANID_COUNT, 0);

    if (slot != VMBUS_CHANID_COUNT) {
        /* bitmap slots are relative to the first valid channel id */
        return slot + VMBUS_FIRST_CHANID;
    }
    return -ENOMEM;
}
1305 
/*
 * Mark the id of @chan as used in the bus bitmap.
 * Returns 0 on success, or -EEXIST if the id was already marked.
 */
static int register_chan_id(VMBusChannel *chan)
{
    if (test_and_set_bit(chan->id - VMBUS_FIRST_CHANID,
                         chan->vmbus->chanid_bitmap)) {
        return -EEXIST;
    }
    return 0;
}
1311 
/*
 * Mark the id of @chan as free again in the bus bitmap (the counterpart of
 * register_chan_id()).
 */
static void unregister_chan_id(VMBusChannel *chan)
{
    /* bitmap slots are relative to the first valid channel id */
    clear_bit(chan->id - VMBUS_FIRST_CHANID, chan->vmbus->chanid_bitmap);
}
1316 
/*
 * Connection id of @chan: the channel id shifted by a fixed offset.
 */
static uint32_t chan_connection_id(VMBusChannel *chan)
{
    uint32_t conn_id = VMBUS_CHAN_CONNECTION_OFFSET + chan->id;

    return conn_id;
}
1321 
/*
 * Set up channel @chan as channel number @idx of device @dev: fill in the
 * back pointers, allocate and register a channel id and append the channel
 * to the bus channel list.  If no id is available, an error is set in @errp
 * and the channel is not put on the list.
 */
static void init_channel(VMBus *vmbus, VMBusDevice *dev, VMBusDeviceClass *vdc,
                         VMBusChannel *chan, uint16_t idx, Error **errp)
{
    int id;

    chan->vmbus = vmbus;
    chan->dev = dev;
    chan->subchan_idx = idx;
    chan->notify_cb = vdc->chan_notify_cb;

    id = alloc_chan_id(vmbus);
    if (id >= 0) {
        chan->id = id;
        register_chan_id(chan);

        /*
         * The guest drivers depend on the device subchannels (idx #1+) to be
         * offered after the primary channel (idx #0) of that device.  To
         * ensure that, record the channels on the channel list in the order
         * they appear within the device.
         */
        QTAILQ_INSERT_TAIL(&vmbus->channel_list, chan, link);
    } else {
        error_setg(errp, "no spare channel id");
    }
}
1348 
/*
 * Undo init_channel(): release the channel id and take the channel off the
 * bus channel list.  The channel must be in the VMCHAN_INIT state.
 */
static void deinit_channel(VMBusChannel *chan)
{
    assert(chan->state == VMCHAN_INIT);

    unregister_chan_id(chan);
    QTAILQ_REMOVE(&chan->vmbus->channel_list, chan, link);
}
1355 
create_channels(VMBus * vmbus,VMBusDevice * dev,Error ** errp)1356 static void create_channels(VMBus *vmbus, VMBusDevice *dev, Error **errp)
1357 {
1358     uint16_t i;
1359     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(dev);
1360     Error *err = NULL;
1361 
1362     dev->num_channels = vdc->num_channels ? vdc->num_channels(dev) : 1;
1363     if (dev->num_channels < 1) {
1364         error_setg(errp, "invalid #channels: %u", dev->num_channels);
1365         return;
1366     }
1367 
1368     dev->channels = g_new0(VMBusChannel, dev->num_channels);
1369     for (i = 0; i < dev->num_channels; i++) {
1370         init_channel(vmbus, dev, vdc, &dev->channels[i], i, &err);
1371         if (err) {
1372             goto err_init;
1373         }
1374     }
1375 
1376     return;
1377 
1378 err_init:
1379     while (i--) {
1380         deinit_channel(&dev->channels[i]);
1381     }
1382     error_propagate(errp, err);
1383 }
1384 
/*
 * Deinitialize all channels of @dev and release the channel array.
 */
static void free_channels(VMBusDevice *dev)
{
    uint16_t idx = 0;

    while (idx < dev->num_channels) {
        deinit_channel(dev->channels + idx);
        idx++;
    }

    g_free(dev->channels);
}
1393 
make_sint_route(VMBus * vmbus,uint32_t vp_index)1394 static HvSintRoute *make_sint_route(VMBus *vmbus, uint32_t vp_index)
1395 {
1396     VMBusChannel *chan;
1397 
1398     if (vp_index == vmbus->target_vp) {
1399         hyperv_sint_route_ref(vmbus->sint_route);
1400         return vmbus->sint_route;
1401     }
1402 
1403     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1404         if (chan->target_vp == vp_index && vmbus_channel_is_open(chan)) {
1405             hyperv_sint_route_ref(chan->notify_route);
1406             return chan->notify_route;
1407         }
1408     }
1409 
1410     return hyperv_sint_route_new(vp_index, VMBUS_SINT, NULL, NULL);
1411 }
1412 
/*
 * Bring @chan into the open state: take the ring buffer GPADL, set up the
 * ring buffers, the event notifier with its event flag handler and the SINT
 * route, and finally let the device class open the channel.
 *
 * Any failure undoes the steps taken so far and leaves ->is_open unset;
 * callers find out about the outcome through ->is_open.
 */
static void open_channel(VMBusChannel *chan)
{
    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);

    chan->gpadl = vmbus_get_gpadl(chan, chan->ringbuf_gpadl);
    if (chan->gpadl == NULL) {
        return;
    }

    if (ringbufs_init(chan) != 0 ||
        event_notifier_init(&chan->notifier, 0) != 0) {
        goto undo_gpadl;
    }

    event_notifier_set_handler(&chan->notifier, channel_event_cb);

    if (hyperv_set_event_flag_handler(chan_connection_id(chan),
                                      &chan->notifier) != 0) {
        goto undo_notifier;
    }

    chan->notify_route = make_sint_route(chan->vmbus, chan->target_vp);
    if (chan->notify_route == NULL) {
        goto undo_event_flag_handler;
    }

    /* the device class hook is optional; zero from it means success */
    if (vdc->open_channel == NULL || vdc->open_channel(chan) == 0) {
        chan->is_open = true;
        return;
    }

    /* unwind in reverse order of setup */
    hyperv_sint_route_unref(chan->notify_route);
undo_event_flag_handler:
    hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
undo_notifier:
    event_notifier_set_handler(&chan->notifier, NULL);
    event_notifier_cleanup(&chan->notifier);
undo_gpadl:
    vmbus_put_gpadl(chan->gpadl);
}
1459 
/*
 * Close @chan if it is open: let the device class close it first, then
 * release the resources acquired in open_channel().  Does nothing for a
 * channel that is not open.
 */
static void close_channel(VMBusChannel *chan)
{
    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);

    if (chan->is_open) {
        /* the device class hook is optional */
        if (vdc->close_channel) {
            vdc->close_channel(chan);
        }

        hyperv_sint_route_unref(chan->notify_route);
        hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);

        event_notifier_set_handler(&chan->notifier, NULL);
        event_notifier_cleanup(&chan->notifier);

        vmbus_put_gpadl(chan->gpadl);
        chan->is_open = false;
    }
}
1479 
channel_post_load(void * opaque,int version_id)1480 static int channel_post_load(void *opaque, int version_id)
1481 {
1482     VMBusChannel *chan = opaque;
1483 
1484     return register_chan_id(chan);
1485 }
1486 
/*
 * Migration description of a single channel.  After the fields are loaded,
 * channel_post_load() registers the loaded channel id in the bus bitmap.
 */
static const VMStateDescription vmstate_channel = {
    .name = "vmbus/channel",
    .version_id = 0,
    .minimum_version_id = 0,
    .post_load = channel_post_load,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT32(id, VMBusChannel),
        VMSTATE_UINT16(subchan_idx, VMBusChannel),
        VMSTATE_UINT32(open_id, VMBusChannel),
        VMSTATE_UINT32(target_vp, VMBusChannel),
        VMSTATE_UINT32(ringbuf_gpadl, VMBusChannel),
        VMSTATE_UINT32(ringbuf_send_offset, VMBusChannel),
        VMSTATE_UINT8(offer_state, VMBusChannel),
        VMSTATE_UINT8(state, VMBusChannel),
        VMSTATE_END_OF_LIST()
    }
};
1504 
find_channel(VMBus * vmbus,uint32_t id)1505 static VMBusChannel *find_channel(VMBus *vmbus, uint32_t id)
1506 {
1507     VMBusChannel *chan;
1508     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1509         if (chan->id == id) {
1510             return chan;
1511         }
1512     }
1513     return NULL;
1514 }
1515 
/*
 * Copy @msg into the circular rx queue of @vmbus, under rx_queue_lock.
 * Returns 0 on success, or -ENOBUFS if the queue is full.
 */
static int enqueue_incoming_message(VMBus *vmbus,
                                    const struct hyperv_post_message_input *msg)
{
    int ret = -ENOBUFS;

    qemu_mutex_lock(&vmbus->rx_queue_lock);

    if (vmbus->rx_queue_size != HV_MSG_QUEUE_LEN) {
        uint8_t prev_size = vmbus->rx_queue_size;
        /* first free slot behind the queued messages, wrapping around */
        uint8_t slot = (vmbus->rx_queue_head + vmbus->rx_queue_size) %
                       HV_MSG_QUEUE_LEN;

        memcpy(&vmbus->rx_queue[slot], msg, sizeof(*msg));
        vmbus->rx_queue_size++;

        /* only need to resched if the queue was empty before */
        if (prev_size == 0) {
            vmbus_resched(vmbus);
        }
        ret = 0;
    }

    qemu_mutex_unlock(&vmbus->rx_queue_lock);
    return ret;
}
1542 
vmbus_recv_message(const struct hyperv_post_message_input * msg,void * data)1543 static uint16_t vmbus_recv_message(const struct hyperv_post_message_input *msg,
1544                                    void *data)
1545 {
1546     VMBus *vmbus = data;
1547     struct vmbus_message_header *vmbus_msg;
1548 
1549     if (msg->message_type != HV_MESSAGE_VMBUS) {
1550         return HV_STATUS_INVALID_HYPERCALL_INPUT;
1551     }
1552 
1553     if (msg->payload_size < sizeof(struct vmbus_message_header)) {
1554         return HV_STATUS_INVALID_HYPERCALL_INPUT;
1555     }
1556 
1557     vmbus_msg = (struct vmbus_message_header *)msg->payload;
1558 
1559     trace_vmbus_recv_message(vmbus_msg->message_type, msg->payload_size);
1560 
1561     if (vmbus_msg->message_type == VMBUS_MSG_INVALID ||
1562         vmbus_msg->message_type >= VMBUS_MSG_COUNT) {
1563         error_report("vmbus: unknown message type %#x",
1564                      vmbus_msg->message_type);
1565         return HV_STATUS_INVALID_HYPERCALL_INPUT;
1566     }
1567 
1568     if (enqueue_incoming_message(vmbus, msg)) {
1569         return HV_STATUS_INSUFFICIENT_BUFFERS;
1570     }
1571     return HV_STATUS_SUCCESS;
1572 }
1573 
/*
 * Tell whether a protocol version has been set on @vmbus that does not
 * exceed VMBUS_VERSION_CURRENT.
 */
static bool vmbus_initialized(VMBus *vmbus)
{
    bool version_set = vmbus->version > 0;
    bool version_known = vmbus->version <= VMBUS_VERSION_CURRENT;

    return version_set && version_known;
}
1578 
/*
 * Reset the vmbus by performing a cold reset of the bus object.
 */
static void vmbus_reset_all(VMBus *vmbus)
{
    bus_cold_reset(BUS(vmbus));
}
1583 
/*
 * Post the vmbus message @msgdata of @msglen bytes through the bus SINT
 * route.
 *
 * Only one message may be in flight at a time: ->msg_in_progress must be
 * clear on entry and is set here.  A result of 0 or -EAGAIN from
 * hyperv_post_msg() is accepted; any other result is reported and the whole
 * vmbus is reset.
 */
static void post_msg(VMBus *vmbus, void *msgdata, uint32_t msglen)
{
    struct hyperv_message msg = {
        .header.message_type = HV_MESSAGE_VMBUS,
    };
    int ret;

    assert(!vmbus->msg_in_progress);
    assert(msglen <= sizeof(msg.payload));
    assert(msglen >= sizeof(struct vmbus_message_header));

    vmbus->msg_in_progress = true;

    trace_vmbus_post_msg(((struct vmbus_message_header *)msgdata)->message_type,
                         msglen);

    /* the advertised payload size is rounded up to the message alignment */
    msg.header.payload_size = ROUND_UP(msglen, VMBUS_MESSAGE_SIZE_ALIGN);
    memcpy(msg.payload, msgdata, msglen);

    ret = hyperv_post_msg(vmbus->sint_route, &msg);
    if (ret != 0 && ret != -EAGAIN) {
        error_report("message delivery fatal failure: %d; aborting vmbus", ret);
        vmbus_reset_all(vmbus);
    }
}
1611 
/*
 * Set up the SINT route of @vmbus towards its target virtual processor.
 * Nothing is done while no target is set (->target_vp is (uint32_t)-1).
 * Returns 0 on success, or -ENOMEM if the route cannot be created.
 */
static int vmbus_init(VMBus *vmbus)
{
    if (vmbus->target_vp == (uint32_t)-1) {
        return 0;
    }

    vmbus->sint_route = hyperv_sint_route_new(vmbus->target_vp, VMBUS_SINT,
                                              vmbus_msg_cb, vmbus);
    if (vmbus->sint_route) {
        return 0;
    }

    error_report("failed to set up SINT route");
    return -ENOMEM;
}
1624 
/*
 * Return @vmbus to its initial, listening state: drop a reference on every
 * GPADL that has not been torn down, reset the offer state of all channels,
 * release the SINT route and clear the negotiated parameters.
 */
static void vmbus_deinit(VMBus *vmbus)
{
    VMBusGpadl *gpadl, *tmp_gpadl;
    VMBusChannel *chan;

    /* the _SAFE variant is used as the loop body calls vmbus_put_gpadl() */
    QTAILQ_FOREACH_SAFE(gpadl, &vmbus->gpadl_list, link, tmp_gpadl) {
        if (gpadl->state != VMGPADL_TORNDOWN) {
            vmbus_put_gpadl(gpadl);
        }
    }

    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
        chan->offer_state = VMOFFER_INIT;
    }

    hyperv_sint_route_unref(vmbus->sint_route);
    vmbus->sint_route = NULL;

    vmbus->state = VMBUS_LISTEN;
    vmbus->version = 0;
    vmbus->target_vp = (uint32_t)-1;
    vmbus->int_page_gpa = 0;
    vmbus->msg_in_progress = false;
}
1649 
/*
 * Handle an INITIATE_CONTACT message: reset the bus, record the requested
 * version and target processor, and move to the handshake state.  Messages
 * shorter than the message structure are ignored.
 */
static void handle_initiate_contact(VMBus *vmbus,
                                    vmbus_message_initiate_contact *msg,
                                    uint32_t msglen)
{
    if (msglen < sizeof(*msg)) {
        return;
    }

    trace_vmbus_initiate_contact(msg->version_requested >> 16,
                                 msg->version_requested & 0xffff,
                                 msg->target_vcpu, msg->monitor_page1,
                                 msg->monitor_page2, msg->interrupt_page);

    /*
     * Reset vmbus on INITIATE_CONTACT regardless of its previous state.
     * Useful, in particular, with vmbus-aware BIOS which can't shut vmbus down
     * before handing over to OS loader.
     */
    vmbus_reset_all(vmbus);

    vmbus->state = VMBUS_HANDSHAKE;
    vmbus->version = msg->version_requested;
    vmbus->target_vp = msg->target_vcpu;
    if (vmbus->version < VMBUS_VERSION_WIN8) {
        /* linux passes interrupt page even when it doesn't need it */
        vmbus->int_page_gpa = msg->interrupt_page;
    }

    if (vmbus_init(vmbus) != 0) {
        error_report("failed to init vmbus; aborting");
        vmbus_deinit(vmbus);
    }
}
1684 
/*
 * Post the VERSION_RESPONSE message, telling whether the version recorded
 * on @vmbus is supported.
 */
static void send_handshake(VMBus *vmbus)
{
    struct vmbus_message_version_response msg = {
        .header.message_type = VMBUS_MSG_VERSION_RESPONSE,
    };

    msg.version_supported = vmbus_initialized(vmbus);
    post_msg(vmbus, &msg, sizeof(msg));
}
1694 
/*
 * Handle a REQUEST_OFFERS message: mark the first channel that has not been
 * offered yet as being offered and move the bus to the offer state.  The
 * message is ignored while the bus is not initialized; its contents are not
 * looked at.
 */
static void handle_request_offers(VMBus *vmbus, void *msgdata, uint32_t msglen)
{
    VMBusChannel *chan;

    if (vmbus_initialized(vmbus)) {
        QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
            if (chan->offer_state == VMOFFER_INIT) {
                chan->offer_state = VMOFFER_SENDING;
                break;
            }
        }

        vmbus->state = VMBUS_OFFER;
    }
}
1712 
/*
 * Post the OFFERCHANNEL message for the channel currently marked as being
 * offered, or the ALLOFFERS_DELIVERED terminator if there is no such
 * channel.
 */
static void send_offer(VMBus *vmbus)
{
    VMBusChannel *chan;
    VMBusChannel *sending = NULL;

    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
        if (chan->offer_state == VMOFFER_SENDING) {
            sending = chan;
            break;
        }
    }

    if (sending) {
        VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(sending->dev);
        /* Hyper-V wants LE GUIDs */
        QemuUUID classid = qemu_uuid_bswap(vdc->classid);
        QemuUUID instanceid = qemu_uuid_bswap(sending->dev->instanceid);
        struct vmbus_message_offer_channel msg = {
            .header.message_type = VMBUS_MSG_OFFERCHANNEL,
            .child_relid = sending->id,
            .connection_id = chan_connection_id(sending),
            .channel_flags = vdc->channel_flags,
            .mmio_size_mb = vdc->mmio_size_mb,
            .sub_channel_index = vmbus_channel_idx(sending),
            .interrupt_flags = VMBUS_OFFER_INTERRUPT_DEDICATED,
        };

        memcpy(msg.type_uuid, &classid, sizeof(classid));
        memcpy(msg.instance_uuid, &instanceid, sizeof(instanceid));

        trace_vmbus_send_offer(sending->id, sending->dev);

        post_msg(vmbus, &msg, sizeof(msg));
    } else {
        struct vmbus_message_header alloffers_msg = {
            .message_type = VMBUS_MSG_ALLOFFERS_DELIVERED,
        };

        /* no more offers, send terminator message */
        trace_vmbus_terminate_offers();
        post_msg(vmbus, &alloffers_msg, sizeof(alloffers_msg));
    }
}
1750 
/*
 * Account for the completion of the offer message sent last.
 *
 * The channel that was being offered is marked as offered and the next
 * not-yet-offered channel, if any, is marked as being offered.
 *
 * Returns true if the completed message was the terminator, i.e. vmbus can
 * move on to the next state, and false if more messages remain to be sent.
 */
static bool complete_offer(VMBus *vmbus)
{
    VMBusChannel *chan;
    bool offer_completed = false;

    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
        if (chan->offer_state == VMOFFER_SENDING) {
            chan->offer_state = VMOFFER_SENT;
            offer_completed = true;
            break;
        }
    }

    if (!offer_completed) {
        /*
         * no transitioning channels found so this is completing the
         * terminator message, and vmbus can move to the next state
         */
        return true;
    }

    /* try to mark another channel for offering */
    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
        if (chan->offer_state == VMOFFER_INIT) {
            chan->offer_state = VMOFFER_SENDING;
            break;
        }
    }

    /*
     * if an offer has been sent there are more offers or the terminator yet to
     * send, so no state transition for vmbus
     */
    return false;
}
1781 
1782 
/*
 * Handle a GPADL_HEADER message: create a new GPADL and record the page
 * frame numbers contained in this message.  If that already completes the
 * GPADL, move the bus to the VMBUS_CREATE_GPADL state.
 *
 * The message is ignored if the bus is not initialized, if the message is
 * too short or inconsistent, if it describes anything but a single
 * page-aligned range, or if a GPADL with that id already exists.
 */
static void handle_gpadl_header(VMBus *vmbus, vmbus_message_gpadl_header *msg,
                                uint32_t msglen)
{
    VMBusGpadl *gpadl;
    uint32_t num_gfns, i;
    size_t range_hdrs_len;

    /* must include at least one gpa range */
    if (msglen < sizeof(*msg) + sizeof(msg->range[0]) ||
        !vmbus_initialized(vmbus)) {
        return;
    }

    /*
     * Both lengths come from the message contents.  The declared buffer
     * must at least cover the range headers, otherwise the unsigned
     * subtraction below would wrap around and produce a bogus, huge number
     * of pages.
     */
    range_hdrs_len = msg->rangecount * sizeof(msg->range[0]);
    if (msg->range_buflen < range_hdrs_len) {
        return;
    }

    num_gfns = (msg->range_buflen - range_hdrs_len) /
               sizeof(msg->range[0].pfn_array[0]);

    trace_vmbus_gpadl_header(msg->gpadl_id, num_gfns);

    /*
     * In theory the GPADL_HEADER message can define a GPADL with multiple GPA
     * ranges each with arbitrary size and alignment.  However in practice only
     * single-range page-aligned GPADLs have been observed so just ignore
     * anything else and simplify things greatly.
     */
    if (msg->rangecount != 1 || msg->range[0].byte_offset ||
        /* widen before shifting so that the byte size cannot wrap */
        (msg->range[0].byte_count != ((uint64_t)num_gfns << TARGET_PAGE_BITS))) {
        return;
    }

    /* ignore requests to create already existing GPADLs */
    if (find_gpadl(vmbus, msg->gpadl_id)) {
        return;
    }

    gpadl = create_gpadl(vmbus, msg->gpadl_id, msg->child_relid, num_gfns);

    /* take as many page frame numbers as actually fit in this message */
    for (i = 0; i < num_gfns &&
         (void *)&msg->range[0].pfn_array[i + 1] <= (void *)msg + msglen;
         i++) {
        gpadl->gfns[gpadl->seen_gfns++] = msg->range[0].pfn_array[i];
    }

    if (gpadl_full(gpadl)) {
        vmbus->state = VMBUS_CREATE_GPADL;
    }
}
1828 
/*
 * Handle a GPADL_BODY message: append further pages to a GPADL that was
 * started earlier.
 *
 * @vmbus:  bus the message was received on
 * @msg:    message body; every field is supplied by the guest
 * @msglen: number of valid bytes at @msg
 *
 * Messages that are too short, refer to an unknown GPADL, or refer to a
 * GPADL that already has all of its pages are ignored.  Once the last page
 * has been stored the bus is switched to VMBUS_CREATE_GPADL.
 */
static void handle_gpadl_body(VMBus *vmbus, vmbus_message_gpadl_body *msg,
                              uint32_t msglen)
{
    VMBusGpadl *gpadl;
    uint32_t num_gfns_left, i;

    if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
        return;
    }

    trace_vmbus_gpadl_body(msg->gpadl_id);

    gpadl = find_gpadl(vmbus, msg->gpadl_id);
    if (!gpadl) {
        return;
    }

    num_gfns_left = gpadl->num_gfns - gpadl->seen_gfns;
    if (!num_gfns_left) {
        /*
         * The guest is free to send a GPADL_BODY for a GPADL which is
         * already complete.  This is guest-controlled input, so it must not
         * trip an assertion; it must not restart GPADL creation either, so
         * just drop the message.
         */
        return;
    }

    /* copy as many page numbers as actually fit in the received message */
    for (i = 0; i < num_gfns_left &&
         (void *)&msg->pfn_array[i + 1] <= (void *)msg + msglen; i++) {
        gpadl->gfns[gpadl->seen_gfns++] = msg->pfn_array[i];
    }

    if (gpadl_full(gpadl)) {
        vmbus->state = VMBUS_CREATE_GPADL;
    }
}
1858 
/*
 * Post a GPADL_CREATED message for the first GPADL on the list that is
 * fully populated and still in VMGPADL_INIT state.  Reaching the end of the
 * list without finding one is treated as a programming error.
 */
static void send_create_gpadl(VMBus *vmbus)
{
    VMBusGpadl *cur;

    QTAILQ_FOREACH(cur, &vmbus->gpadl_list, link) {
        struct vmbus_message_gpadl_created reply = {
            .header.message_type = VMBUS_MSG_GPADL_CREATED,
        };

        if (!gpadl_full(cur) || cur->state != VMGPADL_INIT) {
            continue;
        }

        reply.gpadl_id = cur->id;
        reply.child_relid = cur->child_relid;

        trace_vmbus_gpadl_created(cur->id);
        post_msg(vmbus, &reply, sizeof(reply));
        return;
    }

    g_assert_not_reached();
}
1879 
complete_create_gpadl(VMBus * vmbus)1880 static bool complete_create_gpadl(VMBus *vmbus)
1881 {
1882     VMBusGpadl *gpadl;
1883 
1884     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
1885         if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
1886             gpadl->state = VMGPADL_ALIVE;
1887 
1888             return true;
1889         }
1890     }
1891 
1892     g_assert_not_reached();
1893 }
1894 
/*
 * Handle a GPADL_TEARDOWN message: mark the referenced GPADL as being torn
 * down and switch the bus to VMBUS_TEARDOWN_GPADL.
 *
 * Short messages, an uninitialized bus, unknown GPADL ids and GPADLs that
 * are already torn down are all ignored.
 */
static void handle_gpadl_teardown(VMBus *vmbus,
                                  vmbus_message_gpadl_teardown *msg,
                                  uint32_t msglen)
{
    VMBusGpadl *target;

    if (msglen < sizeof(*msg)) {
        return;
    }
    if (!vmbus_initialized(vmbus)) {
        return;
    }

    trace_vmbus_gpadl_teardown(msg->gpadl_id);

    target = find_gpadl(vmbus, msg->gpadl_id);
    if (!target) {
        return;
    }
    if (target->state == VMGPADL_TORNDOWN) {
        return;
    }

    target->state = VMGPADL_TEARINGDOWN;
    vmbus->state = VMBUS_TEARDOWN_GPADL;
}
1915 
/*
 * Post a GPADL_TORNDOWN message for the first GPADL on the list that is in
 * VMGPADL_TEARINGDOWN state.  Not finding one is treated as a programming
 * error.
 */
static void send_teardown_gpadl(VMBus *vmbus)
{
    VMBusGpadl *cur;

    QTAILQ_FOREACH(cur, &vmbus->gpadl_list, link) {
        struct vmbus_message_gpadl_torndown reply = {
            .header.message_type = VMBUS_MSG_GPADL_TORNDOWN,
        };

        if (cur->state != VMGPADL_TEARINGDOWN) {
            continue;
        }

        reply.gpadl_id = cur->id;

        trace_vmbus_gpadl_torndown(cur->id);
        post_msg(vmbus, &reply, sizeof(reply));
        return;
    }

    g_assert_not_reached();
}
1935 
complete_teardown_gpadl(VMBus * vmbus)1936 static bool complete_teardown_gpadl(VMBus *vmbus)
1937 {
1938     VMBusGpadl *gpadl;
1939 
1940     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
1941         if (gpadl->state == VMGPADL_TEARINGDOWN) {
1942             gpadl->state = VMGPADL_TORNDOWN;
1943             vmbus_put_gpadl(gpadl);
1944             return true;
1945         }
1946     }
1947 
1948     g_assert_not_reached();
1949 }
1950 
/*
 * Handle an OPENCHANNEL message: record the ring buffer parameters the guest
 * supplied, try to open the channel and switch the bus to
 * VMBUS_OPEN_CHANNEL so that the result gets reported.
 *
 * The request is ignored if the message is too short, the bus is not
 * initialized, the channel is unknown, or the channel is not in
 * VMCHAN_INIT state.
 */
static void handle_open_channel(VMBus *vmbus, vmbus_message_open_channel *msg,
                                uint32_t msglen)
{
    VMBusChannel *target;

    if (msglen < sizeof(*msg)) {
        return;
    }
    if (!vmbus_initialized(vmbus)) {
        return;
    }

    trace_vmbus_open_channel(msg->child_relid, msg->ring_buffer_gpadl_id,
                             msg->target_vp);

    target = find_channel(vmbus, msg->child_relid);
    if (!target) {
        return;
    }
    if (target->state != VMCHAN_INIT) {
        return;
    }

    /* parameters must be in place before open_channel() is called */
    target->ringbuf_gpadl = msg->ring_buffer_gpadl_id;
    target->ringbuf_send_offset = msg->ring_buffer_offset;
    target->target_vp = msg->target_vp;
    target->open_id = msg->open_id;

    open_channel(target);

    target->state = VMCHAN_OPENING;
    vmbus->state = VMBUS_OPEN_CHANNEL;
}
1977 
/*
 * Post an OPENCHANNEL_RESULT message for the first channel on the list that
 * is in VMCHAN_OPENING state.  The reported status is zero when the channel
 * is open and non-zero otherwise.  Not finding such a channel is treated as
 * a programming error.
 */
static void send_open_channel(VMBus *vmbus)
{
    VMBusChannel *cur;

    QTAILQ_FOREACH(cur, &vmbus->channel_list, link) {
        struct vmbus_message_open_result reply = {
            .header.message_type = VMBUS_MSG_OPENCHANNEL_RESULT,
        };

        if (cur->state != VMCHAN_OPENING) {
            continue;
        }

        reply.child_relid = cur->id;
        reply.open_id = cur->open_id;
        reply.status = !vmbus_channel_is_open(cur);

        trace_vmbus_channel_open(cur->id, reply.status);
        post_msg(vmbus, &reply, sizeof(reply));
        return;
    }

    g_assert_not_reached();
}
1999 
complete_open_channel(VMBus * vmbus)2000 static bool complete_open_channel(VMBus *vmbus)
2001 {
2002     VMBusChannel *chan;
2003 
2004     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2005         if (chan->state == VMCHAN_OPENING) {
2006             if (vmbus_channel_is_open(chan)) {
2007                 chan->state = VMCHAN_OPEN;
2008                 /*
2009                  * simulate guest notification of ringbuffer space made
2010                  * available, for the channel protocols where the host
2011                  * initiates the communication
2012                  */
2013                 vmbus_channel_notify_host(chan);
2014             } else {
2015                 chan->state = VMCHAN_INIT;
2016             }
2017             return true;
2018         }
2019     }
2020 
2021     g_assert_not_reached();
2022 }
2023 
/*
 * Cold-reset @vdev, but only if none of its channels is open any more.
 */
static void vdev_reset_on_close(VMBusDevice *vdev)
{
    uint16_t idx = 0;

    while (idx < vdev->num_channels) {
        if (vmbus_channel_is_open(&vdev->channels[idx])) {
            /* still in use, leave the device alone */
            return;
        }
        idx++;
    }

    /* all channels closed -- reset device */
    device_cold_reset(DEVICE(vdev));
}
2037 
/*
 * Handle a CLOSECHANNEL message: close the referenced channel, put it back
 * into VMCHAN_INIT state and reset the owning device if that was its last
 * open channel.
 *
 * Short messages, an uninitialized bus and unknown channel ids are ignored.
 */
static void handle_close_channel(VMBus *vmbus, vmbus_message_close_channel *msg,
                                 uint32_t msglen)
{
    VMBusChannel *target;

    if (msglen < sizeof(*msg)) {
        return;
    }
    if (!vmbus_initialized(vmbus)) {
        return;
    }

    trace_vmbus_close_channel(msg->child_relid);

    target = find_channel(vmbus, msg->child_relid);
    if (target) {
        close_channel(target);
        target->state = VMCHAN_INIT;

        vdev_reset_on_close(target->dev);
    }
}
2059 
/*
 * Handle an UNLOAD message by switching the bus to VMBUS_UNLOAD.
 * Neither the message body nor its length is inspected.
 */
static void handle_unload(VMBus *vmbus, void *msg, uint32_t msglen)
{
    vmbus->state = VMBUS_UNLOAD;
}
2064 
/*
 * Run step for VMBUS_UNLOAD: discard whatever is still queued for reception
 * and post an UNLOAD_RESPONSE message.
 */
static void send_unload(VMBus *vmbus)
{
    vmbus_message_header reply = {
        .message_type = VMBUS_MSG_UNLOAD_RESPONSE,
    };

    /* the queue size is only modified under the rx queue lock */
    qemu_mutex_lock(&vmbus->rx_queue_lock);
    vmbus->rx_queue_size = 0;
    qemu_mutex_unlock(&vmbus->rx_queue_lock);

    post_msg(vmbus, &reply, sizeof(reply));
}
2078 
complete_unload(VMBus * vmbus)2079 static bool complete_unload(VMBus *vmbus)
2080 {
2081     vmbus_reset_all(vmbus);
2082     return true;
2083 }
2084 
/*
 * Run step for VMBUS_LISTEN: take one message off the head of the rx queue
 * and dispatch it to the handler for its type.
 *
 * Everything is done with the rx queue lock held.  A message shorter than
 * the common header is dropped without being dispatched.  Whenever a message
 * was dequeued, another run of the bus is scheduled.
 */
static void process_message(VMBus *vmbus)
{
    struct hyperv_post_message_input *entry;
    struct vmbus_message_header *hdr;
    void *payload;
    uint32_t payload_len;

    qemu_mutex_lock(&vmbus->rx_queue_lock);

    if (!vmbus->rx_queue_size) {
        /* nothing pending */
        qemu_mutex_unlock(&vmbus->rx_queue_lock);
        return;
    }

    entry = &vmbus->rx_queue[vmbus->rx_queue_head];
    payload_len = entry->payload_size;

    if (payload_len >= sizeof(*hdr)) {
        payload = entry->payload;
        hdr = payload;

        trace_vmbus_process_incoming_message(hdr->message_type);

        switch (hdr->message_type) {
        case VMBUS_MSG_INITIATE_CONTACT:
            handle_initiate_contact(vmbus, payload, payload_len);
            break;
        case VMBUS_MSG_REQUESTOFFERS:
            handle_request_offers(vmbus, payload, payload_len);
            break;
        case VMBUS_MSG_GPADL_HEADER:
            handle_gpadl_header(vmbus, payload, payload_len);
            break;
        case VMBUS_MSG_GPADL_BODY:
            handle_gpadl_body(vmbus, payload, payload_len);
            break;
        case VMBUS_MSG_GPADL_TEARDOWN:
            handle_gpadl_teardown(vmbus, payload, payload_len);
            break;
        case VMBUS_MSG_OPENCHANNEL:
            handle_open_channel(vmbus, payload, payload_len);
            break;
        case VMBUS_MSG_CLOSECHANNEL:
            handle_close_channel(vmbus, payload, payload_len);
            break;
        case VMBUS_MSG_UNLOAD:
            handle_unload(vmbus, payload, payload_len);
            break;
        default:
            error_report("unknown message type %#x", hdr->message_type);
            break;
        }
    }

    /* consume the slot, whether or not the message was dispatched */
    vmbus->rx_queue_size--;
    vmbus->rx_queue_head++;
    vmbus->rx_queue_head %= HV_MSG_QUEUE_LEN;

    vmbus_resched(vmbus);

    qemu_mutex_unlock(&vmbus->rx_queue_lock);
}
2147 
2148 static const struct {
2149     void (*run)(VMBus *vmbus);
2150     bool (*complete)(VMBus *vmbus);
2151 } state_runner[] = {
2152     [VMBUS_LISTEN]         = {process_message,     NULL},
2153     [VMBUS_HANDSHAKE]      = {send_handshake,      NULL},
2154     [VMBUS_OFFER]          = {send_offer,          complete_offer},
2155     [VMBUS_CREATE_GPADL]   = {send_create_gpadl,   complete_create_gpadl},
2156     [VMBUS_TEARDOWN_GPADL] = {send_teardown_gpadl, complete_teardown_gpadl},
2157     [VMBUS_OPEN_CHANNEL]   = {send_open_channel,   complete_open_channel},
2158     [VMBUS_UNLOAD]         = {send_unload,         complete_unload},
2159 };
2160 
/*
 * Invoke the run handler of the current bus state, unless a message is
 * still in flight.
 */
static void vmbus_do_run(VMBus *vmbus)
{
    if (!vmbus->msg_in_progress) {
        assert(vmbus->state < VMBUS_STATE_MAX);
        assert(state_runner[vmbus->state].run);
        state_runner[vmbus->state].run(vmbus);
    }
}
2171 
vmbus_run(void * opaque)2172 static void vmbus_run(void *opaque)
2173 {
2174     VMBus *vmbus = opaque;
2175 
2176     /* make sure no recursion happens (e.g. due to recursive aio_poll()) */
2177     if (vmbus->in_progress) {
2178         return;
2179     }
2180 
2181     vmbus->in_progress = true;
2182     /*
2183      * FIXME: if vmbus_resched() is called from within vmbus_do_run(), it
2184      * should go *after* the code that can result in aio_poll; otherwise
2185      * reschedules can be missed.  No idea how to enforce that.
2186      */
2187     vmbus_do_run(vmbus);
2188     vmbus->in_progress = false;
2189 }
2190 
vmbus_msg_cb(void * data,int status)2191 static void vmbus_msg_cb(void *data, int status)
2192 {
2193     VMBus *vmbus = data;
2194     bool (*complete)(VMBus *vmbus);
2195 
2196     assert(vmbus->msg_in_progress);
2197 
2198     trace_vmbus_msg_cb(status);
2199 
2200     if (status == -EAGAIN) {
2201         goto out;
2202     }
2203     if (status) {
2204         error_report("message delivery fatal failure: %d; aborting vmbus",
2205                      status);
2206         vmbus_reset_all(vmbus);
2207         return;
2208     }
2209 
2210     assert(vmbus->state < VMBUS_STATE_MAX);
2211     complete = state_runner[vmbus->state].complete;
2212     if (!complete || complete(vmbus)) {
2213         vmbus->state = VMBUS_LISTEN;
2214     }
2215 out:
2216     vmbus->msg_in_progress = false;
2217     vmbus_resched(vmbus);
2218 }
2219 
/*
 * Schedule a one-shot bottom half that runs vmbus_run() for @vmbus in the
 * AioContext returned by qemu_get_aio_context().
 */
static void vmbus_resched(VMBus *vmbus)
{
    AioContext *ctx = qemu_get_aio_context();

    aio_bh_schedule_oneshot(ctx, vmbus_run, vmbus);
}
2224 
/*
 * Event notifier handler: the guest signalled the VMBus event connection.
 *
 * Maps the second half of the interrupt page, atomically tests and clears
 * the bit of every channel on the bus, and notifies the host side of each
 * channel whose bit was set and which is open.
 *
 * Nothing is done if the notifier was not actually signalled, if no
 * interrupt page has been set up, or if the page cannot be mapped in full.
 */
static void vmbus_signal_event(EventNotifier *e)
{
    VMBusChannel *chan;
    VMBus *vmbus = container_of(e, VMBus, notifier);
    unsigned long *int_map;
    hwaddr addr, len;
    bool is_dirty = false;

    if (!event_notifier_test_and_clear(e)) {
        return;
    }

    trace_vmbus_signal_event();

    if (!vmbus->int_page_gpa) {
        return;
    }

    addr = vmbus->int_page_gpa + TARGET_PAGE_SIZE / 2;
    len = TARGET_PAGE_SIZE / 2;
    int_map = cpu_physical_memory_map(addr, &len, 1);
    if (!int_map) {
        /* mapping failed: nothing to scan, and nothing to unmap either */
        return;
    }
    if (len != TARGET_PAGE_SIZE / 2) {
        goto unmap;
    }

    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
        if (bitmap_test_and_clear_atomic(int_map, chan->id, 1)) {
            /*
             * the bit has been cleared in guest memory, so the page is
             * modified even if the channel ends up not being notified
             */
            is_dirty = true;

            if (!vmbus_channel_is_open(chan)) {
                continue;
            }
            vmbus_channel_notify_host(chan);
        }
    }

unmap:
    cpu_physical_memory_unmap(int_map, len, 1, is_dirty);
}
2263 
vmbus_dev_realize(DeviceState * dev,Error ** errp)2264 static void vmbus_dev_realize(DeviceState *dev, Error **errp)
2265 {
2266     VMBusDevice *vdev = VMBUS_DEVICE(dev);
2267     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2268     VMBus *vmbus = VMBUS(qdev_get_parent_bus(dev));
2269     BusChild *child;
2270     Error *err = NULL;
2271     char idstr[UUID_STR_LEN];
2272 
2273     assert(!qemu_uuid_is_null(&vdev->instanceid));
2274 
2275     if (!qemu_uuid_is_null(&vdc->instanceid)) {
2276         /* Class wants to only have a single instance with a fixed UUID */
2277         if (!qemu_uuid_is_equal(&vdev->instanceid, &vdc->instanceid)) {
2278             error_setg(&err, "instance id can't be changed");
2279             goto error_out;
2280         }
2281     }
2282 
2283     /* Check for instance id collision for this class id */
2284     QTAILQ_FOREACH(child, &BUS(vmbus)->children, sibling) {
2285         VMBusDevice *child_dev = VMBUS_DEVICE(child->child);
2286 
2287         if (child_dev == vdev) {
2288             continue;
2289         }
2290 
2291         if (qemu_uuid_is_equal(&child_dev->instanceid, &vdev->instanceid)) {
2292             qemu_uuid_unparse(&vdev->instanceid, idstr);
2293             error_setg(&err, "duplicate vmbus device instance id %s", idstr);
2294             goto error_out;
2295         }
2296     }
2297 
2298     vdev->dma_as = &address_space_memory;
2299 
2300     create_channels(vmbus, vdev, &err);
2301     if (err) {
2302         goto error_out;
2303     }
2304 
2305     if (vdc->vmdev_realize) {
2306         vdc->vmdev_realize(vdev, &err);
2307         if (err) {
2308             goto err_vdc_realize;
2309         }
2310     }
2311     return;
2312 
2313 err_vdc_realize:
2314     free_channels(vdev);
2315 error_out:
2316     error_propagate(errp, err);
2317 }
2318 
/*
 * Reset a VMBus device: close every channel, put it back into VMCHAN_INIT
 * state, then invoke the class-specific reset hook, if any.
 */
static void vmbus_dev_reset(DeviceState *dev)
{
    VMBusDevice *vdev = VMBUS_DEVICE(dev);
    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
    uint16_t idx = 0;

    /* the channel array may not have been allocated */
    while (vdev->channels && idx < vdev->num_channels) {
        VMBusChannel *chan = &vdev->channels[idx];

        close_channel(chan);
        chan->state = VMCHAN_INIT;
        idx++;
    }

    if (vdc->vmdev_reset) {
        vdc->vmdev_reset(vdev);
    }
}
2337 
/*
 * Unrealize a VMBus device: give the class a chance to clean up first, then
 * release the device's channels.
 */
static void vmbus_dev_unrealize(DeviceState *dev)
{
    VMBusDevice *vdev = VMBUS_DEVICE(dev);
    void (*class_unrealize)(VMBusDevice *vdev) =
        VMBUS_DEVICE_GET_CLASS(vdev)->vmdev_unrealize;

    if (class_unrealize) {
        class_unrealize(vdev);
    }

    free_channels(vdev);
}
2348 
/*
 * Properties of the VMBus device base type: a single UUID property,
 * "instanceid", backed by VMBusDevice.instanceid.
 */
2349 static Property vmbus_dev_props[] = {
2350     DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid),
2351     DEFINE_PROP_END_OF_LIST()
2352 };
2353 
2354 
/*
 * Class initializer of the VMBus device base type: binds the type to the
 * VMBus bus type and installs the properties and the realize, unrealize and
 * reset handlers.
 */
static void vmbus_dev_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    device_class_set_props(dc, vmbus_dev_props);

    dc->bus_type = TYPE_VMBUS;
    dc->realize = vmbus_dev_realize;
    dc->unrealize = vmbus_dev_unrealize;

    device_class_set_legacy_reset(dc, vmbus_dev_reset);
}
2364 
/*
 * Instance initializer of the VMBus device base type: if the class defines
 * a (non-null) instance id, the new device starts out with it.
 */
static void vmbus_dev_instance_init(Object *obj)
{
    VMBusDevice *vdev = VMBUS_DEVICE(obj);
    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);

    if (qemu_uuid_is_null(&vdc->instanceid)) {
        return;
    }

    /* Class wants to only have a single instance with a fixed UUID */
    vdev->instanceid = vdc->instanceid;
}
2375 
/*
 * Migration description of the state kept by the VMBus device base type:
 * the instance id bytes, the number of channels and the channel array
 * itself (num_channels elements, each described by vmstate_channel).
 *
 * The field order defines the layout in the migration stream; do not
 * reorder.
 */
2376 const VMStateDescription vmstate_vmbus_dev = {
2377     .name = TYPE_VMBUS_DEVICE,
2378     .version_id = 0,
2379     .minimum_version_id = 0,
2380     .fields = (const VMStateField[]) {
             /* NOTE(review): 16 is presumably the size of instanceid.data */
2381         VMSTATE_UINT8_ARRAY(instanceid.data, VMBusDevice, 16),
2382         VMSTATE_UINT16(num_channels, VMBusDevice),
2383         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(channels, VMBusDevice,
2384                                              num_channels, vmstate_channel,
2385                                              VMBusChannel),
2386         VMSTATE_END_OF_LIST()
2387     }
2388 };
2389 
2390 /* vmbus generic device base */
2391 static const TypeInfo vmbus_dev_type_info = {
2392     .name = TYPE_VMBUS_DEVICE,
2393     .parent = TYPE_DEVICE,
2394     .abstract = true,
2395     .instance_size = sizeof(VMBusDevice),
2396     .class_size = sizeof(VMBusDeviceClass),
2397     .class_init = vmbus_dev_class_init,
2398     .instance_init = vmbus_dev_instance_init,
2399 };
2400 
vmbus_realize(BusState * bus,Error ** errp)2401 static void vmbus_realize(BusState *bus, Error **errp)
2402 {
2403     int ret = 0;
2404     VMBus *vmbus = VMBUS(bus);
2405 
2406     qemu_mutex_init(&vmbus->rx_queue_lock);
2407 
2408     QTAILQ_INIT(&vmbus->gpadl_list);
2409     QTAILQ_INIT(&vmbus->channel_list);
2410 
2411     ret = hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID,
2412                                  vmbus_recv_message, vmbus);
2413     if (ret != 0) {
2414         error_setg(errp, "hyperv set message handler failed: %d", ret);
2415         goto error_out;
2416     }
2417 
2418     ret = event_notifier_init(&vmbus->notifier, 0);
2419     if (ret != 0) {
2420         error_setg(errp, "event notifier failed to init with %d", ret);
2421         goto remove_msg_handler;
2422     }
2423 
2424     event_notifier_set_handler(&vmbus->notifier, vmbus_signal_event);
2425     ret = hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID,
2426                                         &vmbus->notifier);
2427     if (ret != 0) {
2428         error_setg(errp, "hyperv set event handler failed with %d", ret);
2429         goto clear_event_notifier;
2430     }
2431 
2432     return;
2433 
2434 clear_event_notifier:
2435     event_notifier_cleanup(&vmbus->notifier);
2436 remove_msg_handler:
2437     hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
2438 error_out:
2439     qemu_mutex_destroy(&vmbus->rx_queue_lock);
2440 }
2441 
/*
 * Unrealize the bus: unregister the Hyper-V message and event handlers,
 * clean up the event notifier and destroy the rx queue lock.
 */
static void vmbus_unrealize(BusState *bus)
{
    VMBus *self = VMBUS(bus);

    /* stop message and event delivery first */
    hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
    hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID, NULL);

    event_notifier_cleanup(&self->notifier);
    qemu_mutex_destroy(&self->rx_queue_lock);
}
2452 
/*
 * "hold" phase of bus reset: deinitialize the bus.  @type is not used.
 */
static void vmbus_reset_hold(Object *obj, ResetType type)
{
    VMBus *vmbus = VMBUS(obj);

    vmbus_deinit(vmbus);
}
2457 
/*
 * Device path of a VMBus device: the path of the device that owns the bus
 * @dev sits on, as returned by qdev_get_dev_path().
 */
static char *vmbus_get_dev_path(DeviceState *dev)
{
    DeviceState *bus_owner = qdev_get_parent_bus(dev)->parent;

    return qdev_get_dev_path(bus_owner);
}
2463 
/*
 * Firmware device path component of a VMBus device, in the form
 * "<fw name>@<instance id>".  Returns a newly allocated string.
 */
static char *vmbus_get_fw_dev_path(DeviceState *dev)
{
    char idstr[UUID_STR_LEN];

    qemu_uuid_unparse(&VMBUS_DEVICE(dev)->instanceid, idstr);

    return g_strdup_printf("%s@%s", qdev_fw_name(dev), idstr);
}
2472 
/*
 * Class initializer of the VMBus bus type: installs the realize/unrealize
 * handlers, the device path callbacks and the reset "hold" phase handler.
 */
static void vmbus_class_init(ObjectClass *klass, void *data)
{
    ResettableClass *rc = RESETTABLE_CLASS(klass);
    BusClass *bc = BUS_CLASS(klass);

    bc->realize = vmbus_realize;
    bc->unrealize = vmbus_unrealize;
    bc->get_dev_path = vmbus_get_dev_path;
    bc->get_fw_dev_path = vmbus_get_fw_dev_path;

    rc->phases.hold = vmbus_reset_hold;
}
2484 
vmbus_pre_load(void * opaque)2485 static int vmbus_pre_load(void *opaque)
2486 {
2487     VMBusChannel *chan;
2488     VMBus *vmbus = VMBUS(opaque);
2489 
2490     /*
2491      * channel IDs allocated by the source will come in the migration stream
2492      * for each channel, so clean up the ones allocated at realize
2493      */
2494     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2495         unregister_chan_id(chan);
2496     }
2497 
2498     return 0;
2499 }
vmbus_post_load(void * opaque,int version_id)2500 static int vmbus_post_load(void *opaque, int version_id)
2501 {
2502     int ret;
2503     VMBus *vmbus = VMBUS(opaque);
2504     VMBusGpadl *gpadl;
2505     VMBusChannel *chan;
2506 
2507     ret = vmbus_init(vmbus);
2508     if (ret) {
2509         return ret;
2510     }
2511 
2512     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
2513         gpadl->vmbus = vmbus;
2514         gpadl->refcount = 1;
2515     }
2516 
2517     /*
2518      * reopening channels depends on initialized vmbus so it's done here
2519      * instead of channel_post_load()
2520      */
2521     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2522 
2523         if (chan->state == VMCHAN_OPENING || chan->state == VMCHAN_OPEN) {
2524             open_channel(chan);
2525         }
2526 
2527         if (chan->state != VMCHAN_OPEN) {
2528             continue;
2529         }
2530 
2531         if (!vmbus_channel_is_open(chan)) {
2532             /* reopen failed, abort loading */
2533             return -1;
2534         }
2535 
2536         /* resume processing on the guest side if it missed the notification */
2537         hyperv_sint_route_set_sint(chan->notify_route);
2538         /* ditto on the host side */
2539         vmbus_channel_notify_host(chan);
2540     }
2541 
2542     vmbus_resched(vmbus);
2543     return 0;
2544 }
2545 
/*
 * Migration description of one queued incoming message
 * (struct hyperv_post_message_input): only the payload size and the payload
 * bytes are transferred.
 *
 * The field order defines the layout in the migration stream; do not
 * reorder.
 */
2546 static const VMStateDescription vmstate_post_message_input = {
2547     .name = "vmbus/hyperv_post_message_input",
2548     .version_id = 0,
2549     .minimum_version_id = 0,
2550     .fields = (const VMStateField[]) {
2551         /*
2552          * skip connection_id and message_type as they are validated before
2553          * queueing and ignored on dequeueing
2554          */
2555         VMSTATE_UINT32(payload_size, struct hyperv_post_message_input),
2556         VMSTATE_UINT8_ARRAY(payload, struct hyperv_post_message_input,
2557                             HV_MESSAGE_PAYLOAD_SIZE),
2558         VMSTATE_END_OF_LIST()
2559     }
2560 };
2561 
vmbus_rx_queue_needed(void * opaque)2562 static bool vmbus_rx_queue_needed(void *opaque)
2563 {
2564     VMBus *vmbus = VMBUS(opaque);
2565     return vmbus->rx_queue_size;
2566 }
2567 
/*
 * Optional migration subsection carrying the receive queue of the bus: head
 * index, number of queued messages and all HV_MSG_QUEUE_LEN slots.  It is
 * only sent when vmbus_rx_queue_needed() says so.
 *
 * The field order defines the layout in the migration stream; do not
 * reorder.
 */
2568 static const VMStateDescription vmstate_rx_queue = {
2569     .name = "vmbus/rx_queue",
2570     .version_id = 0,
2571     .minimum_version_id = 0,
2572     .needed = vmbus_rx_queue_needed,
2573     .fields = (const VMStateField[]) {
2574         VMSTATE_UINT8(rx_queue_head, VMBus),
2575         VMSTATE_UINT8(rx_queue_size, VMBus),
2576         VMSTATE_STRUCT_ARRAY(rx_queue, VMBus,
2577                              HV_MSG_QUEUE_LEN, 0,
2578                              vmstate_post_message_input,
2579                              struct hyperv_post_message_input),
2580         VMSTATE_END_OF_LIST()
2581     }
2582 };
2583 
/*
 * Migration description of the bus itself: state machine state, protocol
 * version, target VP, interrupt page address and the list of GPADLs, plus
 * the optional rx queue subsection.
 *
 * vmbus_pre_load() runs before and vmbus_post_load() after the fields are
 * loaded.  The field order defines the layout in the migration stream; do
 * not reorder.
 */
2584 static const VMStateDescription vmstate_vmbus = {
2585     .name = TYPE_VMBUS,
2586     .version_id = 0,
2587     .minimum_version_id = 0,
2588     .pre_load = vmbus_pre_load,
2589     .post_load = vmbus_post_load,
2590     .fields = (const VMStateField[]) {
2591         VMSTATE_UINT8(state, VMBus),
2592         VMSTATE_UINT32(version, VMBus),
2593         VMSTATE_UINT32(target_vp, VMBus),
2594         VMSTATE_UINT64(int_page_gpa, VMBus),
2595         VMSTATE_QTAILQ_V(gpadl_list, VMBus, 0,
2596                          vmstate_gpadl, VMBusGpadl, link),
2597         VMSTATE_END_OF_LIST()
2598     },
2599     .subsections = (const VMStateDescription * const []) {
2600         &vmstate_rx_queue,
2601         NULL
2602     }
2603 };
2604 
2605 static const TypeInfo vmbus_type_info = {
2606     .name = TYPE_VMBUS,
2607     .parent = TYPE_BUS,
2608     .instance_size = sizeof(VMBus),
2609     .class_init = vmbus_class_init,
2610 };
2611 
vmbus_bridge_realize(DeviceState * dev,Error ** errp)2612 static void vmbus_bridge_realize(DeviceState *dev, Error **errp)
2613 {
2614     VMBusBridge *bridge = VMBUS_BRIDGE(dev);
2615 
2616     /*
2617      * here there's at least one vmbus bridge that is being realized, so
2618      * vmbus_bridge_find can only return NULL if it's not unique
2619      */
2620     if (!vmbus_bridge_find()) {
2621         error_setg(errp, "there can be at most one %s in the system",
2622                    TYPE_VMBUS_BRIDGE);
2623         return;
2624     }
2625 
2626     if (!hyperv_is_synic_enabled()) {
2627         error_report("VMBus requires usable Hyper-V SynIC and VP_INDEX");
2628         return;
2629     }
2630 
2631     if (!hyperv_are_vmbus_recommended_features_enabled()) {
2632         warn_report("VMBus enabled without the recommended set of Hyper-V features: "
2633                     "hv-stimer, hv-vapic and hv-runtime. "
2634                     "Some Windows versions might not boot or enable the VMBus device");
2635     }
2636 
2637     bridge->bus = VMBUS(qbus_new(TYPE_VMBUS, dev, "vmbus"));
2638 }
2639 
/*
 * Open Firmware unit address of the bridge: always "0".
 * Returns a newly allocated string; @dev is not used.
 */
static char *vmbus_bridge_ofw_unit_address(const SysBusDevice *dev)
{
    /* there can be only one VMBus */
    return g_strdup("0");
}
2645 
/*
 * Migration description of the bridge: its only state is the VMBus it
 * points to, which is described by vmstate_vmbus.
 */
2646 static const VMStateDescription vmstate_vmbus_bridge = {
2647     .name = TYPE_VMBUS_BRIDGE,
2648     .version_id = 0,
2649     .minimum_version_id = 0,
2650     .fields = (const VMStateField[]) {
2651         VMSTATE_STRUCT_POINTER(bus, VMBusBridge, vmstate_vmbus, VMBus),
2652         VMSTATE_END_OF_LIST()
2653     },
2654 };
2655 
/*
 * Properties of the bridge: a single uint8 property, "irq", backed by
 * VMBusBridge.irq and defaulting to 7.
 */
2656 static Property vmbus_bridge_props[] = {
2657     DEFINE_PROP_UINT8("irq", VMBusBridge, irq, 7),
2658     DEFINE_PROP_END_OF_LIST()
2659 };
2660 
/*
 * Class initializer of the VMBus bridge: installs the realize handler, the
 * firmware name and unit address callback, the migration description and
 * the properties, and files the device under the bridge category.
 */
static void vmbus_bridge_class_init(ObjectClass *klass, void *data)
{
    SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = vmbus_bridge_realize;
    dc->vmsd = &vmstate_vmbus_bridge;
    dc->fw_name = "vmbus";
    /* override SysBusDevice's default */
    dc->user_creatable = true;

    sbc->explicit_ofw_unit_address = vmbus_bridge_ofw_unit_address;

    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
    device_class_set_props(dc, vmbus_bridge_props);
}
2675 
2676 static const TypeInfo vmbus_bridge_type_info = {
2677     .name = TYPE_VMBUS_BRIDGE,
2678     .parent = TYPE_SYS_BUS_DEVICE,
2679     .instance_size = sizeof(VMBusBridge),
2680     .class_init = vmbus_bridge_class_init,
2681 };
2682 
vmbus_register_types(void)2683 static void vmbus_register_types(void)
2684 {
2685     type_register_static(&vmbus_bridge_type_info);
2686     type_register_static(&vmbus_dev_type_info);
2687     type_register_static(&vmbus_type_info);
2688 }
2689 
2690 type_init(vmbus_register_types)
2691