xref: /openbmc/qemu/hw/hyperv/vmbus.c (revision dd205025)
1 /*
2  * QEMU Hyper-V VMBus
3  *
4  * Copyright (c) 2017-2018 Virtuozzo International GmbH.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/error-report.h"
12 #include "qemu/main-loop.h"
13 #include "qapi/error.h"
14 #include "migration/vmstate.h"
15 #include "hw/qdev-properties.h"
16 #include "hw/hyperv/hyperv.h"
17 #include "hw/hyperv/vmbus.h"
18 #include "hw/hyperv/vmbus-bridge.h"
19 #include "hw/sysbus.h"
20 #include "cpu.h"
21 #include "trace.h"
22 
23 enum {
24     VMGPADL_INIT,
25     VMGPADL_ALIVE,
26     VMGPADL_TEARINGDOWN,
27     VMGPADL_TORNDOWN,
28 };
29 
30 struct VMBusGpadl {
31     /* GPADL id */
32     uint32_t id;
33     /* associated channel id, as given by the guest in GPADL_HEADER */
34     uint32_t child_relid;
35 
36     /* number of pages in the GPADL as declared in GPADL_HEADER message */
37     uint32_t num_gfns;
38     /*
39      * Due to limited message size, GPADL may not fit fully in a single
40      * GPADL_HEADER message, and is further populated using GPADL_BODY
41      * messages.  @seen_gfns is the number of pages seen so far; once it
42      * reaches @num_gfns, the GPADL is ready to use.
43      */
44     uint32_t seen_gfns;
45     /* array of GFNs (of size @num_gfns once allocated) */
46     uint64_t *gfns;
47 
48     uint8_t state;
49 
50     QTAILQ_ENTRY(VMBusGpadl) link;
51     VMBus *vmbus;
52     unsigned refcount;
53 };
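/*
 * Host-side lifecycle of a GPADL: it is created when the guest sends
 * GPADL_HEADER (create_gpadl), filled with GFNs from subsequent GPADL_BODY
 * messages until @seen_gfns reaches @num_gfns (gpadl_full), referenced by
 * channels via vmbus_get_gpadl / vmbus_put_gpadl, and freed once the last
 * reference is dropped (free_gpadl).  The VMGPADL_* states above track its
 * progress through the create / teardown message exchange with the guest.
 */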
54 
55 /*
56  * Wrap sequential read from / write to GPADL.
57  */
58 typedef struct GpadlIter {
59     VMBusGpadl *gpadl;
60     AddressSpace *as;
61     DMADirection dir;
62     /* offset into GPADL where the next i/o will be performed */
63     uint32_t off;
64     /*
65      * Cached mapping of the currently accessed page, up to page boundary.
66      * Updated lazily on i/o.
67      * Note: MemoryRegionCache can not be used here because pages in the GPADL
68      * are non-contiguous and may belong to different memory regions.
69      */
70     void *map;
71     /* offset after last i/o (i.e. not affected by seek) */
72     uint32_t last_off;
73     /*
74      * Indicator that the iterator is active and may have a cached mapping.
75      * Allows enforcing bracketing of all i/o (which may create cached
76      * mappings) and thus ruling out mapping leaks.
77      */
78     bool active;
79 } GpadlIter;
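/*
 * Typical bracketed usage of the iterator (see e.g. vmbus_iov_to_gpadl()
 * below):
 *
 *     gpadl_iter_init(&iter, gpadl, as, dir);
 *     gpadl_iter_start_io(&iter);
 *     gpadl_iter_seek(&iter, off);
 *     ... one or more gpadl_iter_io(&iter, buf, len) calls ...
 *     gpadl_iter_end_io(&iter);
 */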
80 
81 /*
82  * Ring buffer.  There are two of them, sitting in the same GPADL, for each
83  * channel.
84  * Each ring buffer consists of a set of pages, with the first page containing
85  * the ring buffer header, and the remaining pages being for data packets.
86  */
87 typedef struct VMBusRingBufCommon {
88     AddressSpace *as;
89     /* GPA of the ring buffer header */
90     dma_addr_t rb_addr;
91     /* start and length of the ring buffer data area within GPADL */
92     uint32_t base;
93     uint32_t len;
94 
95     GpadlIter iter;
96 } VMBusRingBufCommon;
97 
98 typedef struct VMBusSendRingBuf {
99     VMBusRingBufCommon common;
100     /* current write index, to be committed at the end of send */
101     uint32_t wr_idx;
102     /* write index at the start of send */
103     uint32_t last_wr_idx;
104     /* space to be requested from the guest */
105     uint32_t wanted;
106     /* space reserved for planned sends */
107     uint32_t reserved;
108     /* last seen read index */
109     uint32_t last_seen_rd_idx;
110 } VMBusSendRingBuf;
111 
112 typedef struct VMBusRecvRingBuf {
113     VMBusRingBufCommon common;
114     /* current read index, to be committed at the end of receive */
115     uint32_t rd_idx;
116     /* read index at the start of receive */
117     uint32_t last_rd_idx;
118     /* last seen write index */
119     uint32_t last_seen_wr_idx;
120 } VMBusRecvRingBuf;
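/*
 * Note on indices: wr_idx / rd_idx and friends are byte offsets into the
 * ring's data area (i.e. excluding the header page).  The guest-visible
 * read_index / write_index in the vmbus_ring_buffer header are only accessed
 * through a temporarily mapped header (ringbuf_map_hdr / ringbuf_unmap_hdr);
 * in between, all work is done on these cached copies.
 */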
121 
122 
123 enum {
124     VMOFFER_INIT,
125     VMOFFER_SENDING,
126     VMOFFER_SENT,
127 };
128 
129 enum {
130     VMCHAN_INIT,
131     VMCHAN_OPENING,
132     VMCHAN_OPEN,
133 };
134 
135 struct VMBusChannel {
136     VMBusDevice *dev;
137 
138     /* channel id */
139     uint32_t id;
140     /*
141      * subchannel index within the device; subchannel #0 is "primary" and
142      * always exists
143      */
144     uint16_t subchan_idx;
145     uint32_t open_id;
146     /* VP_INDEX of the vCPU to notify with (synthetic) interrupts */
147     uint32_t target_vp;
148     /* GPADL id to use for the ring buffers */
149     uint32_t ringbuf_gpadl;
150     /* start (in pages) of the send ring buffer within @ringbuf_gpadl */
151     uint32_t ringbuf_send_offset;
152 
153     uint8_t offer_state;
154     uint8_t state;
155     bool is_open;
156 
157     /* main device worker; copied from the device class */
158     VMBusChannelNotifyCb notify_cb;
159     /*
160      * guest->host notifications, either sent directly or dispatched via
161      * interrupt page (older VMBus)
162      */
163     EventNotifier notifier;
164 
165     VMBus *vmbus;
166     /*
167      * SINT route to signal with host->guest notifications; may be shared with
168      * the main VMBus SINT route
169      */
170     HvSintRoute *notify_route;
171     VMBusGpadl *gpadl;
172 
173     VMBusSendRingBuf send_ringbuf;
174     VMBusRecvRingBuf recv_ringbuf;
175 
176     QTAILQ_ENTRY(VMBusChannel) link;
177 };
178 
179 /*
180  * Hyper-V spec mandates that every message port has 16 buffers, which means
181  * that the guest can post up to this many messages without blocking.
182  * Therefore a queue for incoming messages has to be provided.
183  * For outgoing (i.e. host->guest) messages there's no queue; the VMBus just
184  * doesn't transition to a new state until the message is known to have been
185  * successfully delivered to the respective SynIC message slot.
186  */
187 #define HV_MSG_QUEUE_LEN     16
188 
189 /* Hyper-V devices never use channel #0; it is presumably reserved. */
190 #define VMBUS_FIRST_CHANID      1
191 /* Each channel occupies one bit within a single event page sint slot. */
192 #define VMBUS_CHANID_COUNT      (HV_EVENT_FLAGS_COUNT - VMBUS_FIRST_CHANID)
193 /* Leave a few connection numbers for other purposes. */
194 #define VMBUS_CHAN_CONNECTION_OFFSET     16
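/*
 * The connection number the guest uses to signal a channel is
 * chan_connection_id() below, i.e. VMBUS_CHAN_CONNECTION_OFFSET + chan->id;
 * since channel ids start at VMBUS_FIRST_CHANID, connection numbers below
 * VMBUS_CHAN_CONNECTION_OFFSET + VMBUS_FIRST_CHANID remain free for other
 * uses.
 */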
195 
196 /*
197  * Since the success or failure of sending a message is reported
198  * asynchronously, the VMBus state machine has effectively two entry points:
199  * vmbus_run and vmbus_msg_cb (the latter is called when the host->guest
200  * message delivery status becomes known).  Both are run as oneshot BHs on the
201  * main aio context, ensuring serialization.
202  */
203 enum {
204     VMBUS_LISTEN,
205     VMBUS_HANDSHAKE,
206     VMBUS_OFFER,
207     VMBUS_CREATE_GPADL,
208     VMBUS_TEARDOWN_GPADL,
209     VMBUS_OPEN_CHANNEL,
210     VMBUS_UNLOAD,
211     VMBUS_STATE_MAX
212 };
213 
214 struct VMBus {
215     BusState parent;
216 
217     uint8_t state;
218     /* protection against recursive aio_poll (see vmbus_run) */
219     bool in_progress;
220     /* whether there's a message being delivered to the guest */
221     bool msg_in_progress;
222     uint32_t version;
223     /* VP_INDEX of the vCPU to send messages and interrupts to */
224     uint32_t target_vp;
225     HvSintRoute *sint_route;
226     /*
227      * interrupt page for older protocol versions; newer ones use SynIC event
228      * flags directly
229      */
230     hwaddr int_page_gpa;
231 
232     DECLARE_BITMAP(chanid_bitmap, VMBUS_CHANID_COUNT);
233 
234     /* incoming message queue */
235     struct hyperv_post_message_input rx_queue[HV_MSG_QUEUE_LEN];
236     uint8_t rx_queue_head;
237     uint8_t rx_queue_size;
238     QemuMutex rx_queue_lock;
239 
240     QTAILQ_HEAD(, VMBusGpadl) gpadl_list;
241     QTAILQ_HEAD(, VMBusChannel) channel_list;
242 
243     /*
244      * guest->host notifications for older VMBus, to be dispatched via
245      * interrupt page
246      */
247     EventNotifier notifier;
248 };
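/*
 * The rx_queue above is a simple circular buffer protected by rx_queue_lock:
 * rx_queue_head indexes the oldest queued message and rx_queue_size counts
 * the queued entries, so a new message lands at
 * (rx_queue_head + rx_queue_size) % HV_MSG_QUEUE_LEN
 * (see enqueue_incoming_message()).
 */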
249 
250 static bool gpadl_full(VMBusGpadl *gpadl)
251 {
252     return gpadl->seen_gfns == gpadl->num_gfns;
253 }
254 
255 static VMBusGpadl *create_gpadl(VMBus *vmbus, uint32_t id,
256                                 uint32_t child_relid, uint32_t num_gfns)
257 {
258     VMBusGpadl *gpadl = g_new0(VMBusGpadl, 1);
259 
260     gpadl->id = id;
261     gpadl->child_relid = child_relid;
262     gpadl->num_gfns = num_gfns;
263     gpadl->gfns = g_new(uint64_t, num_gfns);
264     QTAILQ_INSERT_HEAD(&vmbus->gpadl_list, gpadl, link);
265     gpadl->vmbus = vmbus;
266     gpadl->refcount = 1;
267     return gpadl;
268 }
269 
270 static void free_gpadl(VMBusGpadl *gpadl)
271 {
272     QTAILQ_REMOVE(&gpadl->vmbus->gpadl_list, gpadl, link);
273     g_free(gpadl->gfns);
274     g_free(gpadl);
275 }
276 
277 static VMBusGpadl *find_gpadl(VMBus *vmbus, uint32_t gpadl_id)
278 {
279     VMBusGpadl *gpadl;
280     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
281         if (gpadl->id == gpadl_id) {
282             return gpadl;
283         }
284     }
285     return NULL;
286 }
287 
288 VMBusGpadl *vmbus_get_gpadl(VMBusChannel *chan, uint32_t gpadl_id)
289 {
290     VMBusGpadl *gpadl = find_gpadl(chan->vmbus, gpadl_id);
291     if (!gpadl || !gpadl_full(gpadl)) {
292         return NULL;
293     }
294     gpadl->refcount++;
295     return gpadl;
296 }
297 
298 void vmbus_put_gpadl(VMBusGpadl *gpadl)
299 {
300     if (!gpadl) {
301         return;
302     }
303     if (--gpadl->refcount) {
304         return;
305     }
306     free_gpadl(gpadl);
307 }
308 
309 uint32_t vmbus_gpadl_len(VMBusGpadl *gpadl)
310 {
311     return gpadl->num_gfns * TARGET_PAGE_SIZE;
312 }
313 
314 static void gpadl_iter_init(GpadlIter *iter, VMBusGpadl *gpadl,
315                             AddressSpace *as, DMADirection dir)
316 {
317     iter->gpadl = gpadl;
318     iter->as = as;
319     iter->dir = dir;
320     iter->active = false;
321 }
322 
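/*
 * Release the cached page mapping and report the amount of it that was
 * actually accessed: @map points at the first byte accessed through this
 * mapping, and @last_off is the GPADL offset just past the last byte
 * accessed, so the accessed span within the page is
 * [map_start_in_page, io_end_in_page).
 */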
323 static inline void gpadl_iter_cache_unmap(GpadlIter *iter)
324 {
325     uint32_t map_start_in_page = (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
326     uint32_t io_end_in_page = ((iter->last_off - 1) & ~TARGET_PAGE_MASK) + 1;
327 
328     /* mapping is only done for a non-zero amount of i/o */
329     assert(iter->last_off > 0);
330     assert(map_start_in_page < io_end_in_page);
331 
332     dma_memory_unmap(iter->as, iter->map, TARGET_PAGE_SIZE - map_start_in_page,
333                      iter->dir, io_end_in_page - map_start_in_page);
334 }
335 
336 /*
337  * Copy exactly @len bytes between the GPADL pointed to by @iter and @buf.
338  * The direction of the copy is determined by @iter->dir.
339  * The caller must ensure the operation overflows neither @buf nor the GPADL
340  * (there's an assert for the latter).
341  * Reuse the currently mapped page in the GPADL if possible.
342  */
343 static ssize_t gpadl_iter_io(GpadlIter *iter, void *buf, uint32_t len)
344 {
345     ssize_t ret = len;
346 
347     assert(iter->active);
348 
349     while (len) {
350         uint32_t off_in_page = iter->off & ~TARGET_PAGE_MASK;
351         uint32_t pgleft = TARGET_PAGE_SIZE - off_in_page;
352         uint32_t cplen = MIN(pgleft, len);
353         void *p;
354 
355         /* try to reuse the cached mapping */
356         if (iter->map) {
357             uint32_t map_start_in_page =
358                 (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
359             uint32_t off_base = iter->off & ~TARGET_PAGE_MASK;
360             uint32_t mapped_base = (iter->last_off - 1) & ~TARGET_PAGE_MASK;
361             if (off_base != mapped_base || off_in_page < map_start_in_page) {
362                 gpadl_iter_cache_unmap(iter);
363                 iter->map = NULL;
364             }
365         }
366 
367         if (!iter->map) {
368             dma_addr_t maddr;
369             dma_addr_t mlen = pgleft;
370             uint32_t idx = iter->off >> TARGET_PAGE_BITS;
371             assert(idx < iter->gpadl->num_gfns);
372 
373             maddr = (iter->gpadl->gfns[idx] << TARGET_PAGE_BITS) | off_in_page;
374 
375             iter->map = dma_memory_map(iter->as, maddr, &mlen, iter->dir);
376             if (mlen != pgleft) {
377                 dma_memory_unmap(iter->as, iter->map, mlen, iter->dir, 0);
378                 iter->map = NULL;
379                 return -EFAULT;
380             }
381         }
382 
383         p = (void *)(((uintptr_t)iter->map & TARGET_PAGE_MASK) | off_in_page);
384         if (iter->dir == DMA_DIRECTION_FROM_DEVICE) {
385             memcpy(p, buf, cplen);
386         } else {
387             memcpy(buf, p, cplen);
388         }
389 
390         buf += cplen;
391         len -= cplen;
392         iter->off += cplen;
393         iter->last_off = iter->off;
394     }
395 
396     return ret;
397 }
398 
399 /*
400  * Position the iterator @iter at new offset @new_off.
401  * The cached mapping, if any, is left in place; the next gpadl_iter_io() will
402  * drop it if it doesn't cover the new offset.
403  */
404 static inline void gpadl_iter_seek(GpadlIter *iter, uint32_t new_off)
405 {
406     assert(iter->active);
407     iter->off = new_off;
408 }
409 
410 /*
411  * Start a series of i/o on the GPADL.
412  * After this, i/o and seek operations on @iter become legal.
413  */
414 static inline void gpadl_iter_start_io(GpadlIter *iter)
415 {
416     assert(!iter->active);
417     /* mapping is cached lazily on i/o */
418     iter->map = NULL;
419     iter->active = true;
420 }
421 
422 /*
423  * End the earlier started series of i/o on the GPADL and release the cached
424  * mapping if any.
425  */
426 static inline void gpadl_iter_end_io(GpadlIter *iter)
427 {
428     assert(iter->active);
429 
430     if (iter->map) {
431         gpadl_iter_cache_unmap(iter);
432     }
433 
434     iter->active = false;
435 }
436 
437 static void vmbus_resched(VMBus *vmbus);
438 static void vmbus_msg_cb(void *data, int status);
439 
440 ssize_t vmbus_iov_to_gpadl(VMBusChannel *chan, VMBusGpadl *gpadl, uint32_t off,
441                            const struct iovec *iov, size_t iov_cnt)
442 {
443     GpadlIter iter;
444     size_t i;
445     ssize_t ret = 0;
446 
447     gpadl_iter_init(&iter, gpadl, chan->dev->dma_as,
448                     DMA_DIRECTION_FROM_DEVICE);
449     gpadl_iter_start_io(&iter);
450     gpadl_iter_seek(&iter, off);
451     for (i = 0; i < iov_cnt; i++) {
452         ret = gpadl_iter_io(&iter, iov[i].iov_base, iov[i].iov_len);
453         if (ret < 0) {
454             goto out;
455         }
456     }
457 out:
458     gpadl_iter_end_io(&iter);
459     return ret;
460 }
461 
462 int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
463                   unsigned iov_cnt, size_t len, size_t off)
464 {
465     int ret_cnt = 0, ret;
466     unsigned i;
467     QEMUSGList *sgl = &req->sgl;
468     ScatterGatherEntry *sg = sgl->sg;
469 
470     for (i = 0; i < sgl->nsg; i++) {
471         if (sg[i].len > off) {
472             break;
473         }
474         off -= sg[i].len;
475     }
476     for (; len && i < sgl->nsg; i++) {
477         dma_addr_t mlen = MIN(sg[i].len - off, len);
478         dma_addr_t addr = sg[i].base + off;
479         len -= mlen;
480         off = 0;
481 
482         for (; mlen; ret_cnt++) {
483             dma_addr_t l = mlen;
484             dma_addr_t a = addr;
485 
486             if (ret_cnt == iov_cnt) {
487                 ret = -ENOBUFS;
488                 goto err;
489             }
490 
491             iov[ret_cnt].iov_base = dma_memory_map(sgl->as, a, &l, dir);
492             if (!l) {
493                 ret = -EFAULT;
494                 goto err;
495             }
496             iov[ret_cnt].iov_len = l;
497             addr += l;
498             mlen -= l;
499         }
500     }
501 
502     return ret_cnt;
503 err:
504     vmbus_unmap_sgl(req, dir, iov, ret_cnt, 0);
505     return ret;
506 }
507 
508 void vmbus_unmap_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
509                      unsigned iov_cnt, size_t accessed)
510 {
511     QEMUSGList *sgl = &req->sgl;
512     unsigned i;
513 
514     for (i = 0; i < iov_cnt; i++) {
515         size_t acsd = MIN(accessed, iov[i].iov_len);
516         dma_memory_unmap(sgl->as, iov[i].iov_base, iov[i].iov_len, dir, acsd);
517         accessed -= acsd;
518     }
519 }
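/*
 * A rough usage sketch for a device that wants direct access to the guest
 * buffers of a GPA-direct request (hypothetical caller, error handling
 * elided; the iovec count is chosen by the device):
 *
 *     struct iovec iov[16];
 *     int n = vmbus_map_sgl(req, DMA_DIRECTION_TO_DEVICE, iov,
 *                           ARRAY_SIZE(iov), req->sgl.size, 0);
 *     if (n >= 0) {
 *         ... consume the payload from iov[0..n-1] ...
 *         vmbus_unmap_sgl(req, DMA_DIRECTION_TO_DEVICE, iov, n, accessed);
 *     }
 */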
520 
521 static const VMStateDescription vmstate_gpadl = {
522     .name = "vmbus/gpadl",
523     .version_id = 0,
524     .minimum_version_id = 0,
525     .fields = (VMStateField[]) {
526         VMSTATE_UINT32(id, VMBusGpadl),
527         VMSTATE_UINT32(child_relid, VMBusGpadl),
528         VMSTATE_UINT32(num_gfns, VMBusGpadl),
529         VMSTATE_UINT32(seen_gfns, VMBusGpadl),
530         VMSTATE_VARRAY_UINT32_ALLOC(gfns, VMBusGpadl, num_gfns, 0,
531                                     vmstate_info_uint64, uint64_t),
532         VMSTATE_UINT8(state, VMBusGpadl),
533         VMSTATE_END_OF_LIST()
534     }
535 };
536 
537 /*
538  * Wrap the index into a ring buffer of @len bytes.
539  * @idx is assumed not to exceed twice the size of the ringbuffer, so only
540  * a single wraparound is considered.
541  */
542 static inline uint32_t rb_idx_wrap(uint32_t idx, uint32_t len)
543 {
544     if (idx >= len) {
545         idx -= len;
546     }
547     return idx;
548 }
549 
550 /*
551  * Circular difference between two indices into a ring buffer of @len bytes.
552  * @allow_catchup - whether @idx1 may catch up with @idx2; e.g. the read index
553  * may catch up with the write index but not vice versa.
554  */
555 static inline uint32_t rb_idx_delta(uint32_t idx1, uint32_t idx2, uint32_t len,
556                                     bool allow_catchup)
557 {
558     return rb_idx_wrap(idx2 + len - idx1 - !allow_catchup, len);
559 }
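/*
 * Worked example: with a data area of len = 0x1000 bytes, wr_idx = 0xf00 and
 * rd_idx = 0x100, the consumer has rb_idx_delta(0x100, 0xf00, 0x1000, true) =
 * 0xe00 bytes of data to read, while the producer has
 * rb_idx_delta(0xf00, 0x100, 0x1000, false) = 0x1ff bytes of free space: with
 * @allow_catchup false one byte is always kept unused, so that equal indices
 * unambiguously mean an empty ring.
 */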
560 
561 static vmbus_ring_buffer *ringbuf_map_hdr(VMBusRingBufCommon *ringbuf)
562 {
563     vmbus_ring_buffer *rb;
564     dma_addr_t mlen = sizeof(*rb);
565 
566     rb = dma_memory_map(ringbuf->as, ringbuf->rb_addr, &mlen,
567                         DMA_DIRECTION_FROM_DEVICE);
568     if (mlen != sizeof(*rb)) {
569         dma_memory_unmap(ringbuf->as, rb, mlen,
570                          DMA_DIRECTION_FROM_DEVICE, 0);
571         return NULL;
572     }
573     return rb;
574 }
575 
576 static void ringbuf_unmap_hdr(VMBusRingBufCommon *ringbuf,
577                               vmbus_ring_buffer *rb, bool dirty)
578 {
579     assert(rb);
580 
581     dma_memory_unmap(ringbuf->as, rb, sizeof(*rb), DMA_DIRECTION_FROM_DEVICE,
582                      dirty ? sizeof(*rb) : 0);
583 }
584 
585 static void ringbuf_init_common(VMBusRingBufCommon *ringbuf, VMBusGpadl *gpadl,
586                                 AddressSpace *as, DMADirection dir,
587                                 uint32_t begin, uint32_t end)
588 {
589     ringbuf->as = as;
590     ringbuf->rb_addr = gpadl->gfns[begin] << TARGET_PAGE_BITS;
591     ringbuf->base = (begin + 1) << TARGET_PAGE_BITS;
592     ringbuf->len = (end - begin - 1) << TARGET_PAGE_BITS;
593     gpadl_iter_init(&ringbuf->iter, gpadl, as, dir);
594 }
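/*
 * Layout of a channel's ring buffer GPADL (see ringbufs_init() below): pages
 * [0, ringbuf_send_offset) back the guest-to-host ring (the host's receive
 * ring), pages [ringbuf_send_offset, num_gfns) the host-to-guest ring (the
 * host's send ring).  Within each ring the first page holds the
 * vmbus_ring_buffer header and the remaining pages form the circular data
 * area, hence base = (begin + 1) pages and len = (end - begin - 1) pages
 * above.
 */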
595 
596 static int ringbufs_init(VMBusChannel *chan)
597 {
598     vmbus_ring_buffer *rb;
599     VMBusSendRingBuf *send_ringbuf = &chan->send_ringbuf;
600     VMBusRecvRingBuf *recv_ringbuf = &chan->recv_ringbuf;
601 
602     if (chan->ringbuf_send_offset <= 1 ||
603         chan->gpadl->num_gfns <= chan->ringbuf_send_offset + 1) {
604         return -EINVAL;
605     }
606 
607     ringbuf_init_common(&recv_ringbuf->common, chan->gpadl, chan->dev->dma_as,
608                         DMA_DIRECTION_TO_DEVICE, 0, chan->ringbuf_send_offset);
609     ringbuf_init_common(&send_ringbuf->common, chan->gpadl, chan->dev->dma_as,
610                         DMA_DIRECTION_FROM_DEVICE, chan->ringbuf_send_offset,
611                         chan->gpadl->num_gfns);
612     send_ringbuf->wanted = 0;
613     send_ringbuf->reserved = 0;
614 
615     rb = ringbuf_map_hdr(&recv_ringbuf->common);
616     if (!rb) {
617         return -EFAULT;
618     }
619     recv_ringbuf->rd_idx = recv_ringbuf->last_rd_idx = rb->read_index;
620     ringbuf_unmap_hdr(&recv_ringbuf->common, rb, false);
621 
622     rb = ringbuf_map_hdr(&send_ringbuf->common);
623     if (!rb) {
624         return -EFAULT;
625     }
626     send_ringbuf->wr_idx = send_ringbuf->last_wr_idx = rb->write_index;
627     send_ringbuf->last_seen_rd_idx = rb->read_index;
628     rb->feature_bits |= VMBUS_RING_BUFFER_FEAT_PENDING_SZ;
629     ringbuf_unmap_hdr(&send_ringbuf->common, rb, true);
630 
631     if (recv_ringbuf->rd_idx >= recv_ringbuf->common.len ||
632         send_ringbuf->wr_idx >= send_ringbuf->common.len) {
633         return -EOVERFLOW;
634     }
635 
636     return 0;
637 }
638 
639 /*
640  * Perform io between the GPADL-backed ringbuffer @ringbuf and @buf, wrapping
641  * around if needed.
642  * @len is assumed not to exceed the size of the ringbuffer, so only a single
643  * wraparound is considered.
644  */
645 static ssize_t ringbuf_io(VMBusRingBufCommon *ringbuf, void *buf, uint32_t len)
646 {
647     ssize_t ret1 = 0, ret2 = 0;
648     uint32_t remain = ringbuf->len + ringbuf->base - ringbuf->iter.off;
649 
650     if (len >= remain) {
651         ret1 = gpadl_iter_io(&ringbuf->iter, buf, remain);
652         if (ret1 < 0) {
653             return ret1;
654         }
655         gpadl_iter_seek(&ringbuf->iter, ringbuf->base);
656         buf += remain;
657         len -= remain;
658     }
659     ret2 = gpadl_iter_io(&ringbuf->iter, buf, len);
660     if (ret2 < 0) {
661         return ret2;
662     }
663     return ret1 + ret2;
664 }
665 
666 /*
667  * Position the circular iterator within @ringbuf to offset @new_off, wrapping
668  * around if needed.
669  * @new_off is assumed not to exceed twice the size of the ringbuffer, so only
670  * a single wraparound is considered.
671  */
672 static inline void ringbuf_seek(VMBusRingBufCommon *ringbuf, uint32_t new_off)
673 {
674     gpadl_iter_seek(&ringbuf->iter,
675                     ringbuf->base + rb_idx_wrap(new_off, ringbuf->len));
676 }
677 
678 static inline uint32_t ringbuf_tell(VMBusRingBufCommon *ringbuf)
679 {
680     return ringbuf->iter.off - ringbuf->base;
681 }
682 
683 static inline void ringbuf_start_io(VMBusRingBufCommon *ringbuf)
684 {
685     gpadl_iter_start_io(&ringbuf->iter);
686 }
687 
688 static inline void ringbuf_end_io(VMBusRingBufCommon *ringbuf)
689 {
690     gpadl_iter_end_io(&ringbuf->iter);
691 }
692 
693 VMBusDevice *vmbus_channel_device(VMBusChannel *chan)
694 {
695     return chan->dev;
696 }
697 
698 VMBusChannel *vmbus_device_channel(VMBusDevice *dev, uint32_t chan_idx)
699 {
700     if (chan_idx >= dev->num_channels) {
701         return NULL;
702     }
703     return &dev->channels[chan_idx];
704 }
705 
706 uint32_t vmbus_channel_idx(VMBusChannel *chan)
707 {
708     return chan - chan->dev->channels;
709 }
710 
711 void vmbus_channel_notify_host(VMBusChannel *chan)
712 {
713     event_notifier_set(&chan->notifier);
714 }
715 
716 bool vmbus_channel_is_open(VMBusChannel *chan)
717 {
718     return chan->is_open;
719 }
720 
721 /*
722  * Notify the guest side about the data to work on in the channel ring buffer.
723  * The notification is done by signaling a dedicated per-channel SynIC event
724  * flag (more recent guests) or setting a bit in the interrupt page and firing
725  * the VMBus SINT (older guests).
726  */
727 static int vmbus_channel_notify_guest(VMBusChannel *chan)
728 {
729     int res = 0;
730     unsigned long *int_map, mask;
731     unsigned idx;
732     hwaddr addr = chan->vmbus->int_page_gpa;
733     hwaddr len = TARGET_PAGE_SIZE / 2, dirty = 0;
734 
735     trace_vmbus_channel_notify_guest(chan->id);
736 
737     if (!addr) {
738         return hyperv_set_event_flag(chan->notify_route, chan->id);
739     }
740 
741     int_map = cpu_physical_memory_map(addr, &len, 1);
742     if (len != TARGET_PAGE_SIZE / 2) {
743         res = -ENXIO;
744         goto unmap;
745     }
746 
747     idx = BIT_WORD(chan->id);
748     mask = BIT_MASK(chan->id);
749     if ((atomic_fetch_or(&int_map[idx], mask) & mask) != mask) {
750         res = hyperv_sint_route_set_sint(chan->notify_route);
751         dirty = len;
752     }
753 
754 unmap:
755     cpu_physical_memory_unmap(int_map, len, 1, dirty);
756     return res;
757 }
758 
759 #define VMBUS_PKT_TRAILER      sizeof(uint64_t)
760 
761 static uint32_t vmbus_pkt_hdr_set_offsets(vmbus_packet_hdr *hdr,
762                                           uint32_t desclen, uint32_t msglen)
763 {
764     hdr->offset_qwords = sizeof(*hdr) / sizeof(uint64_t) +
765         DIV_ROUND_UP(desclen, sizeof(uint64_t));
766     hdr->len_qwords = hdr->offset_qwords +
767         DIV_ROUND_UP(msglen, sizeof(uint64_t));
768     return hdr->len_qwords * sizeof(uint64_t) + VMBUS_PKT_TRAILER;
769 }
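/*
 * Worked example, assuming the 16-byte vmbus_packet_hdr: for desclen = 0 and
 * msglen = 24, offset_qwords = 2, len_qwords = 2 + DIV_ROUND_UP(24, 8) = 5,
 * and the value returned (the on-ring footprint of the packet) is
 * 5 * 8 + VMBUS_PKT_TRAILER = 48 bytes.
 */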
770 
771 /*
772  * Simplified ring buffer operation with paired barriers annotations in the
773  * producer and consumer loops:
774  *
775  * producer                           * consumer
776  * ~~~~~~~~                           * ~~~~~~~~
777  * write pending_send_sz              * read write_index
778  * smp_mb                       [A]   * smp_mb                       [C]
779  * read read_index                    * read packet
780  * smp_mb                       [B]   * read/write out-of-band data
781  * read/write out-of-band data        * smp_mb                       [B]
782  * write packet                       * write read_index
783  * smp_mb                       [C]   * smp_mb                       [A]
784  * write write_index                  * read pending_send_sz
785  * smp_wmb                      [D]   * smp_rmb                      [D]
786  * write pending_send_sz              * read write_index
787  * ...                                * ...
788  */
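/*
 * In this file the producer column corresponds to the host send path
 * (vmbus_channel_reserve, vmbus_channel_send, ringbuf_send_update_idx) and
 * the consumer column to the host receive path (vmbus_channel_recv_start,
 * vmbus_channel_recv_peek/_pop, vmbus_channel_recv_done); the matching smp_*
 * calls below are tagged with the same [A]..[D] pair letters.
 */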
789 
790 static inline uint32_t ringbuf_send_avail(VMBusSendRingBuf *ringbuf)
791 {
792     /* don't trust guest data */
793     if (ringbuf->last_seen_rd_idx >= ringbuf->common.len) {
794         return 0;
795     }
796     return rb_idx_delta(ringbuf->wr_idx, ringbuf->last_seen_rd_idx,
797                         ringbuf->common.len, false);
798 }
799 
800 static ssize_t ringbuf_send_update_idx(VMBusChannel *chan)
801 {
802     VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
803     vmbus_ring_buffer *rb;
804     uint32_t written;
805 
806     written = rb_idx_delta(ringbuf->last_wr_idx, ringbuf->wr_idx,
807                            ringbuf->common.len, true);
808     if (!written) {
809         return 0;
810     }
811 
812     rb = ringbuf_map_hdr(&ringbuf->common);
813     if (!rb) {
814         return -EFAULT;
815     }
816 
817     ringbuf->reserved -= written;
818 
819     /* prevent reorder with the data operation and packet write */
820     smp_mb();                   /* barrier pair [C] */
821     rb->write_index = ringbuf->wr_idx;
822 
823     /*
824      * If the producer earlier indicated that it wants to be notified when the
825      * consumer frees a certain amount of space in the ring buffer, that amount
826      * is reduced by the size of the completed write.
827      */
828     if (ringbuf->wanted) {
829         /* otherwise reservation would fail */
830         assert(ringbuf->wanted < written);
831         ringbuf->wanted -= written;
832         /* prevent reorder with write_index write */
833         smp_wmb();              /* barrier pair [D] */
834         rb->pending_send_sz = ringbuf->wanted;
835     }
836 
837     /* prevent reorder with write_index or pending_send_sz write */
838     smp_mb();                   /* barrier pair [A] */
839     ringbuf->last_seen_rd_idx = rb->read_index;
840 
841     /*
842      * The consumer may have missed the reduction of pending_send_sz and skipped
843      * the notification, so re-check the blocking condition and, if it no longer
844      * holds, ensure another iteration gets processed by simulating the
845      * consumer's notification.
846      */
847     if (ringbuf_send_avail(ringbuf) >= ringbuf->wanted) {
848         vmbus_channel_notify_host(chan);
849     }
850 
851     /* skip notification by consumer's request */
852     if (rb->interrupt_mask) {
853         goto out;
854     }
855 
856     /*
857      * The consumer hasn't caught up with the producer's previous state, so it's
858      * not blocked.
859      * (last_seen_rd_idx comes from the guest but it's safe to use w/o
860      * validation here as it only affects notification.)
861      */
862     if (rb_idx_delta(ringbuf->last_seen_rd_idx, ringbuf->wr_idx,
863                      ringbuf->common.len, true) > written) {
864         goto out;
865     }
866 
867     vmbus_channel_notify_guest(chan);
868 out:
869     ringbuf_unmap_hdr(&ringbuf->common, rb, true);
870     ringbuf->last_wr_idx = ringbuf->wr_idx;
871     return written;
872 }
873 
874 int vmbus_channel_reserve(VMBusChannel *chan,
875                           uint32_t desclen, uint32_t msglen)
876 {
877     VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
878     vmbus_ring_buffer *rb = NULL;
879     vmbus_packet_hdr hdr;
880     uint32_t needed = ringbuf->reserved +
881         vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);
882 
883     /* avoid touching the guest memory if possible */
884     if (likely(needed <= ringbuf_send_avail(ringbuf))) {
885         goto success;
886     }
887 
888     rb = ringbuf_map_hdr(&ringbuf->common);
889     if (!rb) {
890         return -EFAULT;
891     }
892 
893     /* fetch read index from guest memory and try again */
894     ringbuf->last_seen_rd_idx = rb->read_index;
895 
896     if (likely(needed <= ringbuf_send_avail(ringbuf))) {
897         goto success;
898     }
899 
900     rb->pending_send_sz = needed;
901 
902     /*
903      * The consumer may have made progress and freed up some space before
904      * seeing the updated pending_send_sz, so re-read read_index (preventing
905      * reorder with the pending_send_sz write) and try again.
906      */
907     smp_mb();                   /* barrier pair [A] */
908     ringbuf->last_seen_rd_idx = rb->read_index;
909 
910     if (needed > ringbuf_send_avail(ringbuf)) {
911         goto out;
912     }
913 
914 success:
915     ringbuf->reserved = needed;
916     needed = 0;
917 
918     /* clear pending_send_sz if it was set */
919     if (ringbuf->wanted) {
920         if (!rb) {
921             rb = ringbuf_map_hdr(&ringbuf->common);
922             if (!rb) {
923                 /* failure to clear pending_send_sz is non-fatal */
924                 goto out;
925             }
926         }
927 
928         rb->pending_send_sz = 0;
929     }
930 
931     /* prevent reorder of the following data operation with read_index read */
932     smp_mb();                   /* barrier pair [B] */
933 
934 out:
935     if (rb) {
936         ringbuf_unmap_hdr(&ringbuf->common, rb, ringbuf->wanted == needed);
937     }
938     ringbuf->wanted = needed;
939     return needed ? -ENOSPC : 0;
940 }
941 
942 ssize_t vmbus_channel_send(VMBusChannel *chan, uint16_t pkt_type,
943                            void *desc, uint32_t desclen,
944                            void *msg, uint32_t msglen,
945                            bool need_comp, uint64_t transaction_id)
946 {
947     ssize_t ret = 0;
948     vmbus_packet_hdr hdr;
949     uint32_t totlen;
950     VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
951 
952     if (!vmbus_channel_is_open(chan)) {
953         return -EINVAL;
954     }
955 
956     totlen = vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);
957     hdr.type = pkt_type;
958     hdr.flags = need_comp ? VMBUS_PACKET_FLAG_REQUEST_COMPLETION : 0;
959     hdr.transaction_id = transaction_id;
960 
961     assert(totlen <= ringbuf->reserved);
962 
963     ringbuf_start_io(&ringbuf->common);
964     ringbuf_seek(&ringbuf->common, ringbuf->wr_idx);
965     ret = ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr));
966     if (ret < 0) {
967         goto out;
968     }
969     if (desclen) {
970         assert(desc);
971         ret = ringbuf_io(&ringbuf->common, desc, desclen);
972         if (ret < 0) {
973             goto out;
974         }
975         ringbuf_seek(&ringbuf->common,
976                      ringbuf->wr_idx + hdr.offset_qwords * sizeof(uint64_t));
977     }
978     ret = ringbuf_io(&ringbuf->common, msg, msglen);
979     if (ret < 0) {
980         goto out;
981     }
982     ringbuf_seek(&ringbuf->common, ringbuf->wr_idx + totlen);
983     ringbuf->wr_idx = ringbuf_tell(&ringbuf->common);
984     ret = 0;
985 out:
986     ringbuf_end_io(&ringbuf->common);
987     if (ret) {
988         return ret;
989     }
990     return ringbuf_send_update_idx(chan);
991 }
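/*
 * A minimal host->guest send sketch for device code (hypothetical caller):
 *
 *     if (vmbus_channel_reserve(chan, 0, msglen) == 0) {
 *         vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND, NULL, 0,
 *                            msg, msglen, false, tx_id);
 *     }
 *
 * On -ENOSPC the reservation arms pending_send_sz, and the guest is expected
 * to signal the channel once enough space has been freed, re-invoking the
 * device worker so it can retry.
 */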
992 
993 ssize_t vmbus_channel_send_completion(VMBusChanReq *req,
994                                       void *msg, uint32_t msglen)
995 {
996     assert(req->need_comp);
997     return vmbus_channel_send(req->chan, VMBUS_PACKET_COMP, NULL, 0,
998                               msg, msglen, false, req->transaction_id);
999 }
1000 
1001 static int sgl_from_gpa_ranges(QEMUSGList *sgl, VMBusDevice *dev,
1002                                VMBusRingBufCommon *ringbuf, uint32_t len)
1003 {
1004     int ret;
1005     vmbus_pkt_gpa_direct hdr;
1006     hwaddr curaddr = 0;
1007     hwaddr curlen = 0;
1008     int num;
1009 
1010     if (len < sizeof(hdr)) {
1011         return -EIO;
1012     }
1013     ret = ringbuf_io(ringbuf, &hdr, sizeof(hdr));
1014     if (ret < 0) {
1015         return ret;
1016     }
1017     len -= sizeof(hdr);
1018 
1019     num = (len - hdr.rangecount * sizeof(vmbus_gpa_range)) / sizeof(uint64_t);
1020     if (num < 0) {
1021         return -EIO;
1022     }
1023     qemu_sglist_init(sgl, DEVICE(dev), num, ringbuf->as);
1024 
1025     for (; hdr.rangecount; hdr.rangecount--) {
1026         vmbus_gpa_range range;
1027 
1028         if (len < sizeof(range)) {
1029             goto eio;
1030         }
1031         ret = ringbuf_io(ringbuf, &range, sizeof(range));
1032         if (ret < 0) {
1033             goto err;
1034         }
1035         len -= sizeof(range);
1036 
1037         if (range.byte_offset & TARGET_PAGE_MASK) {
1038             goto eio;
1039         }
1040 
1041         for (; range.byte_count; range.byte_offset = 0) {
1042             uint64_t paddr;
1043             uint32_t plen = MIN(range.byte_count,
1044                                 TARGET_PAGE_SIZE - range.byte_offset);
1045 
1046             if (len < sizeof(uint64_t)) {
1047                 goto eio;
1048             }
1049             ret = ringbuf_io(ringbuf, &paddr, sizeof(paddr));
1050             if (ret < 0) {
1051                 goto err;
1052             }
1053             len -= sizeof(uint64_t);
1054             paddr <<= TARGET_PAGE_BITS;
1055             paddr |= range.byte_offset;
1056             range.byte_count -= plen;
1057 
1058             if (curaddr + curlen == paddr) {
1059                 /* consecutive fragments - join */
1060                 curlen += plen;
1061             } else {
1062                 if (curlen) {
1063                     qemu_sglist_add(sgl, curaddr, curlen);
1064                 }
1065 
1066                 curaddr = paddr;
1067                 curlen = plen;
1068             }
1069         }
1070     }
1071 
1072     if (curlen) {
1073         qemu_sglist_add(sgl, curaddr, curlen);
1074     }
1075 
1076     return 0;
1077 eio:
1078     ret = -EIO;
1079 err:
1080     qemu_sglist_destroy(sgl);
1081     return ret;
1082 }
1083 
1084 static VMBusChanReq *vmbus_alloc_req(VMBusChannel *chan,
1085                                      uint32_t size, uint16_t pkt_type,
1086                                      uint32_t msglen, uint64_t transaction_id,
1087                                      bool need_comp)
1088 {
1089     VMBusChanReq *req;
1090     uint32_t msgoff = QEMU_ALIGN_UP(size, __alignof__(*req->msg));
1091     uint32_t totlen = msgoff + msglen;
1092 
1093     req = g_malloc0(totlen);
1094     req->chan = chan;
1095     req->pkt_type = pkt_type;
1096     req->msg = (void *)req + msgoff;
1097     req->msglen = msglen;
1098     req->transaction_id = transaction_id;
1099     req->need_comp = need_comp;
1100     return req;
1101 }
1102 
1103 int vmbus_channel_recv_start(VMBusChannel *chan)
1104 {
1105     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1106     vmbus_ring_buffer *rb;
1107 
1108     rb = ringbuf_map_hdr(&ringbuf->common);
1109     if (!rb) {
1110         return -EFAULT;
1111     }
1112     ringbuf->last_seen_wr_idx = rb->write_index;
1113     ringbuf_unmap_hdr(&ringbuf->common, rb, false);
1114 
1115     if (ringbuf->last_seen_wr_idx >= ringbuf->common.len) {
1116         return -EOVERFLOW;
1117     }
1118 
1119     /* prevent reorder of the following data operation with write_index read */
1120     smp_mb();                   /* barrier pair [C] */
1121     return 0;
1122 }
1123 
1124 void *vmbus_channel_recv_peek(VMBusChannel *chan, uint32_t size)
1125 {
1126     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1127     vmbus_packet_hdr hdr = {};
1128     VMBusChanReq *req;
1129     uint32_t avail;
1130     uint32_t totlen, pktlen, msglen, msgoff, desclen;
1131 
1132     assert(size >= sizeof(*req));
1133 
1134     /* safe as last_seen_wr_idx is validated in vmbus_channel_recv_start */
1135     avail = rb_idx_delta(ringbuf->rd_idx, ringbuf->last_seen_wr_idx,
1136                          ringbuf->common.len, true);
1137     if (avail < sizeof(hdr)) {
1138         return NULL;
1139     }
1140 
1141     ringbuf_seek(&ringbuf->common, ringbuf->rd_idx);
1142     if (ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr)) < 0) {
1143         return NULL;
1144     }
1145 
1146     pktlen = hdr.len_qwords * sizeof(uint64_t);
1147     totlen = pktlen + VMBUS_PKT_TRAILER;
1148     if (totlen > avail) {
1149         return NULL;
1150     }
1151 
1152     msgoff = hdr.offset_qwords * sizeof(uint64_t);
1153     if (msgoff > pktlen || msgoff < sizeof(hdr)) {
1154         error_report("%s: malformed packet: %u %u", __func__, msgoff, pktlen);
1155         return NULL;
1156     }
1157 
1158     msglen = pktlen - msgoff;
1159 
1160     req = vmbus_alloc_req(chan, size, hdr.type, msglen, hdr.transaction_id,
1161                           hdr.flags & VMBUS_PACKET_FLAG_REQUEST_COMPLETION);
1162 
1163     switch (hdr.type) {
1164     case VMBUS_PACKET_DATA_USING_GPA_DIRECT:
1165         desclen = msgoff - sizeof(hdr);
1166         if (sgl_from_gpa_ranges(&req->sgl, chan->dev, &ringbuf->common,
1167                                 desclen) < 0) {
1168             error_report("%s: failed to convert GPA ranges to SGL", __func__);
1169             goto free_req;
1170         }
1171         break;
1172     case VMBUS_PACKET_DATA_INBAND:
1173     case VMBUS_PACKET_COMP:
1174         break;
1175     default:
1176         error_report("%s: unexpected msg type: %x", __func__, hdr.type);
1177         goto free_req;
1178     }
1179 
1180     ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + msgoff);
1181     if (ringbuf_io(&ringbuf->common, req->msg, msglen) < 0) {
1182         goto free_req;
1183     }
1184     ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + totlen);
1185 
1186     return req;
1187 free_req:
1188     vmbus_free_req(req);
1189     return NULL;
1190 }
1191 
1192 void vmbus_channel_recv_pop(VMBusChannel *chan)
1193 {
1194     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1195     ringbuf->rd_idx = ringbuf_tell(&ringbuf->common);
1196 }
1197 
1198 ssize_t vmbus_channel_recv_done(VMBusChannel *chan)
1199 {
1200     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1201     vmbus_ring_buffer *rb;
1202     uint32_t read;
1203 
1204     read = rb_idx_delta(ringbuf->last_rd_idx, ringbuf->rd_idx,
1205                         ringbuf->common.len, true);
1206     if (!read) {
1207         return 0;
1208     }
1209 
1210     rb = ringbuf_map_hdr(&ringbuf->common);
1211     if (!rb) {
1212         return -EFAULT;
1213     }
1214 
1215     /* prevent reorder with the data operation and packet read */
1216     smp_mb();                   /* barrier pair [B] */
1217     rb->read_index = ringbuf->rd_idx;
1218 
1219     /* prevent reorder of the following pending_send_sz read */
1220     smp_mb();                   /* barrier pair [A] */
1221 
1222     if (rb->interrupt_mask) {
1223         goto out;
1224     }
1225 
1226     if (rb->feature_bits & VMBUS_RING_BUFFER_FEAT_PENDING_SZ) {
1227         uint32_t wr_idx, wr_avail;
1228         uint32_t wanted = rb->pending_send_sz;
1229 
1230         if (!wanted) {
1231             goto out;
1232         }
1233 
1234         /* prevent reorder with pending_send_sz read */
1235         smp_rmb();              /* barrier pair [D] */
1236         wr_idx = rb->write_index;
1237 
1238         wr_avail = rb_idx_delta(wr_idx, ringbuf->rd_idx, ringbuf->common.len,
1239                                 true);
1240 
1241         /* the producer wasn't blocked on the consumer state */
1242         if (wr_avail >= read + wanted) {
1243             goto out;
1244         }
1245         /* there's not enough space for the producer to make progress */
1246         if (wr_avail < wanted) {
1247             goto out;
1248         }
1249     }
1250 
1251     vmbus_channel_notify_guest(chan);
1252 out:
1253     ringbuf_unmap_hdr(&ringbuf->common, rb, true);
1254     ringbuf->last_rd_idx = ringbuf->rd_idx;
1255     return read;
1256 }
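/*
 * A typical receive loop in a device worker might look like (hypothetical
 * device code, error handling elided):
 *
 *     vmbus_channel_recv_start(chan);
 *     while ((req = vmbus_channel_recv_peek(chan, sizeof(VMBusChanReq)))) {
 *         ... handle the request, sending a completion with
 *             vmbus_channel_send_completion() if req->need_comp ...
 *         vmbus_channel_recv_pop(chan);
 *         vmbus_free_req(req);
 *     }
 *     vmbus_channel_recv_done(chan);
 */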
1257 
1258 void vmbus_free_req(void *req)
1259 {
1260     VMBusChanReq *r = req;
1261 
1262     if (!req) {
1263         return;
1264     }
1265 
1266     if (r->sgl.dev) {
1267         qemu_sglist_destroy(&r->sgl);
1268     }
1269     g_free(req);
1270 }
1271 
1272 static const VMStateDescription vmstate_sgent = {
1273     .name = "vmbus/sgentry",
1274     .version_id = 0,
1275     .minimum_version_id = 0,
1276     .fields = (VMStateField[]) {
1277         VMSTATE_UINT64(base, ScatterGatherEntry),
1278         VMSTATE_UINT64(len, ScatterGatherEntry),
1279         VMSTATE_END_OF_LIST()
1280     }
1281 };
1282 
1283 typedef struct VMBusChanReqSave {
1284     uint16_t chan_idx;
1285     uint16_t pkt_type;
1286     uint32_t msglen;
1287     void *msg;
1288     uint64_t transaction_id;
1289     bool need_comp;
1290     uint32_t num;
1291     ScatterGatherEntry *sgl;
1292 } VMBusChanReqSave;
1293 
1294 static const VMStateDescription vmstate_vmbus_chan_req = {
1295     .name = "vmbus/vmbus_chan_req",
1296     .version_id = 0,
1297     .minimum_version_id = 0,
1298     .fields = (VMStateField[]) {
1299         VMSTATE_UINT16(chan_idx, VMBusChanReqSave),
1300         VMSTATE_UINT16(pkt_type, VMBusChanReqSave),
1301         VMSTATE_UINT32(msglen, VMBusChanReqSave),
1302         VMSTATE_VBUFFER_ALLOC_UINT32(msg, VMBusChanReqSave, 0, NULL, msglen),
1303         VMSTATE_UINT64(transaction_id, VMBusChanReqSave),
1304         VMSTATE_BOOL(need_comp, VMBusChanReqSave),
1305         VMSTATE_UINT32(num, VMBusChanReqSave),
1306         VMSTATE_STRUCT_VARRAY_POINTER_UINT32(sgl, VMBusChanReqSave, num,
1307                                              vmstate_sgent, ScatterGatherEntry),
1308         VMSTATE_END_OF_LIST()
1309     }
1310 };
1311 
1312 void vmbus_save_req(QEMUFile *f, VMBusChanReq *req)
1313 {
1314     VMBusChanReqSave req_save;
1315 
1316     req_save.chan_idx = req->chan->subchan_idx;
1317     req_save.pkt_type = req->pkt_type;
1318     req_save.msglen = req->msglen;
1319     req_save.msg = req->msg;
1320     req_save.transaction_id = req->transaction_id;
1321     req_save.need_comp = req->need_comp;
1322     req_save.num = req->sgl.nsg;
1323     req_save.sgl = g_memdup(req->sgl.sg,
1324                             req_save.num * sizeof(ScatterGatherEntry));
1325 
1326     vmstate_save_state(f, &vmstate_vmbus_chan_req, &req_save, NULL);
1327 
1328     g_free(req_save.sgl);
1329 }
1330 
1331 void *vmbus_load_req(QEMUFile *f, VMBusDevice *dev, uint32_t size)
1332 {
1333     VMBusChanReqSave req_save;
1334     VMBusChanReq *req = NULL;
1335     VMBusChannel *chan = NULL;
1336     uint32_t i;
1337 
1338     vmstate_load_state(f, &vmstate_vmbus_chan_req, &req_save, 0);
1339 
1340     if (req_save.chan_idx >= dev->num_channels) {
1341         error_report("%s: %u(chan_idx) >= %u(num_channels)", __func__,
1342                      req_save.chan_idx, dev->num_channels);
1343         goto out;
1344     }
1345     chan = &dev->channels[req_save.chan_idx];
1346 
1347     if (vmbus_channel_reserve(chan, 0, req_save.msglen)) {
1348         goto out;
1349     }
1350 
1351     req = vmbus_alloc_req(chan, size, req_save.pkt_type, req_save.msglen,
1352                           req_save.transaction_id, req_save.need_comp);
1353     if (req_save.msglen) {
1354         memcpy(req->msg, req_save.msg, req_save.msglen);
1355     }
1356 
1357     for (i = 0; i < req_save.num; i++) {
1358         qemu_sglist_add(&req->sgl, req_save.sgl[i].base, req_save.sgl[i].len);
1359     }
1360 
1361 out:
1362     if (req_save.msglen) {
1363         g_free(req_save.msg);
1364     }
1365     if (req_save.num) {
1366         g_free(req_save.sgl);
1367     }
1368     return req;
1369 }
1370 
1371 static void channel_event_cb(EventNotifier *e)
1372 {
1373     VMBusChannel *chan = container_of(e, VMBusChannel, notifier);
1374     if (event_notifier_test_and_clear(e)) {
1375         /*
1376          * All receives are supposed to happen within the device worker, so
1377          * bracket it with ringbuf_start/end_io on the receive ringbuffer, and
1378          * potentially reuse the cached mapping throughout the worker.
1379          * Can't do this for sends as they may happen outside the device
1380          * worker.
1381          */
1382         VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1383         ringbuf_start_io(&ringbuf->common);
1384         chan->notify_cb(chan);
1385         ringbuf_end_io(&ringbuf->common);
1386 
1387     }
1388 }
1389 
1390 static int alloc_chan_id(VMBus *vmbus)
1391 {
1392     int ret;
1393 
1394     ret = find_next_zero_bit(vmbus->chanid_bitmap, VMBUS_CHANID_COUNT, 0);
1395     if (ret == VMBUS_CHANID_COUNT) {
1396         return -ENOMEM;
1397     }
1398     return ret + VMBUS_FIRST_CHANID;
1399 }
1400 
1401 static int register_chan_id(VMBusChannel *chan)
1402 {
1403     return test_and_set_bit(chan->id - VMBUS_FIRST_CHANID,
1404                             chan->vmbus->chanid_bitmap) ? -EEXIST : 0;
1405 }
1406 
1407 static void unregister_chan_id(VMBusChannel *chan)
1408 {
1409     clear_bit(chan->id - VMBUS_FIRST_CHANID, chan->vmbus->chanid_bitmap);
1410 }
1411 
1412 static uint32_t chan_connection_id(VMBusChannel *chan)
1413 {
1414     return VMBUS_CHAN_CONNECTION_OFFSET + chan->id;
1415 }
1416 
1417 static void init_channel(VMBus *vmbus, VMBusDevice *dev, VMBusDeviceClass *vdc,
1418                          VMBusChannel *chan, uint16_t idx, Error **errp)
1419 {
1420     int res;
1421 
1422     chan->dev = dev;
1423     chan->notify_cb = vdc->chan_notify_cb;
1424     chan->subchan_idx = idx;
1425     chan->vmbus = vmbus;
1426 
1427     res = alloc_chan_id(vmbus);
1428     if (res < 0) {
1429         error_setg(errp, "no spare channel id");
1430         return;
1431     }
1432     chan->id = res;
1433     register_chan_id(chan);
1434 
1435     /*
1436      * The guest drivers depend on the device subchannels (idx #1+) to be
1437      * offered after the primary channel (idx #0) of that device.  To ensure
1438      * that, record the channels on the channel list in the order they appear
1439      * within the device.
1440      */
1441     QTAILQ_INSERT_TAIL(&vmbus->channel_list, chan, link);
1442 }
1443 
1444 static void deinit_channel(VMBusChannel *chan)
1445 {
1446     assert(chan->state == VMCHAN_INIT);
1447     QTAILQ_REMOVE(&chan->vmbus->channel_list, chan, link);
1448     unregister_chan_id(chan);
1449 }
1450 
1451 static void create_channels(VMBus *vmbus, VMBusDevice *dev, Error **errp)
1452 {
1453     uint16_t i;
1454     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(dev);
1455     Error *err = NULL;
1456 
1457     dev->num_channels = vdc->num_channels ? vdc->num_channels(dev) : 1;
1458     if (dev->num_channels < 1) {
1459         error_setg(errp, "invalid #channels: %u", dev->num_channels);
1460         return;
1461     }
1462 
1463     dev->channels = g_new0(VMBusChannel, dev->num_channels);
1464     for (i = 0; i < dev->num_channels; i++) {
1465         init_channel(vmbus, dev, vdc, &dev->channels[i], i, &err);
1466         if (err) {
1467             goto err_init;
1468         }
1469     }
1470 
1471     return;
1472 
1473 err_init:
1474     while (i--) {
1475         deinit_channel(&dev->channels[i]);
1476     }
1477     error_propagate(errp, err);
1478 }
1479 
1480 static void free_channels(VMBusDevice *dev)
1481 {
1482     uint16_t i;
1483     for (i = 0; i < dev->num_channels; i++) {
1484         deinit_channel(&dev->channels[i]);
1485     }
1486     g_free(dev->channels);
1487 }
1488 
1489 static HvSintRoute *make_sint_route(VMBus *vmbus, uint32_t vp_index)
1490 {
1491     VMBusChannel *chan;
1492 
1493     if (vp_index == vmbus->target_vp) {
1494         hyperv_sint_route_ref(vmbus->sint_route);
1495         return vmbus->sint_route;
1496     }
1497 
1498     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1499         if (chan->target_vp == vp_index && vmbus_channel_is_open(chan)) {
1500             hyperv_sint_route_ref(chan->notify_route);
1501             return chan->notify_route;
1502         }
1503     }
1504 
1505     return hyperv_sint_route_new(vp_index, VMBUS_SINT, NULL, NULL);
1506 }
1507 
1508 static void open_channel(VMBusChannel *chan)
1509 {
1510     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
1511 
1512     chan->gpadl = vmbus_get_gpadl(chan, chan->ringbuf_gpadl);
1513     if (!chan->gpadl) {
1514         return;
1515     }
1516 
1517     if (ringbufs_init(chan)) {
1518         goto put_gpadl;
1519     }
1520 
1521     if (event_notifier_init(&chan->notifier, 0)) {
1522         goto put_gpadl;
1523     }
1524 
1525     event_notifier_set_handler(&chan->notifier, channel_event_cb);
1526 
1527     if (hyperv_set_event_flag_handler(chan_connection_id(chan),
1528                                       &chan->notifier)) {
1529         goto cleanup_notifier;
1530     }
1531 
1532     chan->notify_route = make_sint_route(chan->vmbus, chan->target_vp);
1533     if (!chan->notify_route) {
1534         goto clear_event_flag_handler;
1535     }
1536 
1537     if (vdc->open_channel && vdc->open_channel(chan)) {
1538         goto unref_sint_route;
1539     }
1540 
1541     chan->is_open = true;
1542     return;
1543 
1544 unref_sint_route:
1545     hyperv_sint_route_unref(chan->notify_route);
1546 clear_event_flag_handler:
1547     hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
1548 cleanup_notifier:
1549     event_notifier_set_handler(&chan->notifier, NULL);
1550     event_notifier_cleanup(&chan->notifier);
1551 put_gpadl:
1552     vmbus_put_gpadl(chan->gpadl);
1553 }
1554 
1555 static void close_channel(VMBusChannel *chan)
1556 {
1557     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
1558 
1559     if (!chan->is_open) {
1560         return;
1561     }
1562 
1563     if (vdc->close_channel) {
1564         vdc->close_channel(chan);
1565     }
1566 
1567     hyperv_sint_route_unref(chan->notify_route);
1568     hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
1569     event_notifier_set_handler(&chan->notifier, NULL);
1570     event_notifier_cleanup(&chan->notifier);
1571     vmbus_put_gpadl(chan->gpadl);
1572     chan->is_open = false;
1573 }
1574 
1575 static int channel_post_load(void *opaque, int version_id)
1576 {
1577     VMBusChannel *chan = opaque;
1578 
1579     return register_chan_id(chan);
1580 }
1581 
1582 static const VMStateDescription vmstate_channel = {
1583     .name = "vmbus/channel",
1584     .version_id = 0,
1585     .minimum_version_id = 0,
1586     .post_load = channel_post_load,
1587     .fields = (VMStateField[]) {
1588         VMSTATE_UINT32(id, VMBusChannel),
1589         VMSTATE_UINT16(subchan_idx, VMBusChannel),
1590         VMSTATE_UINT32(open_id, VMBusChannel),
1591         VMSTATE_UINT32(target_vp, VMBusChannel),
1592         VMSTATE_UINT32(ringbuf_gpadl, VMBusChannel),
1593         VMSTATE_UINT32(ringbuf_send_offset, VMBusChannel),
1594         VMSTATE_UINT8(offer_state, VMBusChannel),
1595         VMSTATE_UINT8(state, VMBusChannel),
1596         VMSTATE_END_OF_LIST()
1597     }
1598 };
1599 
1600 static VMBusChannel *find_channel(VMBus *vmbus, uint32_t id)
1601 {
1602     VMBusChannel *chan;
1603     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1604         if (chan->id == id) {
1605             return chan;
1606         }
1607     }
1608     return NULL;
1609 }
1610 
1611 static int enqueue_incoming_message(VMBus *vmbus,
1612                                     const struct hyperv_post_message_input *msg)
1613 {
1614     int ret = 0;
1615     uint8_t idx, prev_size;
1616 
1617     qemu_mutex_lock(&vmbus->rx_queue_lock);
1618 
1619     if (vmbus->rx_queue_size == HV_MSG_QUEUE_LEN) {
1620         ret = -ENOBUFS;
1621         goto out;
1622     }
1623 
1624     prev_size = vmbus->rx_queue_size;
1625     idx = (vmbus->rx_queue_head + vmbus->rx_queue_size) % HV_MSG_QUEUE_LEN;
1626     memcpy(&vmbus->rx_queue[idx], msg, sizeof(*msg));
1627     vmbus->rx_queue_size++;
1628 
1629     /* only need to resched if the queue was empty before */
1630     if (!prev_size) {
1631         vmbus_resched(vmbus);
1632     }
1633 out:
1634     qemu_mutex_unlock(&vmbus->rx_queue_lock);
1635     return ret;
1636 }
1637 
1638 static uint16_t vmbus_recv_message(const struct hyperv_post_message_input *msg,
1639                                    void *data)
1640 {
1641     VMBus *vmbus = data;
1642     struct vmbus_message_header *vmbus_msg;
1643 
1644     if (msg->message_type != HV_MESSAGE_VMBUS) {
1645         return HV_STATUS_INVALID_HYPERCALL_INPUT;
1646     }
1647 
1648     if (msg->payload_size < sizeof(struct vmbus_message_header)) {
1649         return HV_STATUS_INVALID_HYPERCALL_INPUT;
1650     }
1651 
1652     vmbus_msg = (struct vmbus_message_header *)msg->payload;
1653 
1654     trace_vmbus_recv_message(vmbus_msg->message_type, msg->payload_size);
1655 
1656     if (vmbus_msg->message_type == VMBUS_MSG_INVALID ||
1657         vmbus_msg->message_type >= VMBUS_MSG_COUNT) {
1658         error_report("vmbus: unknown message type %#x",
1659                      vmbus_msg->message_type);
1660         return HV_STATUS_INVALID_HYPERCALL_INPUT;
1661     }
1662 
1663     if (enqueue_incoming_message(vmbus, msg)) {
1664         return HV_STATUS_INSUFFICIENT_BUFFERS;
1665     }
1666     return HV_STATUS_SUCCESS;
1667 }
1668 
1669 static bool vmbus_initialized(VMBus *vmbus)
1670 {
1671     return vmbus->version > 0 && vmbus->version <= VMBUS_VERSION_CURRENT;
1672 }
1673 
1674 static void vmbus_reset_all(VMBus *vmbus)
1675 {
1676     qbus_reset_all(BUS(vmbus));
1677 }
1678 
1679 static void post_msg(VMBus *vmbus, void *msgdata, uint32_t msglen)
1680 {
1681     int ret;
1682     struct hyperv_message msg = {
1683         .header.message_type = HV_MESSAGE_VMBUS,
1684     };
1685 
1686     assert(!vmbus->msg_in_progress);
1687     assert(msglen <= sizeof(msg.payload));
1688     assert(msglen >= sizeof(struct vmbus_message_header));
1689 
1690     vmbus->msg_in_progress = true;
1691 
1692     trace_vmbus_post_msg(((struct vmbus_message_header *)msgdata)->message_type,
1693                          msglen);
1694 
1695     memcpy(msg.payload, msgdata, msglen);
1696     msg.header.payload_size = ROUND_UP(msglen, VMBUS_MESSAGE_SIZE_ALIGN);
1697 
1698     ret = hyperv_post_msg(vmbus->sint_route, &msg);
1699     if (ret == 0 || ret == -EAGAIN) {
1700         return;
1701     }
1702 
1703     error_report("message delivery fatal failure: %d; aborting vmbus", ret);
1704     vmbus_reset_all(vmbus);
1705 }
1706 
1707 static int vmbus_init(VMBus *vmbus)
1708 {
1709     if (vmbus->target_vp != (uint32_t)-1) {
1710         vmbus->sint_route = hyperv_sint_route_new(vmbus->target_vp, VMBUS_SINT,
1711                                                   vmbus_msg_cb, vmbus);
1712         if (!vmbus->sint_route) {
1713             error_report("failed to set up SINT route");
1714             return -ENOMEM;
1715         }
1716     }
1717     return 0;
1718 }
1719 
1720 static void vmbus_deinit(VMBus *vmbus)
1721 {
1722     VMBusGpadl *gpadl, *tmp_gpadl;
1723     VMBusChannel *chan;
1724 
1725     QTAILQ_FOREACH_SAFE(gpadl, &vmbus->gpadl_list, link, tmp_gpadl) {
1726         if (gpadl->state == VMGPADL_TORNDOWN) {
1727             continue;
1728         }
1729         vmbus_put_gpadl(gpadl);
1730     }
1731 
1732     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1733         chan->offer_state = VMOFFER_INIT;
1734     }
1735 
1736     hyperv_sint_route_unref(vmbus->sint_route);
1737     vmbus->sint_route = NULL;
1738     vmbus->int_page_gpa = 0;
1739     vmbus->target_vp = (uint32_t)-1;
1740     vmbus->version = 0;
1741     vmbus->state = VMBUS_LISTEN;
1742     vmbus->msg_in_progress = false;
1743 }
1744 
1745 static void handle_initiate_contact(VMBus *vmbus,
1746                                     vmbus_message_initiate_contact *msg,
1747                                     uint32_t msglen)
1748 {
1749     if (msglen < sizeof(*msg)) {
1750         return;
1751     }
1752 
1753     trace_vmbus_initiate_contact(msg->version_requested >> 16,
1754                                  msg->version_requested & 0xffff,
1755                                  msg->target_vcpu, msg->monitor_page1,
1756                                  msg->monitor_page2, msg->interrupt_page);
1757 
1758     /*
1759      * Reset vmbus on INITIATE_CONTACT regardless of its previous state.
1760      * Useful, in particular, with a vmbus-aware BIOS that can't shut vmbus
1761      * down before handing over to the OS loader.
1762      */
1763     vmbus_reset_all(vmbus);
1764 
1765     vmbus->target_vp = msg->target_vcpu;
1766     vmbus->version = msg->version_requested;
1767     if (vmbus->version < VMBUS_VERSION_WIN8) {
1768         /* Linux passes the interrupt page even on versions that don't use it */
1769         vmbus->int_page_gpa = msg->interrupt_page;
1770     }
1771     vmbus->state = VMBUS_HANDSHAKE;
1772 
1773     if (vmbus_init(vmbus)) {
1774         error_report("failed to init vmbus; aborting");
1775         vmbus_deinit(vmbus);
1776         return;
1777     }
1778 }
1779 
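     /*
      * Reply to INITIATE_CONTACT; version_supported is zero if the requested
      * protocol version was not accepted.
      */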
1780 static void send_handshake(VMBus *vmbus)
1781 {
1782     struct vmbus_message_version_response msg = {
1783         .header.message_type = VMBUS_MSG_VERSION_RESPONSE,
1784         .version_supported = vmbus_initialized(vmbus),
1785     };
1786 
1787     post_msg(vmbus, &msg, sizeof(msg));
1788 }
1789 
1790 static void handle_request_offers(VMBus *vmbus, void *msgdata, uint32_t msglen)
1791 {
1792     VMBusChannel *chan;
1793 
1794     if (!vmbus_initialized(vmbus)) {
1795         return;
1796     }
1797 
1798     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1799         if (chan->offer_state == VMOFFER_INIT) {
1800             chan->offer_state = VMOFFER_SENDING;
1801             break;
1802         }
1803     }
1804 
1805     vmbus->state = VMBUS_OFFER;
1806 }
1807 
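     /*
      * Offers are delivered one at a time: send_offer() posts OFFERCHANNEL for
      * the channel currently in VMOFFER_SENDING, or the ALLOFFERS_DELIVERED
      * terminator once no such channel is left; complete_offer() then advances
      * to the next channel.
      */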
1808 static void send_offer(VMBus *vmbus)
1809 {
1810     VMBusChannel *chan;
1811     struct vmbus_message_header alloffers_msg = {
1812         .message_type = VMBUS_MSG_ALLOFFERS_DELIVERED,
1813     };
1814 
1815     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1816         if (chan->offer_state == VMOFFER_SENDING) {
1817             VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
1818             /* Hyper-V wants LE GUIDs */
1819             QemuUUID classid = qemu_uuid_bswap(vdc->classid);
1820             QemuUUID instanceid = qemu_uuid_bswap(chan->dev->instanceid);
1821             struct vmbus_message_offer_channel msg = {
1822                 .header.message_type = VMBUS_MSG_OFFERCHANNEL,
1823                 .child_relid = chan->id,
1824                 .connection_id = chan_connection_id(chan),
1825                 .channel_flags = vdc->channel_flags,
1826                 .mmio_size_mb = vdc->mmio_size_mb,
1827                 .sub_channel_index = vmbus_channel_idx(chan),
1828                 .interrupt_flags = VMBUS_OFFER_INTERRUPT_DEDICATED,
1829             };
1830 
1831             memcpy(msg.type_uuid, &classid, sizeof(classid));
1832             memcpy(msg.instance_uuid, &instanceid, sizeof(instanceid));
1833 
1834             trace_vmbus_send_offer(chan->id, chan->dev);
1835 
1836             post_msg(vmbus, &msg, sizeof(msg));
1837             return;
1838         }
1839     }
1840 
1841     /* no more offers, send terminator message */
1842     trace_vmbus_terminate_offers();
1843     post_msg(vmbus, &alloffers_msg, sizeof(alloffers_msg));
1844 }
1845 
1846 static bool complete_offer(VMBus *vmbus)
1847 {
1848     VMBusChannel *chan;
1849 
1850     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1851         if (chan->offer_state == VMOFFER_SENDING) {
1852             chan->offer_state = VMOFFER_SENT;
1853             goto next_offer;
1854         }
1855     }
1856     /*
1857      * no channel was found in VMOFFER_SENDING, so this completes the
1858      * terminator message and vmbus can move on to the next state
1859      */
1860     return true;
1861 
1862 next_offer:
1863     /* try to mark another channel for offering */
1864     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1865         if (chan->offer_state == VMOFFER_INIT) {
1866             chan->offer_state = VMOFFER_SENDING;
1867             break;
1868         }
1869     }
1870     /*
1871      * an offer has just been sent, so there are more offers or the terminator
1872      * still to send; no state transition for vmbus
1873      */
1874     return false;
1875 }
1876 
1877 
1878 static void handle_gpadl_header(VMBus *vmbus, vmbus_message_gpadl_header *msg,
1879                                 uint32_t msglen)
1880 {
1881     VMBusGpadl *gpadl;
1882     uint32_t num_gfns, i;
1883 
1884     /* must include at least one gpa range */
1885     if (msglen < sizeof(*msg) + sizeof(msg->range[0]) ||
1886         !vmbus_initialized(vmbus)) {
1887         return;
1888     }
1889 
1890     num_gfns = (msg->range_buflen - msg->rangecount * sizeof(msg->range[0])) /
1891                sizeof(msg->range[0].pfn_array[0]);
1892 
1893     trace_vmbus_gpadl_header(msg->gpadl_id, num_gfns);
1894 
1895     /*
1896      * In theory the GPADL_HEADER message can define a GPADL with multiple GPA
1897      * ranges, each with arbitrary size and alignment.  However, in practice
1898      * only single-range page-aligned GPADLs have been observed, so just ignore
1899      * anything else and simplify things greatly.
1900      */
1901     if (msg->rangecount != 1 || msg->range[0].byte_offset ||
1902         (msg->range[0].byte_count != (num_gfns << TARGET_PAGE_BITS))) {
1903         return;
1904     }
1905 
1906     /* ignore requests to create already existing GPADLs */
1907     if (find_gpadl(vmbus, msg->gpadl_id)) {
1908         return;
1909     }
1910 
1911     gpadl = create_gpadl(vmbus, msg->gpadl_id, msg->child_relid, num_gfns);
1912 
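         /*
          * Only the GFNs that fit into this message are recorded here; the
          * rest arrive in subsequent GPADL_BODY messages (handle_gpadl_body).
          */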
1913     for (i = 0; i < num_gfns &&
1914          (void *)&msg->range[0].pfn_array[i + 1] <= (void *)msg + msglen;
1915          i++) {
1916         gpadl->gfns[gpadl->seen_gfns++] = msg->range[0].pfn_array[i];
1917     }
1918 
1919     if (gpadl_full(gpadl)) {
1920         vmbus->state = VMBUS_CREATE_GPADL;
1921     }
1922 }
1923 
1924 static void handle_gpadl_body(VMBus *vmbus, vmbus_message_gpadl_body *msg,
1925                               uint32_t msglen)
1926 {
1927     VMBusGpadl *gpadl;
1928     uint32_t num_gfns_left, i;
1929 
1930     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
1931         return;
1932     }
1933 
1934     trace_vmbus_gpadl_body(msg->gpadl_id);
1935 
1936     gpadl = find_gpadl(vmbus, msg->gpadl_id);
1937     if (!gpadl) {
1938         return;
1939     }
1940 
1941     num_gfns_left = gpadl->num_gfns - gpadl->seen_gfns;
1942     assert(num_gfns_left);
1943 
1944     for (i = 0; i < num_gfns_left &&
1945          (void *)&msg->pfn_array[i + 1] <= (void *)msg + msglen; i++) {
1946         gpadl->gfns[gpadl->seen_gfns++] = msg->pfn_array[i];
1947     }
1948 
1949     if (gpadl_full(gpadl)) {
1950         vmbus->state = VMBUS_CREATE_GPADL;
1951     }
1952 }
1953 
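     /*
      * Once all GFNs of a GPADL have been received, acknowledge its creation
      * with GPADL_CREATED; complete_create_gpadl() then marks it VMGPADL_ALIVE.
      */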
1954 static void send_create_gpadl(VMBus *vmbus)
1955 {
1956     VMBusGpadl *gpadl;
1957 
1958     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
1959         if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
1960             struct vmbus_message_gpadl_created msg = {
1961                 .header.message_type = VMBUS_MSG_GPADL_CREATED,
1962                 .gpadl_id = gpadl->id,
1963                 .child_relid = gpadl->child_relid,
1964             };
1965 
1966             trace_vmbus_gpadl_created(gpadl->id);
1967             post_msg(vmbus, &msg, sizeof(msg));
1968             return;
1969         }
1970     }
1971 
1972     assert(false);
1973 }
1974 
1975 static bool complete_create_gpadl(VMBus *vmbus)
1976 {
1977     VMBusGpadl *gpadl;
1978 
1979     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
1980         if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
1981             gpadl->state = VMGPADL_ALIVE;
1982 
1983             return true;
1984         }
1985     }
1986 
1987     assert(false);
1988     return false;
1989 }
1990 
1991 static void handle_gpadl_teardown(VMBus *vmbus,
1992                                   vmbus_message_gpadl_teardown *msg,
1993                                   uint32_t msglen)
1994 {
1995     VMBusGpadl *gpadl;
1996 
1997     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
1998         return;
1999     }
2000 
2001     trace_vmbus_gpadl_teardown(msg->gpadl_id);
2002 
2003     gpadl = find_gpadl(vmbus, msg->gpadl_id);
2004     if (!gpadl || gpadl->state == VMGPADL_TORNDOWN) {
2005         return;
2006     }
2007 
2008     gpadl->state = VMGPADL_TEARINGDOWN;
2009     vmbus->state = VMBUS_TEARDOWN_GPADL;
2010 }
2011 
2012 static void send_teardown_gpadl(VMBus *vmbus)
2013 {
2014     VMBusGpadl *gpadl;
2015 
2016     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
2017         if (gpadl->state == VMGPADL_TEARINGDOWN) {
2018             struct vmbus_message_gpadl_torndown msg = {
2019                 .header.message_type = VMBUS_MSG_GPADL_TORNDOWN,
2020                 .gpadl_id = gpadl->id,
2021             };
2022 
2023             trace_vmbus_gpadl_torndown(gpadl->id);
2024             post_msg(vmbus, &msg, sizeof(msg));
2025             return;
2026         }
2027     }
2028 
2029     assert(false);
2030 }
2031 
2032 static bool complete_teardown_gpadl(VMBus *vmbus)
2033 {
2034     VMBusGpadl *gpadl;
2035 
2036     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
2037         if (gpadl->state == VMGPADL_TEARINGDOWN) {
2038             gpadl->state = VMGPADL_TORNDOWN;
2039             vmbus_put_gpadl(gpadl);
2040             return true;
2041         }
2042     }
2043 
2044     assert(false);
2045     return false;
2046 }
2047 
2048 static void handle_open_channel(VMBus *vmbus, vmbus_message_open_channel *msg,
2049                                 uint32_t msglen)
2050 {
2051     VMBusChannel *chan;
2052 
2053     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
2054         return;
2055     }
2056 
2057     trace_vmbus_open_channel(msg->child_relid, msg->ring_buffer_gpadl_id,
2058                              msg->target_vp);
2059     chan = find_channel(vmbus, msg->child_relid);
2060     if (!chan || chan->state != VMCHAN_INIT) {
2061         return;
2062     }
2063 
2064     chan->ringbuf_gpadl = msg->ring_buffer_gpadl_id;
2065     chan->ringbuf_send_offset = msg->ring_buffer_offset;
2066     chan->target_vp = msg->target_vp;
2067     chan->open_id = msg->open_id;
2068 
2069     open_channel(chan);
2070 
2071     chan->state = VMCHAN_OPENING;
2072     vmbus->state = VMBUS_OPEN_CHANNEL;
2073 }
2074 
2075 static void send_open_channel(VMBus *vmbus)
2076 {
2077     VMBusChannel *chan;
2078 
2079     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2080         if (chan->state == VMCHAN_OPENING) {
2081             struct vmbus_message_open_result msg = {
2082                 .header.message_type = VMBUS_MSG_OPENCHANNEL_RESULT,
2083                 .child_relid = chan->id,
2084                 .open_id = chan->open_id,
2085                 .status = !vmbus_channel_is_open(chan),
2086             };
2087 
2088             trace_vmbus_channel_open(chan->id, msg.status);
2089             post_msg(vmbus, &msg, sizeof(msg));
2090             return;
2091         }
2092     }
2093 
2094     assert(false);
2095 }
2096 
2097 static bool complete_open_channel(VMBus *vmbus)
2098 {
2099     VMBusChannel *chan;
2100 
2101     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2102         if (chan->state == VMCHAN_OPENING) {
2103             if (vmbus_channel_is_open(chan)) {
2104                 chan->state = VMCHAN_OPEN;
2105                 /*
2106                  * simulate guest notification of ringbuffer space made
2107                  * available, for the channel protocols where the host
2108                  * initiates the communication
2109                  */
2110                 vmbus_channel_notify_host(chan);
2111             } else {
2112                 chan->state = VMCHAN_INIT;
2113             }
2114             return true;
2115         }
2116     }
2117 
2118     assert(false);
2119     return false;
2120 }
2121 
2122 static void vdev_reset_on_close(VMBusDevice *vdev)
2123 {
2124     uint16_t i;
2125 
2126     for (i = 0; i < vdev->num_channels; i++) {
2127         if (vmbus_channel_is_open(&vdev->channels[i])) {
2128             return;
2129         }
2130     }
2131 
2132     /* all channels closed -- reset device */
2133     qdev_reset_all(DEVICE(vdev));
2134 }
2135 
2136 static void handle_close_channel(VMBus *vmbus, vmbus_message_close_channel *msg,
2137                                  uint32_t msglen)
2138 {
2139     VMBusChannel *chan;
2140 
2141     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
2142         return;
2143     }
2144 
2145     trace_vmbus_close_channel(msg->child_relid);
2146 
2147     chan = find_channel(vmbus, msg->child_relid);
2148     if (!chan) {
2149         return;
2150     }
2151 
2152     close_channel(chan);
2153     chan->state = VMCHAN_INIT;
2154 
2155     vdev_reset_on_close(chan->dev);
2156 }
2157 
2158 static void handle_unload(VMBus *vmbus, void *msg, uint32_t msglen)
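     /*
      * UNLOAD: drop any queued incoming messages, acknowledge with
      * UNLOAD_RESPONSE, and reset the whole bus once the response is delivered
      * (complete_unload).
      */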
2159 {
2160     vmbus->state = VMBUS_UNLOAD;
2161 }
2162 
2163 static void send_unload(VMBus *vmbus)
2164 {
2165     vmbus_message_header msg = {
2166         .message_type = VMBUS_MSG_UNLOAD_RESPONSE,
2167     };
2168 
2169     qemu_mutex_lock(&vmbus->rx_queue_lock);
2170     vmbus->rx_queue_size = 0;
2171     qemu_mutex_unlock(&vmbus->rx_queue_lock);
2172 
2173     post_msg(vmbus, &msg, sizeof(msg));
2174     return;
2175 }
2176 
2177 static bool complete_unload(VMBus *vmbus)
2178 {
2179     vmbus_reset_all(vmbus);
2180     return true;
2181 }
2182 
2183 static void process_message(VMBus *vmbus)
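     /*
      * Dequeue one message from the rx queue and dispatch it to the matching
      * handler.  Runs from the main-loop BH scheduled by vmbus_resched().
      */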
2184 {
2185     struct hyperv_post_message_input *hv_msg;
2186     struct vmbus_message_header *msg;
2187     void *msgdata;
2188     uint32_t msglen;
2189 
2190     qemu_mutex_lock(&vmbus->rx_queue_lock);
2191 
2192     if (!vmbus->rx_queue_size) {
2193         goto unlock;
2194     }
2195 
2196     hv_msg = &vmbus->rx_queue[vmbus->rx_queue_head];
2197     msglen = hv_msg->payload_size;
2198     if (msglen < sizeof(*msg)) {
2199         goto out;
2200     }
2201     msgdata = hv_msg->payload;
2202     msg = (struct vmbus_message_header *)msgdata;
2203 
2204     trace_vmbus_process_incoming_message(msg->message_type);
2205 
2206     switch (msg->message_type) {
2207     case VMBUS_MSG_INITIATE_CONTACT:
2208         handle_initiate_contact(vmbus, msgdata, msglen);
2209         break;
2210     case VMBUS_MSG_REQUESTOFFERS:
2211         handle_request_offers(vmbus, msgdata, msglen);
2212         break;
2213     case VMBUS_MSG_GPADL_HEADER:
2214         handle_gpadl_header(vmbus, msgdata, msglen);
2215         break;
2216     case VMBUS_MSG_GPADL_BODY:
2217         handle_gpadl_body(vmbus, msgdata, msglen);
2218         break;
2219     case VMBUS_MSG_GPADL_TEARDOWN:
2220         handle_gpadl_teardown(vmbus, msgdata, msglen);
2221         break;
2222     case VMBUS_MSG_OPENCHANNEL:
2223         handle_open_channel(vmbus, msgdata, msglen);
2224         break;
2225     case VMBUS_MSG_CLOSECHANNEL:
2226         handle_close_channel(vmbus, msgdata, msglen);
2227         break;
2228     case VMBUS_MSG_UNLOAD:
2229         handle_unload(vmbus, msgdata, msglen);
2230         break;
2231     default:
2232         error_report("unknown message type %#x", msg->message_type);
2233         break;
2234     }
2235 
2236 out:
2237     vmbus->rx_queue_size--;
2238     vmbus->rx_queue_head++;
2239     vmbus->rx_queue_head %= HV_MSG_QUEUE_LEN;
2240 
2241     vmbus_resched(vmbus);
2242 unlock:
2243     qemu_mutex_unlock(&vmbus->rx_queue_lock);
2244 }
2245 
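     /*
      * Per-state handlers: .run either processes an incoming message
      * (VMBUS_LISTEN) or posts the next outgoing one; .complete is invoked
      * from vmbus_msg_cb() once delivery succeeds and decides whether to
      * return to VMBUS_LISTEN.
      */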
2246 static const struct {
2247     void (*run)(VMBus *vmbus);
2248     bool (*complete)(VMBus *vmbus);
2249 } state_runner[] = {
2250     [VMBUS_LISTEN]         = {process_message,     NULL},
2251     [VMBUS_HANDSHAKE]      = {send_handshake,      NULL},
2252     [VMBUS_OFFER]          = {send_offer,          complete_offer},
2253     [VMBUS_CREATE_GPADL]   = {send_create_gpadl,   complete_create_gpadl},
2254     [VMBUS_TEARDOWN_GPADL] = {send_teardown_gpadl, complete_teardown_gpadl},
2255     [VMBUS_OPEN_CHANNEL]   = {send_open_channel,   complete_open_channel},
2256     [VMBUS_UNLOAD]         = {send_unload,         complete_unload},
2257 };
2258 
2259 static void vmbus_do_run(VMBus *vmbus)
2260 {
2261     if (vmbus->msg_in_progress) {
2262         return;
2263     }
2264 
2265     assert(vmbus->state < VMBUS_STATE_MAX);
2266     assert(state_runner[vmbus->state].run);
2267     state_runner[vmbus->state].run(vmbus);
2268 }
2269 
2270 static void vmbus_run(void *opaque)
2271 {
2272     VMBus *vmbus = opaque;
2273 
2274     /* make sure no recursion happens (e.g. due to recursive aio_poll()) */
2275     if (vmbus->in_progress) {
2276         return;
2277     }
2278 
2279     vmbus->in_progress = true;
2280     /*
2281      * FIXME: if vmbus_resched() is called from within vmbus_do_run(), it
2282      * should go *after* the code that can result in aio_poll; otherwise
2283      * reschedules can be missed.  No idea how to enforce that.
2284      */
2285     vmbus_do_run(vmbus);
2286     vmbus->in_progress = false;
2287 }
2288 
2289 static void vmbus_msg_cb(void *data, int status)
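     /*
      * Completion callback for post_msg(): on success run the current state's
      * .complete handler; on -EAGAIN leave the state alone so the message is
      * re-posted; on other errors reset the bus.
      */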
2290 {
2291     VMBus *vmbus = data;
2292     bool (*complete)(VMBus *vmbus);
2293 
2294     assert(vmbus->msg_in_progress);
2295 
2296     trace_vmbus_msg_cb(status);
2297 
2298     if (status == -EAGAIN) {
2299         goto out;
2300     }
2301     if (status) {
2302         error_report("message delivery fatal failure: %d; aborting vmbus",
2303                      status);
2304         vmbus_reset_all(vmbus);
2305         return;
2306     }
2307 
2308     assert(vmbus->state < VMBUS_STATE_MAX);
2309     complete = state_runner[vmbus->state].complete;
2310     if (!complete || complete(vmbus)) {
2311         vmbus->state = VMBUS_LISTEN;
2312     }
2313 out:
2314     vmbus->msg_in_progress = false;
2315     vmbus_resched(vmbus);
2316 }
2317 
2318 static void vmbus_resched(VMBus *vmbus)
2319 {
2320     aio_bh_schedule_oneshot(qemu_get_aio_context(), vmbus_run, vmbus);
2321 }
2322 
2323 static void vmbus_signal_event(EventNotifier *e)
2324 {
2325     VMBusChannel *chan;
2326     VMBus *vmbus = container_of(e, VMBus, notifier);
2327     unsigned long *int_map;
2328     hwaddr addr, len;
2329     bool is_dirty = false;
2330 
2331     if (!event_notifier_test_and_clear(e)) {
2332         return;
2333     }
2334 
2335     trace_vmbus_signal_event();
2336 
2337     if (!vmbus->int_page_gpa) {
2338         return;
2339     }
2340 
2341     addr = vmbus->int_page_gpa + TARGET_PAGE_SIZE / 2;
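         /*
          * The second half of the interrupt page holds the guest-to-host
          * notification bitmap, one bit per channel relid (pre-Win8 protocols).
          */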
2342     len = TARGET_PAGE_SIZE / 2;
2343     int_map = cpu_physical_memory_map(addr, &len, 1);
2344     if (len != TARGET_PAGE_SIZE / 2) {
2345         goto unmap;
2346     }
2347 
2348     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2349         if (bitmap_test_and_clear_atomic(int_map, chan->id, 1)) {
2350             if (!vmbus_channel_is_open(chan)) {
2351                 continue;
2352             }
2353             vmbus_channel_notify_host(chan);
2354             is_dirty = true;
2355         }
2356     }
2357 
2358 unmap:
2359     cpu_physical_memory_unmap(int_map, len, 1, is_dirty);
2360 }
2361 
2362 static void vmbus_dev_realize(DeviceState *dev, Error **errp)
2363 {
2364     VMBusDevice *vdev = VMBUS_DEVICE(dev);
2365     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2366     VMBus *vmbus = VMBUS(qdev_get_parent_bus(dev));
2367     BusChild *child;
2368     Error *err = NULL;
2369     char idstr[UUID_FMT_LEN + 1];
2370 
2371     assert(!qemu_uuid_is_null(&vdev->instanceid));
2372 
2373     /* Check for instance id collision for this class id */
2374     QTAILQ_FOREACH(child, &BUS(vmbus)->children, sibling) {
2375         VMBusDevice *child_dev = VMBUS_DEVICE(child->child);
2376 
2377         if (child_dev == vdev) {
2378             continue;
2379         }
2380 
2381         if (qemu_uuid_is_equal(&child_dev->instanceid, &vdev->instanceid)) {
2382             qemu_uuid_unparse(&vdev->instanceid, idstr);
2383             error_setg(&err, "duplicate vmbus device instance id %s", idstr);
2384             goto error_out;
2385         }
2386     }
2387 
2388     vdev->dma_as = &address_space_memory;
2389 
2390     create_channels(vmbus, vdev, &err);
2391     if (err) {
2392         goto error_out;
2393     }
2394 
2395     if (vdc->vmdev_realize) {
2396         vdc->vmdev_realize(vdev, &err);
2397         if (err) {
2398             goto err_vdc_realize;
2399         }
2400     }
2401     return;
2402 
2403 err_vdc_realize:
2404     free_channels(vdev);
2405 error_out:
2406     error_propagate(errp, err);
2407 }
2408 
2409 static void vmbus_dev_reset(DeviceState *dev)
2410 {
2411     uint16_t i;
2412     VMBusDevice *vdev = VMBUS_DEVICE(dev);
2413     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2414 
2415     if (vdev->channels) {
2416         for (i = 0; i < vdev->num_channels; i++) {
2417             VMBusChannel *chan = &vdev->channels[i];
2418             close_channel(chan);
2419             chan->state = VMCHAN_INIT;
2420         }
2421     }
2422 
2423     if (vdc->vmdev_reset) {
2424         vdc->vmdev_reset(vdev);
2425     }
2426 }
2427 
2428 static void vmbus_dev_unrealize(DeviceState *dev)
2429 {
2430     VMBusDevice *vdev = VMBUS_DEVICE(dev);
2431     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2432 
2433     if (vdc->vmdev_unrealize) {
2434         vdc->vmdev_unrealize(vdev);
2435     }
2436     free_channels(vdev);
2437 }
2438 
2439 static void vmbus_dev_class_init(ObjectClass *klass, void *data)
2440 {
2441     DeviceClass *kdev = DEVICE_CLASS(klass);
2442     kdev->bus_type = TYPE_VMBUS;
2443     kdev->realize = vmbus_dev_realize;
2444     kdev->unrealize = vmbus_dev_unrealize;
2445     kdev->reset = vmbus_dev_reset;
2446 }
2447 
2448 static Property vmbus_dev_instanceid =
2449                         DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid);
2450 
2451 static void vmbus_dev_instance_init(Object *obj)
2452 {
2453     VMBusDevice *vdev = VMBUS_DEVICE(obj);
2454     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2455 
2456     if (!qemu_uuid_is_null(&vdc->instanceid)) {
2457         /* Class wants to only have a single instance with a fixed UUID */
2458         vdev->instanceid = vdc->instanceid;
2459     } else {
2460         qdev_property_add_static(DEVICE(vdev), &vmbus_dev_instanceid);
2461     }
2462 }
2463 
2464 const VMStateDescription vmstate_vmbus_dev = {
2465     .name = TYPE_VMBUS_DEVICE,
2466     .version_id = 0,
2467     .minimum_version_id = 0,
2468     .fields = (VMStateField[]) {
2469         VMSTATE_UINT8_ARRAY(instanceid.data, VMBusDevice, 16),
2470         VMSTATE_UINT16(num_channels, VMBusDevice),
2471         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(channels, VMBusDevice,
2472                                              num_channels, vmstate_channel,
2473                                              VMBusChannel),
2474         VMSTATE_END_OF_LIST()
2475     }
2476 };
2477 
2478 /* vmbus generic device base */
2479 static const TypeInfo vmbus_dev_type_info = {
2480     .name = TYPE_VMBUS_DEVICE,
2481     .parent = TYPE_DEVICE,
2482     .abstract = true,
2483     .instance_size = sizeof(VMBusDevice),
2484     .class_size = sizeof(VMBusDeviceClass),
2485     .class_init = vmbus_dev_class_init,
2486     .instance_init = vmbus_dev_instance_init,
2487 };
2488 
2489 static void vmbus_realize(BusState *bus, Error **errp)
2490 {
2491     int ret = 0;
2492     Error *local_err = NULL;
2493     VMBus *vmbus = VMBUS(bus);
2494 
2495     qemu_mutex_init(&vmbus->rx_queue_lock);
2496 
2497     QTAILQ_INIT(&vmbus->gpadl_list);
2498     QTAILQ_INIT(&vmbus->channel_list);
2499 
2500     ret = hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID,
2501                                  vmbus_recv_message, vmbus);
2502     if (ret != 0) {
2503         error_setg(&local_err, "hyperv set message handler failed: %d", ret);
2504         goto error_out;
2505     }
2506 
2507     ret = event_notifier_init(&vmbus->notifier, 0);
2508     if (ret != 0) {
2509         error_setg(&local_err, "event notifier failed to init with %d", ret);
2510         goto remove_msg_handler;
2511     }
2512 
2513     event_notifier_set_handler(&vmbus->notifier, vmbus_signal_event);
2514     ret = hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID,
2515                                         &vmbus->notifier);
2516     if (ret != 0) {
2517         error_setg(&local_err, "hyperv set event handler failed with %d", ret);
2518         goto clear_event_notifier;
2519     }
2520 
2521     return;
2522 
2523 clear_event_notifier:
2524     event_notifier_cleanup(&vmbus->notifier);
2525 remove_msg_handler:
2526     hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
2527 error_out:
2528     qemu_mutex_destroy(&vmbus->rx_queue_lock);
2529     error_propagate(errp, local_err);
2530 }
2531 
2532 static void vmbus_unrealize(BusState *bus)
2533 {
2534     VMBus *vmbus = VMBUS(bus);
2535 
2536     hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
2537     hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID, NULL);
2538     event_notifier_cleanup(&vmbus->notifier);
2539 
2540     qemu_mutex_destroy(&vmbus->rx_queue_lock);
2541 }
2542 
2543 static void vmbus_reset(BusState *bus)
2544 {
2545     vmbus_deinit(VMBUS(bus));
2546 }
2547 
2548 static char *vmbus_get_dev_path(DeviceState *dev)
2549 {
2550     BusState *bus = qdev_get_parent_bus(dev);
2551     return qdev_get_dev_path(bus->parent);
2552 }
2553 
2554 static char *vmbus_get_fw_dev_path(DeviceState *dev)
2555 {
2556     VMBusDevice *vdev = VMBUS_DEVICE(dev);
2557     char uuid[UUID_FMT_LEN + 1];
2558 
2559     qemu_uuid_unparse(&vdev->instanceid, uuid);
2560     return g_strdup_printf("%s@%s", qdev_fw_name(dev), uuid);
2561 }
2562 
2563 static void vmbus_class_init(ObjectClass *klass, void *data)
2564 {
2565     BusClass *k = BUS_CLASS(klass);
2566 
2567     k->get_dev_path = vmbus_get_dev_path;
2568     k->get_fw_dev_path = vmbus_get_fw_dev_path;
2569     k->realize = vmbus_realize;
2570     k->unrealize = vmbus_unrealize;
2571     k->reset = vmbus_reset;
2572 }
2573 
2574 static int vmbus_pre_load(void *opaque)
2575 {
2576     VMBusChannel *chan;
2577     VMBus *vmbus = VMBUS(opaque);
2578 
2579     /*
2580      * channel IDs allocated by the source will come in the migration stream
2581      * for each channel, so clean up the ones allocated at realize
2582      */
2583     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2584         unregister_chan_id(chan);
2585     }
2586 
2587     return 0;
2588 }
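
     /*
      * Re-establish runtime state that isn't migrated: recreate the SINT
      * route, fix up GPADL back-pointers and refcounts, and reopen channels
      * that were open on the source.
      */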
2589 static int vmbus_post_load(void *opaque, int version_id)
2590 {
2591     int ret;
2592     VMBus *vmbus = VMBUS(opaque);
2593     VMBusGpadl *gpadl;
2594     VMBusChannel *chan;
2595 
2596     ret = vmbus_init(vmbus);
2597     if (ret) {
2598         return ret;
2599     }
2600 
2601     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
2602         gpadl->vmbus = vmbus;
2603         gpadl->refcount = 1;
2604     }
2605 
2606     /*
2607      * reopening channels depends on vmbus being initialized, so it's done
2608      * here instead of in channel_post_load()
2609      */
2610     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2611 
2612         if (chan->state == VMCHAN_OPENING || chan->state == VMCHAN_OPEN) {
2613             open_channel(chan);
2614         }
2615 
2616         if (chan->state != VMCHAN_OPEN) {
2617             continue;
2618         }
2619 
2620         if (!vmbus_channel_is_open(chan)) {
2621             /* reopen failed, abort loading */
2622             return -1;
2623         }
2624 
2625         /* resume processing on the guest side if it missed the notification */
2626         hyperv_sint_route_set_sint(chan->notify_route);
2627         /* ditto on the host side */
2628         vmbus_channel_notify_host(chan);
2629     }
2630 
2631     vmbus_resched(vmbus);
2632     return 0;
2633 }
2634 
2635 static const VMStateDescription vmstate_post_message_input = {
2636     .name = "vmbus/hyperv_post_message_input",
2637     .version_id = 0,
2638     .minimum_version_id = 0,
2639     .fields = (VMStateField[]) {
2640         /*
2641          * skip connection_id and message_type as they are validated before
2642          * queueing and ignored on dequeueing
2643          */
2644         VMSTATE_UINT32(payload_size, struct hyperv_post_message_input),
2645         VMSTATE_UINT8_ARRAY(payload, struct hyperv_post_message_input,
2646                             HV_MESSAGE_PAYLOAD_SIZE),
2647         VMSTATE_END_OF_LIST()
2648     }
2649 };
2650 
2651 static bool vmbus_rx_queue_needed(void *opaque)
2652 {
2653     VMBus *vmbus = VMBUS(opaque);
2654     return vmbus->rx_queue_size;
2655 }
2656 
2657 static const VMStateDescription vmstate_rx_queue = {
2658     .name = "vmbus/rx_queue",
2659     .version_id = 0,
2660     .minimum_version_id = 0,
2661     .needed = vmbus_rx_queue_needed,
2662     .fields = (VMStateField[]) {
2663         VMSTATE_UINT8(rx_queue_head, VMBus),
2664         VMSTATE_UINT8(rx_queue_size, VMBus),
2665         VMSTATE_STRUCT_ARRAY(rx_queue, VMBus,
2666                              HV_MSG_QUEUE_LEN, 0,
2667                              vmstate_post_message_input,
2668                              struct hyperv_post_message_input),
2669         VMSTATE_END_OF_LIST()
2670     }
2671 };
2672 
2673 static const VMStateDescription vmstate_vmbus = {
2674     .name = TYPE_VMBUS,
2675     .version_id = 0,
2676     .minimum_version_id = 0,
2677     .pre_load = vmbus_pre_load,
2678     .post_load = vmbus_post_load,
2679     .fields = (VMStateField[]) {
2680         VMSTATE_UINT8(state, VMBus),
2681         VMSTATE_UINT32(version, VMBus),
2682         VMSTATE_UINT32(target_vp, VMBus),
2683         VMSTATE_UINT64(int_page_gpa, VMBus),
2684         VMSTATE_QTAILQ_V(gpadl_list, VMBus, 0,
2685                          vmstate_gpadl, VMBusGpadl, link),
2686         VMSTATE_END_OF_LIST()
2687     },
2688     .subsections = (const VMStateDescription * []) {
2689         &vmstate_rx_queue,
2690         NULL
2691     }
2692 };
2693 
2694 static const TypeInfo vmbus_type_info = {
2695     .name = TYPE_VMBUS,
2696     .parent = TYPE_BUS,
2697     .instance_size = sizeof(VMBus),
2698     .class_init = vmbus_class_init,
2699 };
2700 
2701 static void vmbus_bridge_realize(DeviceState *dev, Error **errp)
2702 {
2703     VMBusBridge *bridge = VMBUS_BRIDGE(dev);
2704 
2705     /*
2706      * at this point at least one vmbus bridge (this one) is being realized,
2707      * so vmbus_bridge_find() can only return NULL if the bridge isn't unique
2708      */
2709     if (!vmbus_bridge_find()) {
2710         error_setg(errp, "there can be at most one %s in the system",
2711                    TYPE_VMBUS_BRIDGE);
2712         return;
2713     }
2714 
2715     if (!hyperv_is_synic_enabled()) {
2716         error_setg(errp, "VMBus requires usable Hyper-V SynIC and VP_INDEX");
2717         return;
2718     }
2719 
2720     bridge->bus = VMBUS(qbus_create(TYPE_VMBUS, dev, "vmbus"));
2721 }
2722 
2723 static char *vmbus_bridge_ofw_unit_address(const SysBusDevice *dev)
2724 {
2725     /* there can be only one VMBus */
2726     return g_strdup("0");
2727 }
2728 
2729 static const VMStateDescription vmstate_vmbus_bridge = {
2730     .name = TYPE_VMBUS_BRIDGE,
2731     .version_id = 0,
2732     .minimum_version_id = 0,
2733     .fields = (VMStateField[]) {
2734         VMSTATE_STRUCT_POINTER(bus, VMBusBridge, vmstate_vmbus, VMBus),
2735         VMSTATE_END_OF_LIST()
2736     },
2737 };
2738 
2739 static Property vmbus_bridge_props[] = {
2740     DEFINE_PROP_UINT8("irq", VMBusBridge, irq, 7),
2741     DEFINE_PROP_END_OF_LIST()
2742 };
2743 
2744 static void vmbus_bridge_class_init(ObjectClass *klass, void *data)
2745 {
2746     DeviceClass *k = DEVICE_CLASS(klass);
2747     SysBusDeviceClass *sk = SYS_BUS_DEVICE_CLASS(klass);
2748 
2749     k->realize = vmbus_bridge_realize;
2750     k->fw_name = "vmbus";
2751     sk->explicit_ofw_unit_address = vmbus_bridge_ofw_unit_address;
2752     set_bit(DEVICE_CATEGORY_BRIDGE, k->categories);
2753     k->vmsd = &vmstate_vmbus_bridge;
2754     device_class_set_props(k, vmbus_bridge_props);
2755     /* override SysBusDevice's default */
2756     k->user_creatable = true;
2757 }
2758 
2759 static const TypeInfo vmbus_bridge_type_info = {
2760     .name = TYPE_VMBUS_BRIDGE,
2761     .parent = TYPE_SYS_BUS_DEVICE,
2762     .instance_size = sizeof(VMBusBridge),
2763     .class_init = vmbus_bridge_class_init,
2764 };
2765 
2766 static void vmbus_register_types(void)
2767 {
2768     type_register_static(&vmbus_bridge_type_info);
2769     type_register_static(&vmbus_dev_type_info);
2770     type_register_static(&vmbus_type_info);
2771 }
2772 
2773 type_init(vmbus_register_types)
2774