xref: /openbmc/qemu/hw/hyperv/vmbus.c (revision f7160f32)
1 /*
2  * QEMU Hyper-V VMBus
3  *
4  * Copyright (c) 2017-2018 Virtuozzo International GmbH.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/error-report.h"
12 #include "qemu/main-loop.h"
13 #include "qapi/error.h"
14 #include "migration/vmstate.h"
15 #include "hw/qdev-properties.h"
16 #include "hw/hyperv/hyperv.h"
17 #include "hw/hyperv/vmbus.h"
18 #include "hw/hyperv/vmbus-bridge.h"
19 #include "hw/sysbus.h"
20 #include "cpu.h"
21 #include "trace.h"
22 
/*
 * Type name string of the VMBus bus, and the OBJECT_CHECK-based cast that
 * converts a generic object pointer to VMBus.
 */
#define TYPE_VMBUS "vmbus"
#define VMBUS(obj) OBJECT_CHECK(VMBus, (obj), TYPE_VMBUS)
25 
/*
 * GPADL lifecycle states.
 * NOTE(review): presumably the values kept in VMBusGpadl::state - confirm
 * against the GPADL message handlers.
 */
enum {
    VMGPADL_INIT,
    VMGPADL_ALIVE,
    VMGPADL_TEARINGDOWN,
    VMGPADL_TORNDOWN,
};
32 
/*
 * A list of guest page frame numbers identified by an id, reference-counted
 * and linked into the owning VMBus.
 */
struct VMBusGpadl {
    /* GPADL id */
    uint32_t id;
    /* associated channel id (rudimentary?) */
    uint32_t child_relid;

    /* number of pages in the GPADL as declared in GPADL_HEADER message */
    uint32_t num_gfns;
    /*
     * Due to limited message size, GPADL may not fit fully in a single
     * GPADL_HEADER message, and is further populated using GPADL_BODY
     * messages.  @seen_gfns is the number of pages seen so far; once it
     * reaches @num_gfns, the GPADL is ready to use.
     */
    uint32_t seen_gfns;
    /* array of GFNs (of size @num_gfns once allocated) */
    uint64_t *gfns;

    /* NOTE(review): presumably one of the VMGPADL_* values - confirm */
    uint8_t state;

    /* entry in @vmbus->gpadl_list */
    QTAILQ_ENTRY(VMBusGpadl) link;
    /* bus whose gpadl_list this GPADL is linked into */
    VMBus *vmbus;
    /*
     * starts at 1 on creation, incremented by vmbus_get_gpadl(); the GPADL is
     * freed when vmbus_put_gpadl() drops it to zero
     */
    unsigned refcount;
};
57 
/*
 * Wrap sequential read from / write to GPADL.
 * An iterator is set up with gpadl_iter_init(); every series of seek and i/o
 * calls must be bracketed by gpadl_iter_start_io() / gpadl_iter_end_io().
 */
typedef struct GpadlIter {
    /* GPADL whose pages are being accessed */
    VMBusGpadl *gpadl;
    /* address space used to map the GPADL pages */
    AddressSpace *as;
    /*
     * DMA_DIRECTION_FROM_DEVICE: i/o copies the caller's buffer into the
     * GPADL; any other direction: i/o copies the GPADL into the buffer
     */
    DMADirection dir;
    /* offset into GPADL where the next i/o will be performed */
    uint32_t off;
    /*
     * Cached mapping of the currently accessed page, up to page boundary.
     * Updated lazily on i/o.
     * Note: MemoryRegionCache can not be used here because pages in the GPADL
     * are non-contiguous and may belong to different memory regions.
     */
    void *map;
    /* offset after last i/o (i.e. not affected by seek) */
    uint32_t last_off;
    /*
     * Indicator that the iterator is active and may have a cached mapping.
     * Allows to enforce bracketing of all i/o (which may create cached
     * mappings) and thus exclude mapping leaks.
     */
    bool active;
} GpadlIter;
83 
/*
 * Ring buffer.  There are two of them, sitting in the same GPADL, for each
 * channel.
 * Each ring buffer consists of a set of pages, with the first page containing
 * the ring buffer header, and the remaining pages being for data packets.
 */
typedef struct VMBusRingBufCommon {
    /* address space used to map the ring buffer header */
    AddressSpace *as;
    /* GPA of the ring buffer header */
    dma_addr_t rb_addr;
    /* start and length of the ring buffer data area within GPADL */
    uint32_t base;
    uint32_t len;

    /* iterator over the GPADL used for all data-area i/o */
    GpadlIter iter;
} VMBusRingBufCommon;
100 
/*
 * State of the ring buffer the device writes packets into (its iterator is
 * set up with DMA_DIRECTION_FROM_DEVICE in ringbufs_init()).
 */
typedef struct VMBusSendRingBuf {
    VMBusRingBufCommon common;
    /* current write index, to be committed at the end of send */
    uint32_t wr_idx;
    /* write index at the start of send */
    uint32_t last_wr_idx;
    /* space to be requested from the guest */
    uint32_t wanted;
    /* space reserved for planned sends */
    uint32_t reserved;
    /* last seen read index */
    uint32_t last_seen_rd_idx;
} VMBusSendRingBuf;
114 
/*
 * State of the ring buffer the device reads packets from (its iterator is
 * set up with DMA_DIRECTION_TO_DEVICE in ringbufs_init()).
 */
typedef struct VMBusRecvRingBuf {
    VMBusRingBufCommon common;
    /* current read index, to be committed at the end of receive */
    uint32_t rd_idx;
    /* read index at the start of receive */
    uint32_t last_rd_idx;
    /* last seen write index */
    uint32_t last_seen_wr_idx;
} VMBusRecvRingBuf;
124 
125 
/*
 * Channel offer progress.
 * NOTE(review): presumably the values kept in VMBusChannel::offer_state -
 * confirm against the offer handling code.
 */
enum {
    VMOFFER_INIT,
    VMOFFER_SENDING,
    VMOFFER_SENT,
};
131 
/*
 * Channel open progress.
 * NOTE(review): presumably the values kept in VMBusChannel::state - confirm
 * against the channel open handling code.
 */
enum {
    VMCHAN_INIT,
    VMCHAN_OPENING,
    VMCHAN_OPEN,
};
137 
/*
 * Per-channel state: identification, open parameters, notification plumbing
 * and the pair of ring buffers living in one GPADL.
 */
struct VMBusChannel {
    /* device this channel belongs to; also supplies the DMA address space */
    VMBusDevice *dev;

    /* channel id */
    uint32_t id;
    /*
     * subchannel index within the device; subchannel #0 is "primary" and
     * always exists
     */
    uint16_t subchan_idx;
    /* NOTE(review): presumably the id from the guest's open request - confirm */
    uint32_t open_id;
    /* VP_INDEX of the vCPU to notify with (synthetic) interrupts */
    uint32_t target_vp;
    /* GPADL id to use for the ring buffers */
    uint32_t ringbuf_gpadl;
    /* start (in pages) of the send ring buffer within @ringbuf_gpadl */
    uint32_t ringbuf_send_offset;

    /* NOTE(review): presumably a VMOFFER_* value - confirm */
    uint8_t offer_state;
    /* NOTE(review): presumably a VMCHAN_* value - confirm */
    uint8_t state;
    /* reported by vmbus_channel_is_open(); sends are refused while false */
    bool is_open;

    /* main device worker; copied from the device class */
    VMBusChannelNotifyCb notify_cb;
    /*
     * guest->host notifications, either sent directly or dispatched via
     * interrupt page (older VMBus)
     */
    EventNotifier notifier;

    /* bus the channel lives on */
    VMBus *vmbus;
    /*
     * SINT route to signal with host->guest notifications; may be shared with
     * the main VMBus SINT route
     */
    HvSintRoute *notify_route;
    /* GPADL backing both ring buffers (see ringbufs_init()) */
    VMBusGpadl *gpadl;

    VMBusSendRingBuf send_ringbuf;
    VMBusRecvRingBuf recv_ringbuf;

    /* list linkage; NOTE(review): presumably in VMBus::channel_list - confirm */
    QTAILQ_ENTRY(VMBusChannel) link;
};
181 
/*
 * Hyper-V spec mandates that every message port has 16 buffers, which means
 * that the guest can post up to this many messages without blocking.
 * Therefore a queue for incoming messages has to be provided.
 * For outgoing (i.e. host->guest) messages there's no queue; the VMBus just
 * doesn't transition to a new state until the message is known to have been
 * successfully delivered to the respective SynIC message slot.
 */
#define HV_MSG_QUEUE_LEN     16

/* Hyper-V devices never use channel #0.  Must be something special. */
#define VMBUS_FIRST_CHANID      1
/*
 * Each channel occupies one bit within a single event page sint slot, so the
 * number of usable channel ids is the number of event flags minus the ids
 * skipped below VMBUS_FIRST_CHANID.
 */
#define VMBUS_CHANID_COUNT      (HV_EVENT_FLAGS_COUNT - VMBUS_FIRST_CHANID)
/* Leave a few connection numbers for other purposes. */
#define VMBUS_CHAN_CONNECTION_OFFSET     16
198 
/*
 * Since the success or failure of sending a message is reported
 * asynchronously, the VMBus state machine has effectively two entry points:
 * vmbus_run and vmbus_msg_cb (the latter is called when the host->guest
 * message delivery status becomes known).  Both are run as oneshot BHs on the
 * main aio context, ensuring serialization.
 *
 * The enumerators below name the states of that machine.
 */
enum {
    VMBUS_LISTEN,
    VMBUS_HANDSHAKE,
    VMBUS_OFFER,
    VMBUS_CREATE_GPADL,
    VMBUS_TEARDOWN_GPADL,
    VMBUS_OPEN_CHANNEL,
    VMBUS_UNLOAD,
    /* number of states above; not a state itself */
    VMBUS_STATE_MAX
};
216 
/*
 * The VMBus bus object: protocol state, the incoming message queue, and the
 * lists of GPADLs and channels.
 */
struct VMBus {
    BusState parent;

    /* NOTE(review): presumably one of the VMBUS_* state values - confirm */
    uint8_t state;
    /* protection against recursive aio_poll (see vmbus_run) */
    bool in_progress;
    /* whether there's a message being delivered to the guest */
    bool msg_in_progress;
    /* NOTE(review): presumably the negotiated protocol version - confirm */
    uint32_t version;
    /* VP_INDEX of the vCPU to send messages and interrupts to */
    uint32_t target_vp;
    /* NOTE(review): presumably the SINT route for VMBus messages - confirm */
    HvSintRoute *sint_route;
    /*
     * interrupt page for older protocol versions; newer ones use SynIC event
     * flags directly
     * (zero means "not used": see vmbus_channel_notify_guest())
     */
    hwaddr int_page_gpa;

    /* NOTE(review): presumably tracks allocated channel ids - confirm */
    DECLARE_BITMAP(chanid_bitmap, VMBUS_CHANID_COUNT);

    /* incoming message queue */
    struct hyperv_post_message_input rx_queue[HV_MSG_QUEUE_LEN];
    uint8_t rx_queue_head;
    uint8_t rx_queue_size;
    QemuMutex rx_queue_lock;

    /* all GPADLs created on this bus (see create_gpadl() / free_gpadl()) */
    QTAILQ_HEAD(, VMBusGpadl) gpadl_list;
    QTAILQ_HEAD(, VMBusChannel) channel_list;

    /*
     * guest->host notifications for older VMBus, to be dispatched via
     * interrupt page
     */
    EventNotifier notifier;
};
252 
253 static bool gpadl_full(VMBusGpadl *gpadl)
254 {
255     return gpadl->seen_gfns == gpadl->num_gfns;
256 }
257 
258 static VMBusGpadl *create_gpadl(VMBus *vmbus, uint32_t id,
259                                 uint32_t child_relid, uint32_t num_gfns)
260 {
261     VMBusGpadl *gpadl = g_new0(VMBusGpadl, 1);
262 
263     gpadl->id = id;
264     gpadl->child_relid = child_relid;
265     gpadl->num_gfns = num_gfns;
266     gpadl->gfns = g_new(uint64_t, num_gfns);
267     QTAILQ_INSERT_HEAD(&vmbus->gpadl_list, gpadl, link);
268     gpadl->vmbus = vmbus;
269     gpadl->refcount = 1;
270     return gpadl;
271 }
272 
273 static void free_gpadl(VMBusGpadl *gpadl)
274 {
275     QTAILQ_REMOVE(&gpadl->vmbus->gpadl_list, gpadl, link);
276     g_free(gpadl->gfns);
277     g_free(gpadl);
278 }
279 
280 static VMBusGpadl *find_gpadl(VMBus *vmbus, uint32_t gpadl_id)
281 {
282     VMBusGpadl *gpadl;
283     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
284         if (gpadl->id == gpadl_id) {
285             return gpadl;
286         }
287     }
288     return NULL;
289 }
290 
291 VMBusGpadl *vmbus_get_gpadl(VMBusChannel *chan, uint32_t gpadl_id)
292 {
293     VMBusGpadl *gpadl = find_gpadl(chan->vmbus, gpadl_id);
294     if (!gpadl || !gpadl_full(gpadl)) {
295         return NULL;
296     }
297     gpadl->refcount++;
298     return gpadl;
299 }
300 
301 void vmbus_put_gpadl(VMBusGpadl *gpadl)
302 {
303     if (!gpadl) {
304         return;
305     }
306     if (--gpadl->refcount) {
307         return;
308     }
309     free_gpadl(gpadl);
310 }
311 
312 uint32_t vmbus_gpadl_len(VMBusGpadl *gpadl)
313 {
314     return gpadl->num_gfns * TARGET_PAGE_SIZE;
315 }
316 
317 static void gpadl_iter_init(GpadlIter *iter, VMBusGpadl *gpadl,
318                             AddressSpace *as, DMADirection dir)
319 {
320     iter->gpadl = gpadl;
321     iter->as = as;
322     iter->dir = dir;
323     iter->active = false;
324 }
325 
326 static inline void gpadl_iter_cache_unmap(GpadlIter *iter)
327 {
328     uint32_t map_start_in_page = (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
329     uint32_t io_end_in_page = ((iter->last_off - 1) & ~TARGET_PAGE_MASK) + 1;
330 
331     /* mapping is only done to do non-zero amount of i/o */
332     assert(iter->last_off > 0);
333     assert(map_start_in_page < io_end_in_page);
334 
335     dma_memory_unmap(iter->as, iter->map, TARGET_PAGE_SIZE - map_start_in_page,
336                      iter->dir, io_end_in_page - map_start_in_page);
337 }
338 
339 /*
340  * Copy exactly @len bytes between the GPADL pointed to by @iter and @buf.
341  * The direction of the copy is determined by @iter->dir.
342  * The caller must ensure the operation overflows neither @buf nor the GPADL
343  * (there's an assert for the latter).
344  * Reuse the currently mapped page in the GPADL if possible.
345  */
346 static ssize_t gpadl_iter_io(GpadlIter *iter, void *buf, uint32_t len)
347 {
348     ssize_t ret = len;
349 
350     assert(iter->active);
351 
352     while (len) {
353         uint32_t off_in_page = iter->off & ~TARGET_PAGE_MASK;
354         uint32_t pgleft = TARGET_PAGE_SIZE - off_in_page;
355         uint32_t cplen = MIN(pgleft, len);
356         void *p;
357 
358         /* try to reuse the cached mapping */
359         if (iter->map) {
360             uint32_t map_start_in_page =
361                 (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
362             uint32_t off_base = iter->off & ~TARGET_PAGE_MASK;
363             uint32_t mapped_base = (iter->last_off - 1) & ~TARGET_PAGE_MASK;
364             if (off_base != mapped_base || off_in_page < map_start_in_page) {
365                 gpadl_iter_cache_unmap(iter);
366                 iter->map = NULL;
367             }
368         }
369 
370         if (!iter->map) {
371             dma_addr_t maddr;
372             dma_addr_t mlen = pgleft;
373             uint32_t idx = iter->off >> TARGET_PAGE_BITS;
374             assert(idx < iter->gpadl->num_gfns);
375 
376             maddr = (iter->gpadl->gfns[idx] << TARGET_PAGE_BITS) | off_in_page;
377 
378             iter->map = dma_memory_map(iter->as, maddr, &mlen, iter->dir);
379             if (mlen != pgleft) {
380                 dma_memory_unmap(iter->as, iter->map, mlen, iter->dir, 0);
381                 iter->map = NULL;
382                 return -EFAULT;
383             }
384         }
385 
386         p = (void *)(((uintptr_t)iter->map & TARGET_PAGE_MASK) | off_in_page);
387         if (iter->dir == DMA_DIRECTION_FROM_DEVICE) {
388             memcpy(p, buf, cplen);
389         } else {
390             memcpy(buf, p, cplen);
391         }
392 
393         buf += cplen;
394         len -= cplen;
395         iter->off += cplen;
396         iter->last_off = iter->off;
397     }
398 
399     return ret;
400 }
401 
/*
 * Position the iterator @iter at new offset @new_off.
 * Only the offset is updated here; a cached mapping that is unusable with the
 * new offset is released lazily by the next gpadl_iter_io() (or by
 * gpadl_iter_end_io()).
 * Must be called between gpadl_iter_start_io() and gpadl_iter_end_io().
 */
static inline void gpadl_iter_seek(GpadlIter *iter, uint32_t new_off)
{
    assert(iter->active);
    iter->off = new_off;
}
412 
413 /*
414  * Start a series of i/o on the GPADL.
415  * After this i/o and seek operations on @iter become legal.
416  */
417 static inline void gpadl_iter_start_io(GpadlIter *iter)
418 {
419     assert(!iter->active);
420     /* mapping is cached lazily on i/o */
421     iter->map = NULL;
422     iter->active = true;
423 }
424 
425 /*
426  * End the eariler started series of i/o on the GPADL and release the cached
427  * mapping if any.
428  */
429 static inline void gpadl_iter_end_io(GpadlIter *iter)
430 {
431     assert(iter->active);
432 
433     if (iter->map) {
434         gpadl_iter_cache_unmap(iter);
435     }
436 
437     iter->active = false;
438 }
439 
/* Forward declarations of file-local functions defined elsewhere in this file. */
static void vmbus_resched(VMBus *vmbus);
static void vmbus_msg_cb(void *data, int status);
442 
443 ssize_t vmbus_iov_to_gpadl(VMBusChannel *chan, VMBusGpadl *gpadl, uint32_t off,
444                            const struct iovec *iov, size_t iov_cnt)
445 {
446     GpadlIter iter;
447     size_t i;
448     ssize_t ret = 0;
449 
450     gpadl_iter_init(&iter, gpadl, chan->dev->dma_as,
451                     DMA_DIRECTION_FROM_DEVICE);
452     gpadl_iter_start_io(&iter);
453     gpadl_iter_seek(&iter, off);
454     for (i = 0; i < iov_cnt; i++) {
455         ret = gpadl_iter_io(&iter, iov[i].iov_base, iov[i].iov_len);
456         if (ret < 0) {
457             goto out;
458         }
459     }
460 out:
461     gpadl_iter_end_io(&iter);
462     return ret;
463 }
464 
/*
 * Map up to @len bytes of @req's scatter-gather list, starting @off bytes
 * into it, for DMA in direction @dir, and describe the host mappings in @iov
 * (which has room for @iov_cnt entries).
 *
 * Returns the number of @iov entries filled in; this covers less than @len
 * bytes if the scatter-gather list ends first.  Returns -ENOBUFS if @iov is
 * too small, or -EFAULT if a piece could not be mapped; in both error cases
 * the mappings created so far are released.
 */
int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
                  unsigned iov_cnt, size_t len, size_t off)
{
    int ret_cnt = 0, ret;
    unsigned i;
    QEMUSGList *sgl = &req->sgl;
    ScatterGatherEntry *sg = sgl->sg;

    /* skip the entries lying entirely before @off */
    for (i = 0; i < sgl->nsg; i++) {
        if (sg[i].len > off) {
            break;
        }
        off -= sg[i].len;
    }
    for (; len && i < sgl->nsg; i++) {
        dma_addr_t mlen = MIN(sg[i].len - off, len);
        dma_addr_t addr = sg[i].base + off;
        len -= mlen;
        /* only the first entry visited is entered at a non-zero offset */
        off = 0;

        /* one entry may need several mappings if they come back short */
        for (; mlen; ret_cnt++) {
            dma_addr_t l = mlen;
            dma_addr_t a = addr;

            if (ret_cnt == iov_cnt) {
                ret = -ENOBUFS;
                goto err;
            }

            iov[ret_cnt].iov_base = dma_memory_map(sgl->as, a, &l, dir);
            if (!l) {
                ret = -EFAULT;
                goto err;
            }
            iov[ret_cnt].iov_len = l;
            addr += l;
            mlen -= l;
        }
    }

    return ret_cnt;
err:
    /* release the first @ret_cnt mappings, reporting nothing as accessed */
    vmbus_unmap_sgl(req, dir, iov, ret_cnt, 0);
    return ret;
}
510 
511 void vmbus_unmap_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
512                      unsigned iov_cnt, size_t accessed)
513 {
514     QEMUSGList *sgl = &req->sgl;
515     unsigned i;
516 
517     for (i = 0; i < iov_cnt; i++) {
518         size_t acsd = MIN(accessed, iov[i].iov_len);
519         dma_memory_unmap(sgl->as, iov[i].iov_base, iov[i].iov_len, dir, acsd);
520         accessed -= acsd;
521     }
522 }
523 
/*
 * Migration description of a VMBusGpadl: id, channel id, page counts, the
 * GFN array (@num_gfns 64-bit elements) and the state.
 * The list linkage, bus back-pointer and refcount are not part of it.
 * NOTE(review): the _ALLOC variant presumably allocates @gfns on load -
 * confirm against the vmstate macros.
 */
static const VMStateDescription vmstate_gpadl = {
    .name = "vmbus/gpadl",
    .version_id = 0,
    .minimum_version_id = 0,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(id, VMBusGpadl),
        VMSTATE_UINT32(child_relid, VMBusGpadl),
        VMSTATE_UINT32(num_gfns, VMBusGpadl),
        VMSTATE_UINT32(seen_gfns, VMBusGpadl),
        VMSTATE_VARRAY_UINT32_ALLOC(gfns, VMBusGpadl, num_gfns, 0,
                                    vmstate_info_uint64, uint64_t),
        VMSTATE_UINT8(state, VMBusGpadl),
        VMSTATE_END_OF_LIST()
    }
};
539 
/*
 * Fold @idx back into a ring buffer of @len bytes.
 * Only one wraparound is handled: @idx is expected to be below twice the
 * ring buffer size, and @len is subtracted at most once.
 */
static inline uint32_t rb_idx_wrap(uint32_t idx, uint32_t len)
{
    return idx >= len ? idx - len : idx;
}
552 
/*
 * Distance from @idx1 forward to @idx2 around a ring buffer of @len bytes.
 * @allow_catchup - whether @idx1 may catch up @idx2 (e.g. the read index may
 * catch up the write index but not vice versa); when it may not, the distance
 * is one byte shorter.
 * A single wraparound is applied to the result.
 */
static inline uint32_t rb_idx_delta(uint32_t idx1, uint32_t idx2, uint32_t len,
                                    bool allow_catchup)
{
    uint32_t gap = idx2 + len - idx1;

    if (!allow_catchup) {
        gap--;
    }
    if (gap >= len) {
        gap -= len;
    }
    return gap;
}
563 
564 static vmbus_ring_buffer *ringbuf_map_hdr(VMBusRingBufCommon *ringbuf)
565 {
566     vmbus_ring_buffer *rb;
567     dma_addr_t mlen = sizeof(*rb);
568 
569     rb = dma_memory_map(ringbuf->as, ringbuf->rb_addr, &mlen,
570                         DMA_DIRECTION_FROM_DEVICE);
571     if (mlen != sizeof(*rb)) {
572         dma_memory_unmap(ringbuf->as, rb, mlen,
573                          DMA_DIRECTION_FROM_DEVICE, 0);
574         return NULL;
575     }
576     return rb;
577 }
578 
579 static void ringbuf_unmap_hdr(VMBusRingBufCommon *ringbuf,
580                               vmbus_ring_buffer *rb, bool dirty)
581 {
582     assert(rb);
583 
584     dma_memory_unmap(ringbuf->as, rb, sizeof(*rb), DMA_DIRECTION_FROM_DEVICE,
585                      dirty ? sizeof(*rb) : 0);
586 }
587 
588 static void ringbuf_init_common(VMBusRingBufCommon *ringbuf, VMBusGpadl *gpadl,
589                                 AddressSpace *as, DMADirection dir,
590                                 uint32_t begin, uint32_t end)
591 {
592     ringbuf->as = as;
593     ringbuf->rb_addr = gpadl->gfns[begin] << TARGET_PAGE_BITS;
594     ringbuf->base = (begin + 1) << TARGET_PAGE_BITS;
595     ringbuf->len = (end - begin - 1) << TARGET_PAGE_BITS;
596     gpadl_iter_init(&ringbuf->iter, gpadl, as, dir);
597 }
598 
599 static int ringbufs_init(VMBusChannel *chan)
600 {
601     vmbus_ring_buffer *rb;
602     VMBusSendRingBuf *send_ringbuf = &chan->send_ringbuf;
603     VMBusRecvRingBuf *recv_ringbuf = &chan->recv_ringbuf;
604 
605     if (chan->ringbuf_send_offset <= 1 ||
606         chan->gpadl->num_gfns <= chan->ringbuf_send_offset + 1) {
607         return -EINVAL;
608     }
609 
610     ringbuf_init_common(&recv_ringbuf->common, chan->gpadl, chan->dev->dma_as,
611                         DMA_DIRECTION_TO_DEVICE, 0, chan->ringbuf_send_offset);
612     ringbuf_init_common(&send_ringbuf->common, chan->gpadl, chan->dev->dma_as,
613                         DMA_DIRECTION_FROM_DEVICE, chan->ringbuf_send_offset,
614                         chan->gpadl->num_gfns);
615     send_ringbuf->wanted = 0;
616     send_ringbuf->reserved = 0;
617 
618     rb = ringbuf_map_hdr(&recv_ringbuf->common);
619     if (!rb) {
620         return -EFAULT;
621     }
622     recv_ringbuf->rd_idx = recv_ringbuf->last_rd_idx = rb->read_index;
623     ringbuf_unmap_hdr(&recv_ringbuf->common, rb, false);
624 
625     rb = ringbuf_map_hdr(&send_ringbuf->common);
626     if (!rb) {
627         return -EFAULT;
628     }
629     send_ringbuf->wr_idx = send_ringbuf->last_wr_idx = rb->write_index;
630     send_ringbuf->last_seen_rd_idx = rb->read_index;
631     rb->feature_bits |= VMBUS_RING_BUFFER_FEAT_PENDING_SZ;
632     ringbuf_unmap_hdr(&send_ringbuf->common, rb, true);
633 
634     if (recv_ringbuf->rd_idx >= recv_ringbuf->common.len ||
635         send_ringbuf->wr_idx >= send_ringbuf->common.len) {
636         return -EOVERFLOW;
637     }
638 
639     return 0;
640 }
641 
642 /*
643  * Perform io between the GPADL-backed ringbuffer @ringbuf and @buf, wrapping
644  * around if needed.
645  * @len is assumed not to exceed the size of the ringbuffer, so only single
646  * wraparound is considered.
647  */
648 static ssize_t ringbuf_io(VMBusRingBufCommon *ringbuf, void *buf, uint32_t len)
649 {
650     ssize_t ret1 = 0, ret2 = 0;
651     uint32_t remain = ringbuf->len + ringbuf->base - ringbuf->iter.off;
652 
653     if (len >= remain) {
654         ret1 = gpadl_iter_io(&ringbuf->iter, buf, remain);
655         if (ret1 < 0) {
656             return ret1;
657         }
658         gpadl_iter_seek(&ringbuf->iter, ringbuf->base);
659         buf += remain;
660         len -= remain;
661     }
662     ret2 = gpadl_iter_io(&ringbuf->iter, buf, len);
663     if (ret2 < 0) {
664         return ret2;
665     }
666     return ret1 + ret2;
667 }
668 
669 /*
670  * Position the circular iterator within @ringbuf to offset @new_off, wrapping
671  * around if needed.
672  * @new_off is assumed not to exceed twice the size of the ringbuffer, so only
673  * single wraparound is considered.
674  */
675 static inline void ringbuf_seek(VMBusRingBufCommon *ringbuf, uint32_t new_off)
676 {
677     gpadl_iter_seek(&ringbuf->iter,
678                     ringbuf->base + rb_idx_wrap(new_off, ringbuf->len));
679 }
680 
681 static inline uint32_t ringbuf_tell(VMBusRingBufCommon *ringbuf)
682 {
683     return ringbuf->iter.off - ringbuf->base;
684 }
685 
686 static inline void ringbuf_start_io(VMBusRingBufCommon *ringbuf)
687 {
688     gpadl_iter_start_io(&ringbuf->iter);
689 }
690 
691 static inline void ringbuf_end_io(VMBusRingBufCommon *ringbuf)
692 {
693     gpadl_iter_end_io(&ringbuf->iter);
694 }
695 
696 VMBusDevice *vmbus_channel_device(VMBusChannel *chan)
697 {
698     return chan->dev;
699 }
700 
701 VMBusChannel *vmbus_device_channel(VMBusDevice *dev, uint32_t chan_idx)
702 {
703     if (chan_idx >= dev->num_channels) {
704         return NULL;
705     }
706     return &dev->channels[chan_idx];
707 }
708 
709 uint32_t vmbus_channel_idx(VMBusChannel *chan)
710 {
711     return chan - chan->dev->channels;
712 }
713 
714 void vmbus_channel_notify_host(VMBusChannel *chan)
715 {
716     event_notifier_set(&chan->notifier);
717 }
718 
719 bool vmbus_channel_is_open(VMBusChannel *chan)
720 {
721     return chan->is_open;
722 }
723 
724 /*
725  * Notify the guest side about the data to work on in the channel ring buffer.
726  * The notification is done by signaling a dedicated per-channel SynIC event
727  * flag (more recent guests) or setting a bit in the interrupt page and firing
728  * the VMBus SINT (older guests).
729  */
730 static int vmbus_channel_notify_guest(VMBusChannel *chan)
731 {
732     int res = 0;
733     unsigned long *int_map, mask;
734     unsigned idx;
735     hwaddr addr = chan->vmbus->int_page_gpa;
736     hwaddr len = TARGET_PAGE_SIZE / 2, dirty = 0;
737 
738     trace_vmbus_channel_notify_guest(chan->id);
739 
740     if (!addr) {
741         return hyperv_set_event_flag(chan->notify_route, chan->id);
742     }
743 
744     int_map = cpu_physical_memory_map(addr, &len, 1);
745     if (len != TARGET_PAGE_SIZE / 2) {
746         res = -ENXIO;
747         goto unmap;
748     }
749 
750     idx = BIT_WORD(chan->id);
751     mask = BIT_MASK(chan->id);
752     if ((atomic_fetch_or(&int_map[idx], mask) & mask) != mask) {
753         res = hyperv_sint_route_set_sint(chan->notify_route);
754         dirty = len;
755     }
756 
757 unmap:
758     cpu_physical_memory_unmap(int_map, len, 1, dirty);
759     return res;
760 }
761 
762 #define VMBUS_PKT_TRAILER      sizeof(uint64_t)
763 
764 static uint32_t vmbus_pkt_hdr_set_offsets(vmbus_packet_hdr *hdr,
765                                           uint32_t desclen, uint32_t msglen)
766 {
767     hdr->offset_qwords = sizeof(*hdr) / sizeof(uint64_t) +
768         DIV_ROUND_UP(desclen, sizeof(uint64_t));
769     hdr->len_qwords = hdr->offset_qwords +
770         DIV_ROUND_UP(msglen, sizeof(uint64_t));
771     return hdr->len_qwords * sizeof(uint64_t) + VMBUS_PKT_TRAILER;
772 }
773 
774 /*
775  * Simplified ring buffer operation with paired barriers annotations in the
776  * producer and consumer loops:
777  *
778  * producer                           * consumer
779  * ~~~~~~~~                           * ~~~~~~~~
780  * write pending_send_sz              * read write_index
781  * smp_mb                       [A]   * smp_mb                       [C]
782  * read read_index                    * read packet
783  * smp_mb                       [B]   * read/write out-of-band data
784  * read/write out-of-band data        * smp_mb                       [B]
785  * write packet                       * write read_index
786  * smp_mb                       [C]   * smp_mb                       [A]
787  * write write_index                  * read pending_send_sz
788  * smp_wmb                      [D]   * smp_rmb                      [D]
789  * write pending_send_sz              * read write_index
790  * ...                                * ...
791  */
792 
793 static inline uint32_t ringbuf_send_avail(VMBusSendRingBuf *ringbuf)
794 {
795     /* don't trust guest data */
796     if (ringbuf->last_seen_rd_idx >= ringbuf->common.len) {
797         return 0;
798     }
799     return rb_idx_delta(ringbuf->wr_idx, ringbuf->last_seen_rd_idx,
800                         ringbuf->common.len, false);
801 }
802 
803 static ssize_t ringbuf_send_update_idx(VMBusChannel *chan)
804 {
805     VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
806     vmbus_ring_buffer *rb;
807     uint32_t written;
808 
809     written = rb_idx_delta(ringbuf->last_wr_idx, ringbuf->wr_idx,
810                            ringbuf->common.len, true);
811     if (!written) {
812         return 0;
813     }
814 
815     rb = ringbuf_map_hdr(&ringbuf->common);
816     if (!rb) {
817         return -EFAULT;
818     }
819 
820     ringbuf->reserved -= written;
821 
822     /* prevent reorder with the data operation and packet write */
823     smp_mb();                   /* barrier pair [C] */
824     rb->write_index = ringbuf->wr_idx;
825 
826     /*
827      * If the producer earlier indicated that it wants to be notified when the
828      * consumer frees certain amount of space in the ring buffer, that amount
829      * is reduced by the size of the completed write.
830      */
831     if (ringbuf->wanted) {
832         /* otherwise reservation would fail */
833         assert(ringbuf->wanted < written);
834         ringbuf->wanted -= written;
835         /* prevent reorder with write_index write */
836         smp_wmb();              /* barrier pair [D] */
837         rb->pending_send_sz = ringbuf->wanted;
838     }
839 
840     /* prevent reorder with write_index or pending_send_sz write */
841     smp_mb();                   /* barrier pair [A] */
842     ringbuf->last_seen_rd_idx = rb->read_index;
843 
844     /*
845      * The consumer may have missed the reduction of pending_send_sz and skip
846      * notification, so re-check the blocking condition, and, if it's no longer
847      * true, ensure processing another iteration by simulating consumer's
848      * notification.
849      */
850     if (ringbuf_send_avail(ringbuf) >= ringbuf->wanted) {
851         vmbus_channel_notify_host(chan);
852     }
853 
854     /* skip notification by consumer's request */
855     if (rb->interrupt_mask) {
856         goto out;
857     }
858 
859     /*
860      * The consumer hasn't caught up with the producer's previous state so it's
861      * not blocked.
862      * (last_seen_rd_idx comes from the guest but it's safe to use w/o
863      * validation here as it only affects notification.)
864      */
865     if (rb_idx_delta(ringbuf->last_seen_rd_idx, ringbuf->wr_idx,
866                      ringbuf->common.len, true) > written) {
867         goto out;
868     }
869 
870     vmbus_channel_notify_guest(chan);
871 out:
872     ringbuf_unmap_hdr(&ringbuf->common, rb, true);
873     ringbuf->last_wr_idx = ringbuf->wr_idx;
874     return written;
875 }
876 
877 int vmbus_channel_reserve(VMBusChannel *chan,
878                           uint32_t desclen, uint32_t msglen)
879 {
880     VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
881     vmbus_ring_buffer *rb = NULL;
882     vmbus_packet_hdr hdr;
883     uint32_t needed = ringbuf->reserved +
884         vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);
885 
886     /* avoid touching the guest memory if possible */
887     if (likely(needed <= ringbuf_send_avail(ringbuf))) {
888         goto success;
889     }
890 
891     rb = ringbuf_map_hdr(&ringbuf->common);
892     if (!rb) {
893         return -EFAULT;
894     }
895 
896     /* fetch read index from guest memory and try again */
897     ringbuf->last_seen_rd_idx = rb->read_index;
898 
899     if (likely(needed <= ringbuf_send_avail(ringbuf))) {
900         goto success;
901     }
902 
903     rb->pending_send_sz = needed;
904 
905     /*
906      * The consumer may have made progress and freed up some space before
907      * seeing updated pending_send_sz, so re-read read_index (preventing
908      * reorder with the pending_send_sz write) and try again.
909      */
910     smp_mb();                   /* barrier pair [A] */
911     ringbuf->last_seen_rd_idx = rb->read_index;
912 
913     if (needed > ringbuf_send_avail(ringbuf)) {
914         goto out;
915     }
916 
917 success:
918     ringbuf->reserved = needed;
919     needed = 0;
920 
921     /* clear pending_send_sz if it was set */
922     if (ringbuf->wanted) {
923         if (!rb) {
924             rb = ringbuf_map_hdr(&ringbuf->common);
925             if (!rb) {
926                 /* failure to clear pending_send_sz is non-fatal */
927                 goto out;
928             }
929         }
930 
931         rb->pending_send_sz = 0;
932     }
933 
934     /* prevent reorder of the following data operation with read_index read */
935     smp_mb();                   /* barrier pair [B] */
936 
937 out:
938     if (rb) {
939         ringbuf_unmap_hdr(&ringbuf->common, rb, ringbuf->wanted == needed);
940     }
941     ringbuf->wanted = needed;
942     return needed ? -ENOSPC : 0;
943 }
944 
/*
 * Write one packet of type @pkt_type into the send ring buffer of @chan and
 * publish it to the guest.
 * The packet consists of a header, an optional descriptor (@desc, @desclen
 * bytes), the message (@msg, @msglen bytes), and room for the trailer.
 * @need_comp sets VMBUS_PACKET_FLAG_REQUEST_COMPLETION in the header;
 * @transaction_id is stored in the header as is.
 *
 * Enough space must have been reserved beforehand (asserted against
 * @reserved, see vmbus_channel_reserve()).
 *
 * Returns -EINVAL if the channel is not open, the negative error of a failed
 * ring buffer write, or otherwise the result of ringbuf_send_update_idx().
 */
ssize_t vmbus_channel_send(VMBusChannel *chan, uint16_t pkt_type,
                           void *desc, uint32_t desclen,
                           void *msg, uint32_t msglen,
                           bool need_comp, uint64_t transaction_id)
{
    ssize_t ret = 0;
    vmbus_packet_hdr hdr;
    uint32_t totlen;
    VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;

    if (!vmbus_channel_is_open(chan)) {
        return -EINVAL;
    }

    totlen = vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);
    hdr.type = pkt_type;
    hdr.flags = need_comp ? VMBUS_PACKET_FLAG_REQUEST_COMPLETION : 0;
    hdr.transaction_id = transaction_id;

    assert(totlen <= ringbuf->reserved);

    ringbuf_start_io(&ringbuf->common);
    ringbuf_seek(&ringbuf->common, ringbuf->wr_idx);
    ret = ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr));
    if (ret < 0) {
        goto out;
    }
    if (desclen) {
        assert(desc);
        ret = ringbuf_io(&ringbuf->common, desc, desclen);
        if (ret < 0) {
            goto out;
        }
        /* skip the padding up to the 64-bit aligned start of the message */
        ringbuf_seek(&ringbuf->common,
                     ringbuf->wr_idx + hdr.offset_qwords * sizeof(uint64_t));
    }
    ret = ringbuf_io(&ringbuf->common, msg, msglen);
    if (ret < 0) {
        goto out;
    }
    /* advance past the whole packet, padding and trailer included */
    ringbuf_seek(&ringbuf->common, ringbuf->wr_idx + totlen);
    ringbuf->wr_idx = ringbuf_tell(&ringbuf->common);
    ret = 0;
out:
    ringbuf_end_io(&ringbuf->common);
    if (ret) {
        return ret;
    }
    return ringbuf_send_update_idx(chan);
}
995 
996 ssize_t vmbus_channel_send_completion(VMBusChanReq *req,
997                                       void *msg, uint32_t msglen)
998 {
999     assert(req->need_comp);
1000     return vmbus_channel_send(req->chan, VMBUS_PACKET_COMP, NULL, 0,
1001                               msg, msglen, false, req->transaction_id);
1002 }
1003 
1004 static int sgl_from_gpa_ranges(QEMUSGList *sgl, VMBusDevice *dev,
1005                                VMBusRingBufCommon *ringbuf, uint32_t len)
1006 {
1007     int ret;
1008     vmbus_pkt_gpa_direct hdr;
1009     hwaddr curaddr = 0;
1010     hwaddr curlen = 0;
1011     int num;
1012 
1013     if (len < sizeof(hdr)) {
1014         return -EIO;
1015     }
1016     ret = ringbuf_io(ringbuf, &hdr, sizeof(hdr));
1017     if (ret < 0) {
1018         return ret;
1019     }
1020     len -= sizeof(hdr);
1021 
1022     num = (len - hdr.rangecount * sizeof(vmbus_gpa_range)) / sizeof(uint64_t);
1023     if (num < 0) {
1024         return -EIO;
1025     }
1026     qemu_sglist_init(sgl, DEVICE(dev), num, ringbuf->as);
1027 
1028     for (; hdr.rangecount; hdr.rangecount--) {
1029         vmbus_gpa_range range;
1030 
1031         if (len < sizeof(range)) {
1032             goto eio;
1033         }
1034         ret = ringbuf_io(ringbuf, &range, sizeof(range));
1035         if (ret < 0) {
1036             goto err;
1037         }
1038         len -= sizeof(range);
1039 
1040         if (range.byte_offset & TARGET_PAGE_MASK) {
1041             goto eio;
1042         }
1043 
1044         for (; range.byte_count; range.byte_offset = 0) {
1045             uint64_t paddr;
1046             uint32_t plen = MIN(range.byte_count,
1047                                 TARGET_PAGE_SIZE - range.byte_offset);
1048 
1049             if (len < sizeof(uint64_t)) {
1050                 goto eio;
1051             }
1052             ret = ringbuf_io(ringbuf, &paddr, sizeof(paddr));
1053             if (ret < 0) {
1054                 goto err;
1055             }
1056             len -= sizeof(uint64_t);
1057             paddr <<= TARGET_PAGE_BITS;
1058             paddr |= range.byte_offset;
1059             range.byte_count -= plen;
1060 
1061             if (curaddr + curlen == paddr) {
1062                 /* consecutive fragments - join */
1063                 curlen += plen;
1064             } else {
1065                 if (curlen) {
1066                     qemu_sglist_add(sgl, curaddr, curlen);
1067                 }
1068 
1069                 curaddr = paddr;
1070                 curlen = plen;
1071             }
1072         }
1073     }
1074 
1075     if (curlen) {
1076         qemu_sglist_add(sgl, curaddr, curlen);
1077     }
1078 
1079     return 0;
1080 eio:
1081     ret = -EIO;
1082 err:
1083     qemu_sglist_destroy(sgl);
1084     return ret;
1085 }
1086 
1087 static VMBusChanReq *vmbus_alloc_req(VMBusChannel *chan,
1088                                      uint32_t size, uint16_t pkt_type,
1089                                      uint32_t msglen, uint64_t transaction_id,
1090                                      bool need_comp)
1091 {
1092     VMBusChanReq *req;
1093     uint32_t msgoff = QEMU_ALIGN_UP(size, __alignof__(*req->msg));
1094     uint32_t totlen = msgoff + msglen;
1095 
1096     req = g_malloc0(totlen);
1097     req->chan = chan;
1098     req->pkt_type = pkt_type;
1099     req->msg = (void *)req + msgoff;
1100     req->msglen = msglen;
1101     req->transaction_id = transaction_id;
1102     req->need_comp = need_comp;
1103     return req;
1104 }
1105 
1106 int vmbus_channel_recv_start(VMBusChannel *chan)
1107 {
1108     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1109     vmbus_ring_buffer *rb;
1110 
1111     rb = ringbuf_map_hdr(&ringbuf->common);
1112     if (!rb) {
1113         return -EFAULT;
1114     }
1115     ringbuf->last_seen_wr_idx = rb->write_index;
1116     ringbuf_unmap_hdr(&ringbuf->common, rb, false);
1117 
1118     if (ringbuf->last_seen_wr_idx >= ringbuf->common.len) {
1119         return -EOVERFLOW;
1120     }
1121 
1122     /* prevent reorder of the following data operation with write_index read */
1123     smp_mb();                   /* barrier pair [C] */
1124     return 0;
1125 }
1126 
1127 void *vmbus_channel_recv_peek(VMBusChannel *chan, uint32_t size)
1128 {
1129     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1130     vmbus_packet_hdr hdr = {};
1131     VMBusChanReq *req;
1132     uint32_t avail;
1133     uint32_t totlen, pktlen, msglen, msgoff, desclen;
1134 
1135     assert(size >= sizeof(*req));
1136 
1137     /* safe as last_seen_wr_idx is validated in vmbus_channel_recv_start */
1138     avail = rb_idx_delta(ringbuf->rd_idx, ringbuf->last_seen_wr_idx,
1139                          ringbuf->common.len, true);
1140     if (avail < sizeof(hdr)) {
1141         return NULL;
1142     }
1143 
1144     ringbuf_seek(&ringbuf->common, ringbuf->rd_idx);
1145     if (ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr)) < 0) {
1146         return NULL;
1147     }
1148 
1149     pktlen = hdr.len_qwords * sizeof(uint64_t);
1150     totlen = pktlen + VMBUS_PKT_TRAILER;
1151     if (totlen > avail) {
1152         return NULL;
1153     }
1154 
1155     msgoff = hdr.offset_qwords * sizeof(uint64_t);
1156     if (msgoff > pktlen || msgoff < sizeof(hdr)) {
1157         error_report("%s: malformed packet: %u %u", __func__, msgoff, pktlen);
1158         return NULL;
1159     }
1160 
1161     msglen = pktlen - msgoff;
1162 
1163     req = vmbus_alloc_req(chan, size, hdr.type, msglen, hdr.transaction_id,
1164                           hdr.flags & VMBUS_PACKET_FLAG_REQUEST_COMPLETION);
1165 
1166     switch (hdr.type) {
1167     case VMBUS_PACKET_DATA_USING_GPA_DIRECT:
1168         desclen = msgoff - sizeof(hdr);
1169         if (sgl_from_gpa_ranges(&req->sgl, chan->dev, &ringbuf->common,
1170                                 desclen) < 0) {
1171             error_report("%s: failed to convert GPA ranges to SGL", __func__);
1172             goto free_req;
1173         }
1174         break;
1175     case VMBUS_PACKET_DATA_INBAND:
1176     case VMBUS_PACKET_COMP:
1177         break;
1178     default:
1179         error_report("%s: unexpected msg type: %x", __func__, hdr.type);
1180         goto free_req;
1181     }
1182 
1183     ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + msgoff);
1184     if (ringbuf_io(&ringbuf->common, req->msg, msglen) < 0) {
1185         goto free_req;
1186     }
1187     ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + totlen);
1188 
1189     return req;
1190 free_req:
1191     vmbus_free_req(req);
1192     return NULL;
1193 }
1194 
1195 void vmbus_channel_recv_pop(VMBusChannel *chan)
1196 {
1197     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1198     ringbuf->rd_idx = ringbuf_tell(&ringbuf->common);
1199 }
1200 
1201 ssize_t vmbus_channel_recv_done(VMBusChannel *chan)
1202 {
1203     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1204     vmbus_ring_buffer *rb;
1205     uint32_t read;
1206 
1207     read = rb_idx_delta(ringbuf->last_rd_idx, ringbuf->rd_idx,
1208                         ringbuf->common.len, true);
1209     if (!read) {
1210         return 0;
1211     }
1212 
1213     rb = ringbuf_map_hdr(&ringbuf->common);
1214     if (!rb) {
1215         return -EFAULT;
1216     }
1217 
1218     /* prevent reorder with the data operation and packet read */
1219     smp_mb();                   /* barrier pair [B] */
1220     rb->read_index = ringbuf->rd_idx;
1221 
1222     /* prevent reorder of the following pending_send_sz read */
1223     smp_mb();                   /* barrier pair [A] */
1224 
1225     if (rb->interrupt_mask) {
1226         goto out;
1227     }
1228 
1229     if (rb->feature_bits & VMBUS_RING_BUFFER_FEAT_PENDING_SZ) {
1230         uint32_t wr_idx, wr_avail;
1231         uint32_t wanted = rb->pending_send_sz;
1232 
1233         if (!wanted) {
1234             goto out;
1235         }
1236 
1237         /* prevent reorder with pending_send_sz read */
1238         smp_rmb();              /* barrier pair [D] */
1239         wr_idx = rb->write_index;
1240 
1241         wr_avail = rb_idx_delta(wr_idx, ringbuf->rd_idx, ringbuf->common.len,
1242                                 true);
1243 
1244         /* the producer wasn't blocked on the consumer state */
1245         if (wr_avail >= read + wanted) {
1246             goto out;
1247         }
1248         /* there's not enough space for the producer to make progress */
1249         if (wr_avail < wanted) {
1250             goto out;
1251         }
1252     }
1253 
1254     vmbus_channel_notify_guest(chan);
1255 out:
1256     ringbuf_unmap_hdr(&ringbuf->common, rb, true);
1257     ringbuf->last_rd_idx = ringbuf->rd_idx;
1258     return read;
1259 }
1260 
1261 void vmbus_free_req(void *req)
1262 {
1263     VMBusChanReq *r = req;
1264 
1265     if (!req) {
1266         return;
1267     }
1268 
1269     if (r->sgl.dev) {
1270         qemu_sglist_destroy(&r->sgl);
1271     }
1272     g_free(req);
1273 }
1274 
/*
 * Migration description of a single ScatterGatherEntry: its @base and @len
 * fields, both as 64-bit values.
 */
static const VMStateDescription vmstate_sgent = {
    .name = "vmbus/sgentry",
    .version_id = 0,
    .minimum_version_id = 0,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(base, ScatterGatherEntry),
        VMSTATE_UINT64(len, ScatterGatherEntry),
        VMSTATE_END_OF_LIST()
    }
};
1285 
/*
 * Flat representation of a VMBusChanReq used for migration; filled by
 * vmbus_save_req() and consumed by vmbus_load_req().
 */
typedef struct VMBusChanReqSave {
    /* subchannel index of the channel the request belongs to */
    uint16_t chan_idx;
    uint16_t pkt_type;
    /* length in bytes of the buffer pointed to by @msg */
    uint32_t msglen;
    void *msg;
    uint64_t transaction_id;
    bool need_comp;
    /* number of entries in @sgl */
    uint32_t num;
    ScatterGatherEntry *sgl;
} VMBusChanReqSave;
1296 
/*
 * Migration description of VMBusChanReqSave.  The message is transferred as
 * a variable-size buffer whose length is given by @msglen, and the
 * scatter-gather list as an array of @num entries, each described by
 * vmstate_sgent.
 */
static const VMStateDescription vmstate_vmbus_chan_req = {
    .name = "vmbus/vmbus_chan_req",
    .version_id = 0,
    .minimum_version_id = 0,
    .fields = (VMStateField[]) {
        VMSTATE_UINT16(chan_idx, VMBusChanReqSave),
        VMSTATE_UINT16(pkt_type, VMBusChanReqSave),
        /* @msglen must precede @msg as it determines the buffer size */
        VMSTATE_UINT32(msglen, VMBusChanReqSave),
        VMSTATE_VBUFFER_ALLOC_UINT32(msg, VMBusChanReqSave, 0, NULL, msglen),
        VMSTATE_UINT64(transaction_id, VMBusChanReqSave),
        VMSTATE_BOOL(need_comp, VMBusChanReqSave),
        /* @num must precede @sgl as it determines the array length */
        VMSTATE_UINT32(num, VMBusChanReqSave),
        VMSTATE_STRUCT_VARRAY_POINTER_UINT32(sgl, VMBusChanReqSave, num,
                                             vmstate_sgent, ScatterGatherEntry),
        VMSTATE_END_OF_LIST()
    }
};
1314 
1315 void vmbus_save_req(QEMUFile *f, VMBusChanReq *req)
1316 {
1317     VMBusChanReqSave req_save;
1318 
1319     req_save.chan_idx = req->chan->subchan_idx;
1320     req_save.pkt_type = req->pkt_type;
1321     req_save.msglen = req->msglen;
1322     req_save.msg = req->msg;
1323     req_save.transaction_id = req->transaction_id;
1324     req_save.need_comp = req->need_comp;
1325     req_save.num = req->sgl.nsg;
1326     req_save.sgl = g_memdup(req->sgl.sg,
1327                             req_save.num * sizeof(ScatterGatherEntry));
1328 
1329     vmstate_save_state(f, &vmstate_vmbus_chan_req, &req_save, NULL);
1330 
1331     g_free(req_save.sgl);
1332 }
1333 
1334 void *vmbus_load_req(QEMUFile *f, VMBusDevice *dev, uint32_t size)
1335 {
1336     VMBusChanReqSave req_save;
1337     VMBusChanReq *req = NULL;
1338     VMBusChannel *chan = NULL;
1339     uint32_t i;
1340 
1341     vmstate_load_state(f, &vmstate_vmbus_chan_req, &req_save, 0);
1342 
1343     if (req_save.chan_idx >= dev->num_channels) {
1344         error_report("%s: %u(chan_idx) > %u(num_channels)", __func__,
1345                      req_save.chan_idx, dev->num_channels);
1346         goto out;
1347     }
1348     chan = &dev->channels[req_save.chan_idx];
1349 
1350     if (vmbus_channel_reserve(chan, 0, req_save.msglen)) {
1351         goto out;
1352     }
1353 
1354     req = vmbus_alloc_req(chan, size, req_save.pkt_type, req_save.msglen,
1355                           req_save.transaction_id, req_save.need_comp);
1356     if (req_save.msglen) {
1357         memcpy(req->msg, req_save.msg, req_save.msglen);
1358     }
1359 
1360     for (i = 0; i < req_save.num; i++) {
1361         qemu_sglist_add(&req->sgl, req_save.sgl[i].base, req_save.sgl[i].len);
1362     }
1363 
1364 out:
1365     if (req_save.msglen) {
1366         g_free(req_save.msg);
1367     }
1368     if (req_save.num) {
1369         g_free(req_save.sgl);
1370     }
1371     return req;
1372 }
1373 
1374 static void channel_event_cb(EventNotifier *e)
1375 {
1376     VMBusChannel *chan = container_of(e, VMBusChannel, notifier);
1377     if (event_notifier_test_and_clear(e)) {
1378         /*
1379          * All receives are supposed to happen within the device worker, so
1380          * bracket it with ringbuf_start/end_io on the receive ringbuffer, and
1381          * potentially reuse the cached mapping throughout the worker.
1382          * Can't do this for sends as they may happen outside the device
1383          * worker.
1384          */
1385         VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1386         ringbuf_start_io(&ringbuf->common);
1387         chan->notify_cb(chan);
1388         ringbuf_end_io(&ringbuf->common);
1389 
1390     }
1391 }
1392 
1393 static int alloc_chan_id(VMBus *vmbus)
1394 {
1395     int ret;
1396 
1397     ret = find_next_zero_bit(vmbus->chanid_bitmap, VMBUS_CHANID_COUNT, 0);
1398     if (ret == VMBUS_CHANID_COUNT) {
1399         return -ENOMEM;
1400     }
1401     return ret + VMBUS_FIRST_CHANID;
1402 }
1403 
1404 static int register_chan_id(VMBusChannel *chan)
1405 {
1406     return test_and_set_bit(chan->id - VMBUS_FIRST_CHANID,
1407                             chan->vmbus->chanid_bitmap) ? -EEXIST : 0;
1408 }
1409 
1410 static void unregister_chan_id(VMBusChannel *chan)
1411 {
1412     clear_bit(chan->id - VMBUS_FIRST_CHANID, chan->vmbus->chanid_bitmap);
1413 }
1414 
1415 static uint32_t chan_connection_id(VMBusChannel *chan)
1416 {
1417     return VMBUS_CHAN_CONNECTION_OFFSET + chan->id;
1418 }
1419 
1420 static void init_channel(VMBus *vmbus, VMBusDevice *dev, VMBusDeviceClass *vdc,
1421                          VMBusChannel *chan, uint16_t idx, Error **errp)
1422 {
1423     int res;
1424 
1425     chan->dev = dev;
1426     chan->notify_cb = vdc->chan_notify_cb;
1427     chan->subchan_idx = idx;
1428     chan->vmbus = vmbus;
1429 
1430     res = alloc_chan_id(vmbus);
1431     if (res < 0) {
1432         error_setg(errp, "no spare channel id");
1433         return;
1434     }
1435     chan->id = res;
1436     register_chan_id(chan);
1437 
1438     /*
1439      * The guest drivers depend on the device subchannels (idx #1+) to be
1440      * offered after the primary channel (idx #0) of that device.  To ensure
1441      * that, record the channels on the channel list in the order they appear
1442      * within the device.
1443      */
1444     QTAILQ_INSERT_TAIL(&vmbus->channel_list, chan, link);
1445 }
1446 
1447 static void deinit_channel(VMBusChannel *chan)
1448 {
1449     assert(chan->state == VMCHAN_INIT);
1450     QTAILQ_REMOVE(&chan->vmbus->channel_list, chan, link);
1451     unregister_chan_id(chan);
1452 }
1453 
1454 static void create_channels(VMBus *vmbus, VMBusDevice *dev, Error **errp)
1455 {
1456     uint16_t i;
1457     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(dev);
1458     Error *err = NULL;
1459 
1460     dev->num_channels = vdc->num_channels ? vdc->num_channels(dev) : 1;
1461     if (dev->num_channels < 1) {
1462         error_setg(errp, "invalid #channels: %u", dev->num_channels);
1463         return;
1464     }
1465 
1466     dev->channels = g_new0(VMBusChannel, dev->num_channels);
1467     for (i = 0; i < dev->num_channels; i++) {
1468         init_channel(vmbus, dev, vdc, &dev->channels[i], i, &err);
1469         if (err) {
1470             goto err_init;
1471         }
1472     }
1473 
1474     return;
1475 
1476 err_init:
1477     while (i--) {
1478         deinit_channel(&dev->channels[i]);
1479     }
1480     error_propagate(errp, err);
1481 }
1482 
1483 static void free_channels(VMBusDevice *dev)
1484 {
1485     uint16_t i;
1486     for (i = 0; i < dev->num_channels; i++) {
1487         deinit_channel(&dev->channels[i]);
1488     }
1489     g_free(dev->channels);
1490 }
1491 
1492 static HvSintRoute *make_sint_route(VMBus *vmbus, uint32_t vp_index)
1493 {
1494     VMBusChannel *chan;
1495 
1496     if (vp_index == vmbus->target_vp) {
1497         hyperv_sint_route_ref(vmbus->sint_route);
1498         return vmbus->sint_route;
1499     }
1500 
1501     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1502         if (chan->target_vp == vp_index && vmbus_channel_is_open(chan)) {
1503             hyperv_sint_route_ref(chan->notify_route);
1504             return chan->notify_route;
1505         }
1506     }
1507 
1508     return hyperv_sint_route_new(vp_index, VMBUS_SINT, NULL, NULL);
1509 }
1510 
1511 static void open_channel(VMBusChannel *chan)
1512 {
1513     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
1514 
1515     chan->gpadl = vmbus_get_gpadl(chan, chan->ringbuf_gpadl);
1516     if (!chan->gpadl) {
1517         return;
1518     }
1519 
1520     if (ringbufs_init(chan)) {
1521         goto put_gpadl;
1522     }
1523 
1524     if (event_notifier_init(&chan->notifier, 0)) {
1525         goto put_gpadl;
1526     }
1527 
1528     event_notifier_set_handler(&chan->notifier, channel_event_cb);
1529 
1530     if (hyperv_set_event_flag_handler(chan_connection_id(chan),
1531                                       &chan->notifier)) {
1532         goto cleanup_notifier;
1533     }
1534 
1535     chan->notify_route = make_sint_route(chan->vmbus, chan->target_vp);
1536     if (!chan->notify_route) {
1537         goto clear_event_flag_handler;
1538     }
1539 
1540     if (vdc->open_channel && vdc->open_channel(chan)) {
1541         goto unref_sint_route;
1542     }
1543 
1544     chan->is_open = true;
1545     return;
1546 
1547 unref_sint_route:
1548     hyperv_sint_route_unref(chan->notify_route);
1549 clear_event_flag_handler:
1550     hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
1551 cleanup_notifier:
1552     event_notifier_set_handler(&chan->notifier, NULL);
1553     event_notifier_cleanup(&chan->notifier);
1554 put_gpadl:
1555     vmbus_put_gpadl(chan->gpadl);
1556 }
1557 
1558 static void close_channel(VMBusChannel *chan)
1559 {
1560     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
1561 
1562     if (!chan->is_open) {
1563         return;
1564     }
1565 
1566     if (vdc->close_channel) {
1567         vdc->close_channel(chan);
1568     }
1569 
1570     hyperv_sint_route_unref(chan->notify_route);
1571     hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
1572     event_notifier_set_handler(&chan->notifier, NULL);
1573     event_notifier_cleanup(&chan->notifier);
1574     vmbus_put_gpadl(chan->gpadl);
1575     chan->is_open = false;
1576 }
1577 
1578 static int channel_post_load(void *opaque, int version_id)
1579 {
1580     VMBusChannel *chan = opaque;
1581 
1582     return register_chan_id(chan);
1583 }
1584 
/*
 * Migration description of a VMBusChannel.  Only the fields listed below are
 * transferred; channel_post_load() runs after they have been loaded.
 */
static const VMStateDescription vmstate_channel = {
    .name = "vmbus/channel",
    .version_id = 0,
    .minimum_version_id = 0,
    .post_load = channel_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(id, VMBusChannel),
        VMSTATE_UINT16(subchan_idx, VMBusChannel),
        VMSTATE_UINT32(open_id, VMBusChannel),
        VMSTATE_UINT32(target_vp, VMBusChannel),
        VMSTATE_UINT32(ringbuf_gpadl, VMBusChannel),
        VMSTATE_UINT32(ringbuf_send_offset, VMBusChannel),
        VMSTATE_UINT8(offer_state, VMBusChannel),
        VMSTATE_UINT8(state, VMBusChannel),
        VMSTATE_END_OF_LIST()
    }
};
1602 
1603 static VMBusChannel *find_channel(VMBus *vmbus, uint32_t id)
1604 {
1605     VMBusChannel *chan;
1606     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1607         if (chan->id == id) {
1608             return chan;
1609         }
1610     }
1611     return NULL;
1612 }
1613 
1614 static int enqueue_incoming_message(VMBus *vmbus,
1615                                     const struct hyperv_post_message_input *msg)
1616 {
1617     int ret = 0;
1618     uint8_t idx, prev_size;
1619 
1620     qemu_mutex_lock(&vmbus->rx_queue_lock);
1621 
1622     if (vmbus->rx_queue_size == HV_MSG_QUEUE_LEN) {
1623         ret = -ENOBUFS;
1624         goto out;
1625     }
1626 
1627     prev_size = vmbus->rx_queue_size;
1628     idx = (vmbus->rx_queue_head + vmbus->rx_queue_size) % HV_MSG_QUEUE_LEN;
1629     memcpy(&vmbus->rx_queue[idx], msg, sizeof(*msg));
1630     vmbus->rx_queue_size++;
1631 
1632     /* only need to resched if the queue was empty before */
1633     if (!prev_size) {
1634         vmbus_resched(vmbus);
1635     }
1636 out:
1637     qemu_mutex_unlock(&vmbus->rx_queue_lock);
1638     return ret;
1639 }
1640 
1641 static uint16_t vmbus_recv_message(const struct hyperv_post_message_input *msg,
1642                                    void *data)
1643 {
1644     VMBus *vmbus = data;
1645     struct vmbus_message_header *vmbus_msg;
1646 
1647     if (msg->message_type != HV_MESSAGE_VMBUS) {
1648         return HV_STATUS_INVALID_HYPERCALL_INPUT;
1649     }
1650 
1651     if (msg->payload_size < sizeof(struct vmbus_message_header)) {
1652         return HV_STATUS_INVALID_HYPERCALL_INPUT;
1653     }
1654 
1655     vmbus_msg = (struct vmbus_message_header *)msg->payload;
1656 
1657     trace_vmbus_recv_message(vmbus_msg->message_type, msg->payload_size);
1658 
1659     if (vmbus_msg->message_type == VMBUS_MSG_INVALID ||
1660         vmbus_msg->message_type >= VMBUS_MSG_COUNT) {
1661         error_report("vmbus: unknown message type %#x",
1662                      vmbus_msg->message_type);
1663         return HV_STATUS_INVALID_HYPERCALL_INPUT;
1664     }
1665 
1666     if (enqueue_incoming_message(vmbus, msg)) {
1667         return HV_STATUS_INSUFFICIENT_BUFFERS;
1668     }
1669     return HV_STATUS_SUCCESS;
1670 }
1671 
1672 static bool vmbus_initialized(VMBus *vmbus)
1673 {
1674     return vmbus->version > 0 && vmbus->version <= VMBUS_VERSION_CURRENT;
1675 }
1676 
1677 static void vmbus_reset_all(VMBus *vmbus)
1678 {
1679     qbus_reset_all(BUS(vmbus));
1680 }
1681 
1682 static void post_msg(VMBus *vmbus, void *msgdata, uint32_t msglen)
1683 {
1684     int ret;
1685     struct hyperv_message msg = {
1686         .header.message_type = HV_MESSAGE_VMBUS,
1687     };
1688 
1689     assert(!vmbus->msg_in_progress);
1690     assert(msglen <= sizeof(msg.payload));
1691     assert(msglen >= sizeof(struct vmbus_message_header));
1692 
1693     vmbus->msg_in_progress = true;
1694 
1695     trace_vmbus_post_msg(((struct vmbus_message_header *)msgdata)->message_type,
1696                          msglen);
1697 
1698     memcpy(msg.payload, msgdata, msglen);
1699     msg.header.payload_size = ROUND_UP(msglen, VMBUS_MESSAGE_SIZE_ALIGN);
1700 
1701     ret = hyperv_post_msg(vmbus->sint_route, &msg);
1702     if (ret == 0 || ret == -EAGAIN) {
1703         return;
1704     }
1705 
1706     error_report("message delivery fatal failure: %d; aborting vmbus", ret);
1707     vmbus_reset_all(vmbus);
1708 }
1709 
1710 static int vmbus_init(VMBus *vmbus)
1711 {
1712     if (vmbus->target_vp != (uint32_t)-1) {
1713         vmbus->sint_route = hyperv_sint_route_new(vmbus->target_vp, VMBUS_SINT,
1714                                                   vmbus_msg_cb, vmbus);
1715         if (!vmbus->sint_route) {
1716             error_report("failed to set up SINT route");
1717             return -ENOMEM;
1718         }
1719     }
1720     return 0;
1721 }
1722 
1723 static void vmbus_deinit(VMBus *vmbus)
1724 {
1725     VMBusGpadl *gpadl, *tmp_gpadl;
1726     VMBusChannel *chan;
1727 
1728     QTAILQ_FOREACH_SAFE(gpadl, &vmbus->gpadl_list, link, tmp_gpadl) {
1729         if (gpadl->state == VMGPADL_TORNDOWN) {
1730             continue;
1731         }
1732         vmbus_put_gpadl(gpadl);
1733     }
1734 
1735     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1736         chan->offer_state = VMOFFER_INIT;
1737     }
1738 
1739     hyperv_sint_route_unref(vmbus->sint_route);
1740     vmbus->sint_route = NULL;
1741     vmbus->int_page_gpa = 0;
1742     vmbus->target_vp = (uint32_t)-1;
1743     vmbus->version = 0;
1744     vmbus->state = VMBUS_LISTEN;
1745     vmbus->msg_in_progress = false;
1746 }
1747 
1748 static void handle_initiate_contact(VMBus *vmbus,
1749                                     vmbus_message_initiate_contact *msg,
1750                                     uint32_t msglen)
1751 {
1752     if (msglen < sizeof(*msg)) {
1753         return;
1754     }
1755 
1756     trace_vmbus_initiate_contact(msg->version_requested >> 16,
1757                                  msg->version_requested & 0xffff,
1758                                  msg->target_vcpu, msg->monitor_page1,
1759                                  msg->monitor_page2, msg->interrupt_page);
1760 
1761     /*
1762      * Reset vmbus on INITIATE_CONTACT regardless of its previous state.
1763      * Useful, in particular, with vmbus-aware BIOS which can't shut vmbus down
1764      * before handing over to OS loader.
1765      */
1766     vmbus_reset_all(vmbus);
1767 
1768     vmbus->target_vp = msg->target_vcpu;
1769     vmbus->version = msg->version_requested;
1770     if (vmbus->version < VMBUS_VERSION_WIN8) {
1771         /* linux passes interrupt page even when it doesn't need it */
1772         vmbus->int_page_gpa = msg->interrupt_page;
1773     }
1774     vmbus->state = VMBUS_HANDSHAKE;
1775 
1776     if (vmbus_init(vmbus)) {
1777         error_report("failed to init vmbus; aborting");
1778         vmbus_deinit(vmbus);
1779         return;
1780     }
1781 }
1782 
1783 static void send_handshake(VMBus *vmbus)
1784 {
1785     struct vmbus_message_version_response msg = {
1786         .header.message_type = VMBUS_MSG_VERSION_RESPONSE,
1787         .version_supported = vmbus_initialized(vmbus),
1788     };
1789 
1790     post_msg(vmbus, &msg, sizeof(msg));
1791 }
1792 
1793 static void handle_request_offers(VMBus *vmbus, void *msgdata, uint32_t msglen)
1794 {
1795     VMBusChannel *chan;
1796 
1797     if (!vmbus_initialized(vmbus)) {
1798         return;
1799     }
1800 
1801     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1802         if (chan->offer_state == VMOFFER_INIT) {
1803             chan->offer_state = VMOFFER_SENDING;
1804             break;
1805         }
1806     }
1807 
1808     vmbus->state = VMBUS_OFFER;
1809 }
1810 
1811 static void send_offer(VMBus *vmbus)
1812 {
1813     VMBusChannel *chan;
1814     struct vmbus_message_header alloffers_msg = {
1815         .message_type = VMBUS_MSG_ALLOFFERS_DELIVERED,
1816     };
1817 
1818     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1819         if (chan->offer_state == VMOFFER_SENDING) {
1820             VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
1821             /* Hyper-V wants LE GUIDs */
1822             QemuUUID classid = qemu_uuid_bswap(vdc->classid);
1823             QemuUUID instanceid = qemu_uuid_bswap(chan->dev->instanceid);
1824             struct vmbus_message_offer_channel msg = {
1825                 .header.message_type = VMBUS_MSG_OFFERCHANNEL,
1826                 .child_relid = chan->id,
1827                 .connection_id = chan_connection_id(chan),
1828                 .channel_flags = vdc->channel_flags,
1829                 .mmio_size_mb = vdc->mmio_size_mb,
1830                 .sub_channel_index = vmbus_channel_idx(chan),
1831                 .interrupt_flags = VMBUS_OFFER_INTERRUPT_DEDICATED,
1832             };
1833 
1834             memcpy(msg.type_uuid, &classid, sizeof(classid));
1835             memcpy(msg.instance_uuid, &instanceid, sizeof(instanceid));
1836 
1837             trace_vmbus_send_offer(chan->id, chan->dev);
1838 
1839             post_msg(vmbus, &msg, sizeof(msg));
1840             return;
1841         }
1842     }
1843 
1844     /* no more offers, send terminator message */
1845     trace_vmbus_terminate_offers();
1846     post_msg(vmbus, &alloffers_msg, sizeof(alloffers_msg));
1847 }
1848 
1849 static bool complete_offer(VMBus *vmbus)
1850 {
1851     VMBusChannel *chan;
1852 
1853     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1854         if (chan->offer_state == VMOFFER_SENDING) {
1855             chan->offer_state = VMOFFER_SENT;
1856             goto next_offer;
1857         }
1858     }
1859     /*
1860      * no transitioning channels found so this is completing the terminator
1861      * message, and vmbus can move to the next state
1862      */
1863     return true;
1864 
1865 next_offer:
1866     /* try to mark another channel for offering */
1867     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1868         if (chan->offer_state == VMOFFER_INIT) {
1869             chan->offer_state = VMOFFER_SENDING;
1870             break;
1871         }
1872     }
1873     /*
1874      * if an offer has been sent there are more offers or the terminator yet to
1875      * send, so no state transition for vmbus
1876      */
1877     return false;
1878 }
1879 
1880 
1881 static void handle_gpadl_header(VMBus *vmbus, vmbus_message_gpadl_header *msg,
1882                                 uint32_t msglen)
1883 {
1884     VMBusGpadl *gpadl;
1885     uint32_t num_gfns, i;
1886 
1887     /* must include at least one gpa range */
1888     if (msglen < sizeof(*msg) + sizeof(msg->range[0]) ||
1889         !vmbus_initialized(vmbus)) {
1890         return;
1891     }
1892 
1893     num_gfns = (msg->range_buflen - msg->rangecount * sizeof(msg->range[0])) /
1894                sizeof(msg->range[0].pfn_array[0]);
1895 
1896     trace_vmbus_gpadl_header(msg->gpadl_id, num_gfns);
1897 
1898     /*
1899      * In theory the GPADL_HEADER message can define a GPADL with multiple GPA
1900      * ranges each with arbitrary size and alignment.  However in practice only
1901      * single-range page-aligned GPADLs have been observed so just ignore
1902      * anything else and simplify things greatly.
1903      */
1904     if (msg->rangecount != 1 || msg->range[0].byte_offset ||
1905         (msg->range[0].byte_count != (num_gfns << TARGET_PAGE_BITS))) {
1906         return;
1907     }
1908 
1909     /* ignore requests to create already existing GPADLs */
1910     if (find_gpadl(vmbus, msg->gpadl_id)) {
1911         return;
1912     }
1913 
1914     gpadl = create_gpadl(vmbus, msg->gpadl_id, msg->child_relid, num_gfns);
1915 
1916     for (i = 0; i < num_gfns &&
1917          (void *)&msg->range[0].pfn_array[i + 1] <= (void *)msg + msglen;
1918          i++) {
1919         gpadl->gfns[gpadl->seen_gfns++] = msg->range[0].pfn_array[i];
1920     }
1921 
1922     if (gpadl_full(gpadl)) {
1923         vmbus->state = VMBUS_CREATE_GPADL;
1924     }
1925 }
1926 
1927 static void handle_gpadl_body(VMBus *vmbus, vmbus_message_gpadl_body *msg,
1928                               uint32_t msglen)
1929 {
1930     VMBusGpadl *gpadl;
1931     uint32_t num_gfns_left, i;
1932 
1933     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
1934         return;
1935     }
1936 
1937     trace_vmbus_gpadl_body(msg->gpadl_id);
1938 
1939     gpadl = find_gpadl(vmbus, msg->gpadl_id);
1940     if (!gpadl) {
1941         return;
1942     }
1943 
1944     num_gfns_left = gpadl->num_gfns - gpadl->seen_gfns;
1945     assert(num_gfns_left);
1946 
1947     for (i = 0; i < num_gfns_left &&
1948          (void *)&msg->pfn_array[i + 1] <= (void *)msg + msglen; i++) {
1949         gpadl->gfns[gpadl->seen_gfns++] = msg->pfn_array[i];
1950     }
1951 
1952     if (gpadl_full(gpadl)) {
1953         vmbus->state = VMBUS_CREATE_GPADL;
1954     }
1955 }
1956 
1957 static void send_create_gpadl(VMBus *vmbus)
1958 {
1959     VMBusGpadl *gpadl;
1960 
1961     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
1962         if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
1963             struct vmbus_message_gpadl_created msg = {
1964                 .header.message_type = VMBUS_MSG_GPADL_CREATED,
1965                 .gpadl_id = gpadl->id,
1966                 .child_relid = gpadl->child_relid,
1967             };
1968 
1969             trace_vmbus_gpadl_created(gpadl->id);
1970             post_msg(vmbus, &msg, sizeof(msg));
1971             return;
1972         }
1973     }
1974 
1975     assert(false);
1976 }
1977 
1978 static bool complete_create_gpadl(VMBus *vmbus)
1979 {
1980     VMBusGpadl *gpadl;
1981 
1982     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
1983         if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
1984             gpadl->state = VMGPADL_ALIVE;
1985 
1986             return true;
1987         }
1988     }
1989 
1990     assert(false);
1991     return false;
1992 }
1993 
1994 static void handle_gpadl_teardown(VMBus *vmbus,
1995                                   vmbus_message_gpadl_teardown *msg,
1996                                   uint32_t msglen)
1997 {
1998     VMBusGpadl *gpadl;
1999 
2000     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
2001         return;
2002     }
2003 
2004     trace_vmbus_gpadl_teardown(msg->gpadl_id);
2005 
2006     gpadl = find_gpadl(vmbus, msg->gpadl_id);
2007     if (!gpadl || gpadl->state == VMGPADL_TORNDOWN) {
2008         return;
2009     }
2010 
2011     gpadl->state = VMGPADL_TEARINGDOWN;
2012     vmbus->state = VMBUS_TEARDOWN_GPADL;
2013 }
2014 
2015 static void send_teardown_gpadl(VMBus *vmbus)
2016 {
2017     VMBusGpadl *gpadl;
2018 
2019     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
2020         if (gpadl->state == VMGPADL_TEARINGDOWN) {
2021             struct vmbus_message_gpadl_torndown msg = {
2022                 .header.message_type = VMBUS_MSG_GPADL_TORNDOWN,
2023                 .gpadl_id = gpadl->id,
2024             };
2025 
2026             trace_vmbus_gpadl_torndown(gpadl->id);
2027             post_msg(vmbus, &msg, sizeof(msg));
2028             return;
2029         }
2030     }
2031 
2032     assert(false);
2033 }
2034 
2035 static bool complete_teardown_gpadl(VMBus *vmbus)
2036 {
2037     VMBusGpadl *gpadl;
2038 
2039     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
2040         if (gpadl->state == VMGPADL_TEARINGDOWN) {
2041             gpadl->state = VMGPADL_TORNDOWN;
2042             vmbus_put_gpadl(gpadl);
2043             return true;
2044         }
2045     }
2046 
2047     assert(false);
2048     return false;
2049 }
2050 
2051 static void handle_open_channel(VMBus *vmbus, vmbus_message_open_channel *msg,
2052                                 uint32_t msglen)
2053 {
2054     VMBusChannel *chan;
2055 
2056     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
2057         return;
2058     }
2059 
2060     trace_vmbus_open_channel(msg->child_relid, msg->ring_buffer_gpadl_id,
2061                              msg->target_vp);
2062     chan = find_channel(vmbus, msg->child_relid);
2063     if (!chan || chan->state != VMCHAN_INIT) {
2064         return;
2065     }
2066 
2067     chan->ringbuf_gpadl = msg->ring_buffer_gpadl_id;
2068     chan->ringbuf_send_offset = msg->ring_buffer_offset;
2069     chan->target_vp = msg->target_vp;
2070     chan->open_id = msg->open_id;
2071 
2072     open_channel(chan);
2073 
2074     chan->state = VMCHAN_OPENING;
2075     vmbus->state = VMBUS_OPEN_CHANNEL;
2076 }
2077 
2078 static void send_open_channel(VMBus *vmbus)
2079 {
2080     VMBusChannel *chan;
2081 
2082     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2083         if (chan->state == VMCHAN_OPENING) {
2084             struct vmbus_message_open_result msg = {
2085                 .header.message_type = VMBUS_MSG_OPENCHANNEL_RESULT,
2086                 .child_relid = chan->id,
2087                 .open_id = chan->open_id,
2088                 .status = !vmbus_channel_is_open(chan),
2089             };
2090 
2091             trace_vmbus_channel_open(chan->id, msg.status);
2092             post_msg(vmbus, &msg, sizeof(msg));
2093             return;
2094         }
2095     }
2096 
2097     assert(false);
2098 }
2099 
2100 static bool complete_open_channel(VMBus *vmbus)
2101 {
2102     VMBusChannel *chan;
2103 
2104     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2105         if (chan->state == VMCHAN_OPENING) {
2106             if (vmbus_channel_is_open(chan)) {
2107                 chan->state = VMCHAN_OPEN;
2108                 /*
2109                  * simulate guest notification of ringbuffer space made
2110                  * available, for the channel protocols where the host
2111                  * initiates the communication
2112                  */
2113                 vmbus_channel_notify_host(chan);
2114             } else {
2115                 chan->state = VMCHAN_INIT;
2116             }
2117             return true;
2118         }
2119     }
2120 
2121     assert(false);
2122     return false;
2123 }
2124 
2125 static void vdev_reset_on_close(VMBusDevice *vdev)
2126 {
2127     uint16_t i;
2128 
2129     for (i = 0; i < vdev->num_channels; i++) {
2130         if (vmbus_channel_is_open(&vdev->channels[i])) {
2131             return;
2132         }
2133     }
2134 
2135     /* all channels closed -- reset device */
2136     qdev_reset_all(DEVICE(vdev));
2137 }
2138 
2139 static void handle_close_channel(VMBus *vmbus, vmbus_message_close_channel *msg,
2140                                  uint32_t msglen)
2141 {
2142     VMBusChannel *chan;
2143 
2144     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
2145         return;
2146     }
2147 
2148     trace_vmbus_close_channel(msg->child_relid);
2149 
2150     chan = find_channel(vmbus, msg->child_relid);
2151     if (!chan) {
2152         return;
2153     }
2154 
2155     close_channel(chan);
2156     chan->state = VMCHAN_INIT;
2157 
2158     vdev_reset_on_close(chan->dev);
2159 }
2160 
2161 static void handle_unload(VMBus *vmbus, void *msg, uint32_t msglen)
2162 {
2163     vmbus->state = VMBUS_UNLOAD;
2164 }
2165 
2166 static void send_unload(VMBus *vmbus)
2167 {
2168     vmbus_message_header msg = {
2169         .message_type = VMBUS_MSG_UNLOAD_RESPONSE,
2170     };
2171 
2172     qemu_mutex_lock(&vmbus->rx_queue_lock);
2173     vmbus->rx_queue_size = 0;
2174     qemu_mutex_unlock(&vmbus->rx_queue_lock);
2175 
2176     post_msg(vmbus, &msg, sizeof(msg));
2177     return;
2178 }
2179 
2180 static bool complete_unload(VMBus *vmbus)
2181 {
2182     vmbus_reset_all(vmbus);
2183     return true;
2184 }
2185 
2186 static void process_message(VMBus *vmbus)
2187 {
2188     struct hyperv_post_message_input *hv_msg;
2189     struct vmbus_message_header *msg;
2190     void *msgdata;
2191     uint32_t msglen;
2192 
2193     qemu_mutex_lock(&vmbus->rx_queue_lock);
2194 
2195     if (!vmbus->rx_queue_size) {
2196         goto unlock;
2197     }
2198 
2199     hv_msg = &vmbus->rx_queue[vmbus->rx_queue_head];
2200     msglen =  hv_msg->payload_size;
2201     if (msglen < sizeof(*msg)) {
2202         goto out;
2203     }
2204     msgdata = hv_msg->payload;
2205     msg = (struct vmbus_message_header *)msgdata;
2206 
2207     trace_vmbus_process_incoming_message(msg->message_type);
2208 
2209     switch (msg->message_type) {
2210     case VMBUS_MSG_INITIATE_CONTACT:
2211         handle_initiate_contact(vmbus, msgdata, msglen);
2212         break;
2213     case VMBUS_MSG_REQUESTOFFERS:
2214         handle_request_offers(vmbus, msgdata, msglen);
2215         break;
2216     case VMBUS_MSG_GPADL_HEADER:
2217         handle_gpadl_header(vmbus, msgdata, msglen);
2218         break;
2219     case VMBUS_MSG_GPADL_BODY:
2220         handle_gpadl_body(vmbus, msgdata, msglen);
2221         break;
2222     case VMBUS_MSG_GPADL_TEARDOWN:
2223         handle_gpadl_teardown(vmbus, msgdata, msglen);
2224         break;
2225     case VMBUS_MSG_OPENCHANNEL:
2226         handle_open_channel(vmbus, msgdata, msglen);
2227         break;
2228     case VMBUS_MSG_CLOSECHANNEL:
2229         handle_close_channel(vmbus, msgdata, msglen);
2230         break;
2231     case VMBUS_MSG_UNLOAD:
2232         handle_unload(vmbus, msgdata, msglen);
2233         break;
2234     default:
2235         error_report("unknown message type %#x", msg->message_type);
2236         break;
2237     }
2238 
2239 out:
2240     vmbus->rx_queue_size--;
2241     vmbus->rx_queue_head++;
2242     vmbus->rx_queue_head %= HV_MSG_QUEUE_LEN;
2243 
2244     vmbus_resched(vmbus);
2245 unlock:
2246     qemu_mutex_unlock(&vmbus->rx_queue_lock);
2247 }
2248 
2249 static const struct {
2250     void (*run)(VMBus *vmbus);
2251     bool (*complete)(VMBus *vmbus);
2252 } state_runner[] = {
2253     [VMBUS_LISTEN]         = {process_message,     NULL},
2254     [VMBUS_HANDSHAKE]      = {send_handshake,      NULL},
2255     [VMBUS_OFFER]          = {send_offer,          complete_offer},
2256     [VMBUS_CREATE_GPADL]   = {send_create_gpadl,   complete_create_gpadl},
2257     [VMBUS_TEARDOWN_GPADL] = {send_teardown_gpadl, complete_teardown_gpadl},
2258     [VMBUS_OPEN_CHANNEL]   = {send_open_channel,   complete_open_channel},
2259     [VMBUS_UNLOAD]         = {send_unload,         complete_unload},
2260 };
2261 
2262 static void vmbus_do_run(VMBus *vmbus)
2263 {
2264     if (vmbus->msg_in_progress) {
2265         return;
2266     }
2267 
2268     assert(vmbus->state < VMBUS_STATE_MAX);
2269     assert(state_runner[vmbus->state].run);
2270     state_runner[vmbus->state].run(vmbus);
2271 }
2272 
2273 static void vmbus_run(void *opaque)
2274 {
2275     VMBus *vmbus = opaque;
2276 
2277     /* make sure no recursion happens (e.g. due to recursive aio_poll()) */
2278     if (vmbus->in_progress) {
2279         return;
2280     }
2281 
2282     vmbus->in_progress = true;
2283     /*
2284      * FIXME: if vmbus_resched() is called from within vmbus_do_run(), it
2285      * should go *after* the code that can result in aio_poll; otherwise
2286      * reschedules can be missed.  No idea how to enforce that.
2287      */
2288     vmbus_do_run(vmbus);
2289     vmbus->in_progress = false;
2290 }
2291 
2292 static void vmbus_msg_cb(void *data, int status)
2293 {
2294     VMBus *vmbus = data;
2295     bool (*complete)(VMBus *vmbus);
2296 
2297     assert(vmbus->msg_in_progress);
2298 
2299     trace_vmbus_msg_cb(status);
2300 
2301     if (status == -EAGAIN) {
2302         goto out;
2303     }
2304     if (status) {
2305         error_report("message delivery fatal failure: %d; aborting vmbus",
2306                      status);
2307         vmbus_reset_all(vmbus);
2308         return;
2309     }
2310 
2311     assert(vmbus->state < VMBUS_STATE_MAX);
2312     complete = state_runner[vmbus->state].complete;
2313     if (!complete || complete(vmbus)) {
2314         vmbus->state = VMBUS_LISTEN;
2315     }
2316 out:
2317     vmbus->msg_in_progress = false;
2318     vmbus_resched(vmbus);
2319 }
2320 
2321 static void vmbus_resched(VMBus *vmbus)
2322 {
2323     aio_bh_schedule_oneshot(qemu_get_aio_context(), vmbus_run, vmbus);
2324 }
2325 
2326 static void vmbus_signal_event(EventNotifier *e)
2327 {
2328     VMBusChannel *chan;
2329     VMBus *vmbus = container_of(e, VMBus, notifier);
2330     unsigned long *int_map;
2331     hwaddr addr, len;
2332     bool is_dirty = false;
2333 
2334     if (!event_notifier_test_and_clear(e)) {
2335         return;
2336     }
2337 
2338     trace_vmbus_signal_event();
2339 
2340     if (!vmbus->int_page_gpa) {
2341         return;
2342     }
2343 
2344     addr = vmbus->int_page_gpa + TARGET_PAGE_SIZE / 2;
2345     len = TARGET_PAGE_SIZE / 2;
2346     int_map = cpu_physical_memory_map(addr, &len, 1);
2347     if (len != TARGET_PAGE_SIZE / 2) {
2348         goto unmap;
2349     }
2350 
2351     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2352         if (bitmap_test_and_clear_atomic(int_map, chan->id, 1)) {
2353             if (!vmbus_channel_is_open(chan)) {
2354                 continue;
2355             }
2356             vmbus_channel_notify_host(chan);
2357             is_dirty = true;
2358         }
2359     }
2360 
2361 unmap:
2362     cpu_physical_memory_unmap(int_map, len, 1, is_dirty);
2363 }
2364 
2365 static void vmbus_dev_realize(DeviceState *dev, Error **errp)
2366 {
2367     VMBusDevice *vdev = VMBUS_DEVICE(dev);
2368     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2369     VMBus *vmbus = VMBUS(qdev_get_parent_bus(dev));
2370     BusChild *child;
2371     Error *err = NULL;
2372     char idstr[UUID_FMT_LEN + 1];
2373 
2374     assert(!qemu_uuid_is_null(&vdev->instanceid));
2375 
2376     /* Check for instance id collision for this class id */
2377     QTAILQ_FOREACH(child, &BUS(vmbus)->children, sibling) {
2378         VMBusDevice *child_dev = VMBUS_DEVICE(child->child);
2379 
2380         if (child_dev == vdev) {
2381             continue;
2382         }
2383 
2384         if (qemu_uuid_is_equal(&child_dev->instanceid, &vdev->instanceid)) {
2385             qemu_uuid_unparse(&vdev->instanceid, idstr);
2386             error_setg(&err, "duplicate vmbus device instance id %s", idstr);
2387             goto error_out;
2388         }
2389     }
2390 
2391     vdev->dma_as = &address_space_memory;
2392 
2393     create_channels(vmbus, vdev, &err);
2394     if (err) {
2395         goto error_out;
2396     }
2397 
2398     if (vdc->vmdev_realize) {
2399         vdc->vmdev_realize(vdev, &err);
2400         if (err) {
2401             goto err_vdc_realize;
2402         }
2403     }
2404     return;
2405 
2406 err_vdc_realize:
2407     free_channels(vdev);
2408 error_out:
2409     error_propagate(errp, err);
2410 }
2411 
2412 static void vmbus_dev_reset(DeviceState *dev)
2413 {
2414     uint16_t i;
2415     VMBusDevice *vdev = VMBUS_DEVICE(dev);
2416     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2417 
2418     if (vdev->channels) {
2419         for (i = 0; i < vdev->num_channels; i++) {
2420             VMBusChannel *chan = &vdev->channels[i];
2421             close_channel(chan);
2422             chan->state = VMCHAN_INIT;
2423         }
2424     }
2425 
2426     if (vdc->vmdev_reset) {
2427         vdc->vmdev_reset(vdev);
2428     }
2429 }
2430 
2431 static void vmbus_dev_unrealize(DeviceState *dev)
2432 {
2433     VMBusDevice *vdev = VMBUS_DEVICE(dev);
2434     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2435 
2436     if (vdc->vmdev_unrealize) {
2437         vdc->vmdev_unrealize(vdev);
2438     }
2439     free_channels(vdev);
2440 }
2441 
2442 static void vmbus_dev_class_init(ObjectClass *klass, void *data)
2443 {
2444     DeviceClass *kdev = DEVICE_CLASS(klass);
2445     kdev->bus_type = TYPE_VMBUS;
2446     kdev->realize = vmbus_dev_realize;
2447     kdev->unrealize = vmbus_dev_unrealize;
2448     kdev->reset = vmbus_dev_reset;
2449 }
2450 
2451 static Property vmbus_dev_instanceid =
2452                         DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid);
2453 
2454 static void vmbus_dev_instance_init(Object *obj)
2455 {
2456     VMBusDevice *vdev = VMBUS_DEVICE(obj);
2457     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2458 
2459     if (!qemu_uuid_is_null(&vdc->instanceid)) {
2460         /* Class wants to only have a single instance with a fixed UUID */
2461         vdev->instanceid = vdc->instanceid;
2462     } else {
2463         qdev_property_add_static(DEVICE(vdev), &vmbus_dev_instanceid);
2464     }
2465 }
2466 
2467 const VMStateDescription vmstate_vmbus_dev = {
2468     .name = TYPE_VMBUS_DEVICE,
2469     .version_id = 0,
2470     .minimum_version_id = 0,
2471     .fields = (VMStateField[]) {
2472         VMSTATE_UINT8_ARRAY(instanceid.data, VMBusDevice, 16),
2473         VMSTATE_UINT16(num_channels, VMBusDevice),
2474         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(channels, VMBusDevice,
2475                                              num_channels, vmstate_channel,
2476                                              VMBusChannel),
2477         VMSTATE_END_OF_LIST()
2478     }
2479 };
2480 
2481 /* vmbus generic device base */
2482 static const TypeInfo vmbus_dev_type_info = {
2483     .name = TYPE_VMBUS_DEVICE,
2484     .parent = TYPE_DEVICE,
2485     .abstract = true,
2486     .instance_size = sizeof(VMBusDevice),
2487     .class_size = sizeof(VMBusDeviceClass),
2488     .class_init = vmbus_dev_class_init,
2489     .instance_init = vmbus_dev_instance_init,
2490 };
2491 
2492 static void vmbus_realize(BusState *bus, Error **errp)
2493 {
2494     int ret = 0;
2495     Error *local_err = NULL;
2496     VMBus *vmbus = VMBUS(bus);
2497 
2498     qemu_mutex_init(&vmbus->rx_queue_lock);
2499 
2500     QTAILQ_INIT(&vmbus->gpadl_list);
2501     QTAILQ_INIT(&vmbus->channel_list);
2502 
2503     ret = hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID,
2504                                  vmbus_recv_message, vmbus);
2505     if (ret != 0) {
2506         error_setg(&local_err, "hyperv set message handler failed: %d", ret);
2507         goto error_out;
2508     }
2509 
2510     ret = event_notifier_init(&vmbus->notifier, 0);
2511     if (ret != 0) {
2512         error_setg(&local_err, "event notifier failed to init with %d", ret);
2513         goto remove_msg_handler;
2514     }
2515 
2516     event_notifier_set_handler(&vmbus->notifier, vmbus_signal_event);
2517     ret = hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID,
2518                                         &vmbus->notifier);
2519     if (ret != 0) {
2520         error_setg(&local_err, "hyperv set event handler failed with %d", ret);
2521         goto clear_event_notifier;
2522     }
2523 
2524     return;
2525 
2526 clear_event_notifier:
2527     event_notifier_cleanup(&vmbus->notifier);
2528 remove_msg_handler:
2529     hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
2530 error_out:
2531     qemu_mutex_destroy(&vmbus->rx_queue_lock);
2532     error_propagate(errp, local_err);
2533 }
2534 
2535 static void vmbus_unrealize(BusState *bus)
2536 {
2537     VMBus *vmbus = VMBUS(bus);
2538 
2539     hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
2540     hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID, NULL);
2541     event_notifier_cleanup(&vmbus->notifier);
2542 
2543     qemu_mutex_destroy(&vmbus->rx_queue_lock);
2544 }
2545 
2546 static void vmbus_reset(BusState *bus)
2547 {
2548     vmbus_deinit(VMBUS(bus));
2549 }
2550 
2551 static char *vmbus_get_dev_path(DeviceState *dev)
2552 {
2553     BusState *bus = qdev_get_parent_bus(dev);
2554     return qdev_get_dev_path(bus->parent);
2555 }
2556 
2557 static char *vmbus_get_fw_dev_path(DeviceState *dev)
2558 {
2559     VMBusDevice *vdev = VMBUS_DEVICE(dev);
2560     char uuid[UUID_FMT_LEN + 1];
2561 
2562     qemu_uuid_unparse(&vdev->instanceid, uuid);
2563     return g_strdup_printf("%s@%s", qdev_fw_name(dev), uuid);
2564 }
2565 
2566 static void vmbus_class_init(ObjectClass *klass, void *data)
2567 {
2568     BusClass *k = BUS_CLASS(klass);
2569 
2570     k->get_dev_path = vmbus_get_dev_path;
2571     k->get_fw_dev_path = vmbus_get_fw_dev_path;
2572     k->realize = vmbus_realize;
2573     k->unrealize = vmbus_unrealize;
2574     k->reset = vmbus_reset;
2575 }
2576 
2577 static int vmbus_pre_load(void *opaque)
2578 {
2579     VMBusChannel *chan;
2580     VMBus *vmbus = VMBUS(opaque);
2581 
2582     /*
2583      * channel IDs allocated by the source will come in the migration stream
2584      * for each channel, so clean up the ones allocated at realize
2585      */
2586     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2587         unregister_chan_id(chan);
2588     }
2589 
2590     return 0;
2591 }
2592 static int vmbus_post_load(void *opaque, int version_id)
2593 {
2594     int ret;
2595     VMBus *vmbus = VMBUS(opaque);
2596     VMBusGpadl *gpadl;
2597     VMBusChannel *chan;
2598 
2599     ret = vmbus_init(vmbus);
2600     if (ret) {
2601         return ret;
2602     }
2603 
2604     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
2605         gpadl->vmbus = vmbus;
2606         gpadl->refcount = 1;
2607     }
2608 
2609     /*
2610      * reopening channels depends on initialized vmbus so it's done here
2611      * instead of channel_post_load()
2612      */
2613     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2614 
2615         if (chan->state == VMCHAN_OPENING || chan->state == VMCHAN_OPEN) {
2616             open_channel(chan);
2617         }
2618 
2619         if (chan->state != VMCHAN_OPEN) {
2620             continue;
2621         }
2622 
2623         if (!vmbus_channel_is_open(chan)) {
2624             /* reopen failed, abort loading */
2625             return -1;
2626         }
2627 
2628         /* resume processing on the guest side if it missed the notification */
2629         hyperv_sint_route_set_sint(chan->notify_route);
2630         /* ditto on the host side */
2631         vmbus_channel_notify_host(chan);
2632     }
2633 
2634     vmbus_resched(vmbus);
2635     return 0;
2636 }
2637 
2638 static const VMStateDescription vmstate_post_message_input = {
2639     .name = "vmbus/hyperv_post_message_input",
2640     .version_id = 0,
2641     .minimum_version_id = 0,
2642     .fields = (VMStateField[]) {
2643         /*
2644          * skip connection_id and message_type as they are validated before
2645          * queueing and ignored on dequeueing
2646          */
2647         VMSTATE_UINT32(payload_size, struct hyperv_post_message_input),
2648         VMSTATE_UINT8_ARRAY(payload, struct hyperv_post_message_input,
2649                             HV_MESSAGE_PAYLOAD_SIZE),
2650         VMSTATE_END_OF_LIST()
2651     }
2652 };
2653 
2654 static bool vmbus_rx_queue_needed(void *opaque)
2655 {
2656     VMBus *vmbus = VMBUS(opaque);
2657     return vmbus->rx_queue_size;
2658 }
2659 
2660 static const VMStateDescription vmstate_rx_queue = {
2661     .name = "vmbus/rx_queue",
2662     .version_id = 0,
2663     .minimum_version_id = 0,
2664     .needed = vmbus_rx_queue_needed,
2665     .fields = (VMStateField[]) {
2666         VMSTATE_UINT8(rx_queue_head, VMBus),
2667         VMSTATE_UINT8(rx_queue_size, VMBus),
2668         VMSTATE_STRUCT_ARRAY(rx_queue, VMBus,
2669                              HV_MSG_QUEUE_LEN, 0,
2670                              vmstate_post_message_input,
2671                              struct hyperv_post_message_input),
2672         VMSTATE_END_OF_LIST()
2673     }
2674 };
2675 
2676 static const VMStateDescription vmstate_vmbus = {
2677     .name = TYPE_VMBUS,
2678     .version_id = 0,
2679     .minimum_version_id = 0,
2680     .pre_load = vmbus_pre_load,
2681     .post_load = vmbus_post_load,
2682     .fields = (VMStateField[]) {
2683         VMSTATE_UINT8(state, VMBus),
2684         VMSTATE_UINT32(version, VMBus),
2685         VMSTATE_UINT32(target_vp, VMBus),
2686         VMSTATE_UINT64(int_page_gpa, VMBus),
2687         VMSTATE_QTAILQ_V(gpadl_list, VMBus, 0,
2688                          vmstate_gpadl, VMBusGpadl, link),
2689         VMSTATE_END_OF_LIST()
2690     },
2691     .subsections = (const VMStateDescription * []) {
2692         &vmstate_rx_queue,
2693         NULL
2694     }
2695 };
2696 
2697 static const TypeInfo vmbus_type_info = {
2698     .name = TYPE_VMBUS,
2699     .parent = TYPE_BUS,
2700     .instance_size = sizeof(VMBus),
2701     .class_init = vmbus_class_init,
2702 };
2703 
2704 static void vmbus_bridge_realize(DeviceState *dev, Error **errp)
2705 {
2706     VMBusBridge *bridge = VMBUS_BRIDGE(dev);
2707 
2708     /*
2709      * here there's at least one vmbus bridge that is being realized, so
2710      * vmbus_bridge_find can only return NULL if it's not unique
2711      */
2712     if (!vmbus_bridge_find()) {
2713         error_setg(errp, "there can be at most one %s in the system",
2714                    TYPE_VMBUS_BRIDGE);
2715         return;
2716     }
2717 
2718     if (!hyperv_is_synic_enabled()) {
2719         error_report("VMBus requires usable Hyper-V SynIC and VP_INDEX");
2720         return;
2721     }
2722 
2723     bridge->bus = VMBUS(qbus_create(TYPE_VMBUS, dev, "vmbus"));
2724 }
2725 
2726 static char *vmbus_bridge_ofw_unit_address(const SysBusDevice *dev)
2727 {
2728     /* there can be only one VMBus */
2729     return g_strdup("0");
2730 }
2731 
2732 static const VMStateDescription vmstate_vmbus_bridge = {
2733     .name = TYPE_VMBUS_BRIDGE,
2734     .version_id = 0,
2735     .minimum_version_id = 0,
2736     .fields = (VMStateField[]) {
2737         VMSTATE_STRUCT_POINTER(bus, VMBusBridge, vmstate_vmbus, VMBus),
2738         VMSTATE_END_OF_LIST()
2739     },
2740 };
2741 
2742 static Property vmbus_bridge_props[] = {
2743     DEFINE_PROP_UINT8("irq", VMBusBridge, irq, 7),
2744     DEFINE_PROP_END_OF_LIST()
2745 };
2746 
2747 static void vmbus_bridge_class_init(ObjectClass *klass, void *data)
2748 {
2749     DeviceClass *k = DEVICE_CLASS(klass);
2750     SysBusDeviceClass *sk = SYS_BUS_DEVICE_CLASS(klass);
2751 
2752     k->realize = vmbus_bridge_realize;
2753     k->fw_name = "vmbus";
2754     sk->explicit_ofw_unit_address = vmbus_bridge_ofw_unit_address;
2755     set_bit(DEVICE_CATEGORY_BRIDGE, k->categories);
2756     k->vmsd = &vmstate_vmbus_bridge;
2757     device_class_set_props(k, vmbus_bridge_props);
2758     /* override SysBusDevice's default */
2759     k->user_creatable = true;
2760 }
2761 
2762 static const TypeInfo vmbus_bridge_type_info = {
2763     .name = TYPE_VMBUS_BRIDGE,
2764     .parent = TYPE_SYS_BUS_DEVICE,
2765     .instance_size = sizeof(VMBusBridge),
2766     .class_init = vmbus_bridge_class_init,
2767 };
2768 
2769 static void vmbus_register_types(void)
2770 {
2771     type_register_static(&vmbus_bridge_type_info);
2772     type_register_static(&vmbus_dev_type_info);
2773     type_register_static(&vmbus_type_info);
2774 }
2775 
2776 type_init(vmbus_register_types)
2777