xref: /openbmc/qemu/hw/hyperv/vmbus.c (revision 6016b7b46edb714a53a31536b30ead9c3aafaef7)
1 /*
2  * QEMU Hyper-V VMBus
3  *
4  * Copyright (c) 2017-2018 Virtuozzo International GmbH.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/error-report.h"
12 #include "qemu/main-loop.h"
13 #include "qapi/error.h"
14 #include "migration/vmstate.h"
15 #include "hw/qdev-properties.h"
16 #include "hw/qdev-properties-system.h"
17 #include "hw/hyperv/hyperv.h"
18 #include "hw/hyperv/vmbus.h"
19 #include "hw/hyperv/vmbus-bridge.h"
20 #include "hw/sysbus.h"
21 #include "cpu.h"
22 #include "trace.h"
23 
/*
 * GPADL states.
 * NOTE(review): presumably the values kept in VMBusGpadl.state; the code
 * that sets and checks them is not in this part of the file -- confirm.
 */
enum {
    VMGPADL_INIT,
    VMGPADL_ALIVE,
    VMGPADL_TEARINGDOWN,
    VMGPADL_TORNDOWN,
};
30 
31 struct VMBusGpadl {
32     /* GPADL id */
33     uint32_t id;
34     /* associated channel id (rudimentary?) */
35     uint32_t child_relid;
36 
37     /* number of pages in the GPADL as declared in GPADL_HEADER message */
38     uint32_t num_gfns;
39     /*
40      * Due to limited message size, GPADL may not fit fully in a single
41      * GPADL_HEADER message, and is further popluated using GPADL_BODY
42      * messages.  @seen_gfns is the number of pages seen so far; once it
43      * reaches @num_gfns, the GPADL is ready to use.
44      */
45     uint32_t seen_gfns;
46     /* array of GFNs (of size @num_gfns once allocated) */
47     uint64_t *gfns;
48 
49     uint8_t state;
50 
51     QTAILQ_ENTRY(VMBusGpadl) link;
52     VMBus *vmbus;
53     unsigned refcount;
54 };
55 
56 /*
57  * Wrap sequential read from / write to GPADL.
58  */
59 typedef struct GpadlIter {
60     VMBusGpadl *gpadl;
61     AddressSpace *as;
62     DMADirection dir;
63     /* offset into GPADL where the next i/o will be performed */
64     uint32_t off;
65     /*
66      * Cached mapping of the currently accessed page, up to page boundary.
67      * Updated lazily on i/o.
68      * Note: MemoryRegionCache can not be used here because pages in the GPADL
69      * are non-contiguous and may belong to different memory regions.
70      */
71     void *map;
72     /* offset after last i/o (i.e. not affected by seek) */
73     uint32_t last_off;
74     /*
75      * Indicator that the iterator is active and may have a cached mapping.
76      * Allows to enforce bracketing of all i/o (which may create cached
77      * mappings) and thus exclude mapping leaks.
78      */
79     bool active;
80 } GpadlIter;
81 
82 /*
83  * Ring buffer.  There are two of them, sitting in the same GPADL, for each
84  * channel.
85  * Each ring buffer consists of a set of pages, with the first page containing
86  * the ring buffer header, and the remaining pages being for data packets.
87  */
88 typedef struct VMBusRingBufCommon {
89     AddressSpace *as;
90     /* GPA of the ring buffer header */
91     dma_addr_t rb_addr;
92     /* start and length of the ring buffer data area within GPADL */
93     uint32_t base;
94     uint32_t len;
95 
96     GpadlIter iter;
97 } VMBusRingBufCommon;
98 
99 typedef struct VMBusSendRingBuf {
100     VMBusRingBufCommon common;
101     /* current write index, to be committed at the end of send */
102     uint32_t wr_idx;
103     /* write index at the start of send */
104     uint32_t last_wr_idx;
105     /* space to be requested from the guest */
106     uint32_t wanted;
107     /* space reserved for planned sends */
108     uint32_t reserved;
109     /* last seen read index */
110     uint32_t last_seen_rd_idx;
111 } VMBusSendRingBuf;
112 
113 typedef struct VMBusRecvRingBuf {
114     VMBusRingBufCommon common;
115     /* current read index, to be committed at the end of receive */
116     uint32_t rd_idx;
117     /* read index at the start of receive */
118     uint32_t last_rd_idx;
119     /* last seen write index */
120     uint32_t last_seen_wr_idx;
121 } VMBusRecvRingBuf;
122 
123 
/*
 * Channel offer states.
 * NOTE(review): presumably the values of VMBusChannel.offer_state -- confirm
 * against the users, which are outside this part of the file.
 */
enum {
    VMOFFER_INIT,
    VMOFFER_SENDING,
    VMOFFER_SENT,
};
129 
/*
 * Channel states.
 * NOTE(review): presumably the values of VMBusChannel.state -- confirm
 * against the users, which are outside this part of the file.
 */
enum {
    VMCHAN_INIT,
    VMCHAN_OPENING,
    VMCHAN_OPEN,
};
135 
136 struct VMBusChannel {
137     VMBusDevice *dev;
138 
139     /* channel id */
140     uint32_t id;
141     /*
142      * subchannel index within the device; subchannel #0 is "primary" and
143      * always exists
144      */
145     uint16_t subchan_idx;
146     uint32_t open_id;
147     /* VP_INDEX of the vCPU to notify with (synthetic) interrupts */
148     uint32_t target_vp;
149     /* GPADL id to use for the ring buffers */
150     uint32_t ringbuf_gpadl;
151     /* start (in pages) of the send ring buffer within @ringbuf_gpadl */
152     uint32_t ringbuf_send_offset;
153 
154     uint8_t offer_state;
155     uint8_t state;
156     bool is_open;
157 
158     /* main device worker; copied from the device class */
159     VMBusChannelNotifyCb notify_cb;
160     /*
161      * guest->host notifications, either sent directly or dispatched via
162      * interrupt page (older VMBus)
163      */
164     EventNotifier notifier;
165 
166     VMBus *vmbus;
167     /*
168      * SINT route to signal with host->guest notifications; may be shared with
169      * the main VMBus SINT route
170      */
171     HvSintRoute *notify_route;
172     VMBusGpadl *gpadl;
173 
174     VMBusSendRingBuf send_ringbuf;
175     VMBusRecvRingBuf recv_ringbuf;
176 
177     QTAILQ_ENTRY(VMBusChannel) link;
178 };
179 
/*
 * Per the Hyper-V spec every message port has 16 buffers, i.e. the guest may
 * post that many messages without blocking, so incoming messages need a
 * queue of this length.
 * Outgoing (host->guest) messages need no queue: the VMBus simply stays in
 * its current state until the message is known to have reached the
 * respective SynIC message slot.
 */
#define HV_MSG_QUEUE_LEN     16

/* Channel #0 is never used by Hyper-V devices.  Must be something special. */
#define VMBUS_FIRST_CHANID      1
/* A channel takes one bit in a single event page sint slot. */
#define VMBUS_CHANID_COUNT      (HV_EVENT_FLAGS_COUNT - VMBUS_FIRST_CHANID)
/* Keep a few connection numbers free for other uses. */
#define VMBUS_CHAN_CONNECTION_OFFSET     16
196 
/*
 * VMBus state machine states.
 *
 * Whether a message was sent successfully is only reported asynchronously,
 * so the state machine effectively has two entry points: vmbus_run and
 * vmbus_msg_cb (invoked once the delivery status of a host->guest message is
 * known).  Both run as oneshot BHs on the main aio context, which serializes
 * them.
 */
enum {
    VMBUS_LISTEN,
    VMBUS_HANDSHAKE,
    VMBUS_OFFER,
    VMBUS_CREATE_GPADL,
    VMBUS_TEARDOWN_GPADL,
    VMBUS_OPEN_CHANNEL,
    VMBUS_UNLOAD,
    VMBUS_STATE_MAX
};
214 
215 struct VMBus {
216     BusState parent;
217 
218     uint8_t state;
219     /* protection against recursive aio_poll (see vmbus_run) */
220     bool in_progress;
221     /* whether there's a message being delivered to the guest */
222     bool msg_in_progress;
223     uint32_t version;
224     /* VP_INDEX of the vCPU to send messages and interrupts to */
225     uint32_t target_vp;
226     HvSintRoute *sint_route;
227     /*
228      * interrupt page for older protocol versions; newer ones use SynIC event
229      * flags directly
230      */
231     hwaddr int_page_gpa;
232 
233     DECLARE_BITMAP(chanid_bitmap, VMBUS_CHANID_COUNT);
234 
235     /* incoming message queue */
236     struct hyperv_post_message_input rx_queue[HV_MSG_QUEUE_LEN];
237     uint8_t rx_queue_head;
238     uint8_t rx_queue_size;
239     QemuMutex rx_queue_lock;
240 
241     QTAILQ_HEAD(, VMBusGpadl) gpadl_list;
242     QTAILQ_HEAD(, VMBusChannel) channel_list;
243 
244     /*
245      * guest->host notifications for older VMBus, to be dispatched via
246      * interrupt page
247      */
248     EventNotifier notifier;
249 };
250 
251 static bool gpadl_full(VMBusGpadl *gpadl)
252 {
253     return gpadl->seen_gfns == gpadl->num_gfns;
254 }
255 
256 static VMBusGpadl *create_gpadl(VMBus *vmbus, uint32_t id,
257                                 uint32_t child_relid, uint32_t num_gfns)
258 {
259     VMBusGpadl *gpadl = g_new0(VMBusGpadl, 1);
260 
261     gpadl->id = id;
262     gpadl->child_relid = child_relid;
263     gpadl->num_gfns = num_gfns;
264     gpadl->gfns = g_new(uint64_t, num_gfns);
265     QTAILQ_INSERT_HEAD(&vmbus->gpadl_list, gpadl, link);
266     gpadl->vmbus = vmbus;
267     gpadl->refcount = 1;
268     return gpadl;
269 }
270 
271 static void free_gpadl(VMBusGpadl *gpadl)
272 {
273     QTAILQ_REMOVE(&gpadl->vmbus->gpadl_list, gpadl, link);
274     g_free(gpadl->gfns);
275     g_free(gpadl);
276 }
277 
278 static VMBusGpadl *find_gpadl(VMBus *vmbus, uint32_t gpadl_id)
279 {
280     VMBusGpadl *gpadl;
281     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
282         if (gpadl->id == gpadl_id) {
283             return gpadl;
284         }
285     }
286     return NULL;
287 }
288 
289 VMBusGpadl *vmbus_get_gpadl(VMBusChannel *chan, uint32_t gpadl_id)
290 {
291     VMBusGpadl *gpadl = find_gpadl(chan->vmbus, gpadl_id);
292     if (!gpadl || !gpadl_full(gpadl)) {
293         return NULL;
294     }
295     gpadl->refcount++;
296     return gpadl;
297 }
298 
299 void vmbus_put_gpadl(VMBusGpadl *gpadl)
300 {
301     if (!gpadl) {
302         return;
303     }
304     if (--gpadl->refcount) {
305         return;
306     }
307     free_gpadl(gpadl);
308 }
309 
310 uint32_t vmbus_gpadl_len(VMBusGpadl *gpadl)
311 {
312     return gpadl->num_gfns * TARGET_PAGE_SIZE;
313 }
314 
315 static void gpadl_iter_init(GpadlIter *iter, VMBusGpadl *gpadl,
316                             AddressSpace *as, DMADirection dir)
317 {
318     iter->gpadl = gpadl;
319     iter->as = as;
320     iter->dir = dir;
321     iter->active = false;
322 }
323 
324 static inline void gpadl_iter_cache_unmap(GpadlIter *iter)
325 {
326     uint32_t map_start_in_page = (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
327     uint32_t io_end_in_page = ((iter->last_off - 1) & ~TARGET_PAGE_MASK) + 1;
328 
329     /* mapping is only done to do non-zero amount of i/o */
330     assert(iter->last_off > 0);
331     assert(map_start_in_page < io_end_in_page);
332 
333     dma_memory_unmap(iter->as, iter->map, TARGET_PAGE_SIZE - map_start_in_page,
334                      iter->dir, io_end_in_page - map_start_in_page);
335 }
336 
337 /*
338  * Copy exactly @len bytes between the GPADL pointed to by @iter and @buf.
339  * The direction of the copy is determined by @iter->dir.
340  * The caller must ensure the operation overflows neither @buf nor the GPADL
341  * (there's an assert for the latter).
342  * Reuse the currently mapped page in the GPADL if possible.
343  */
344 static ssize_t gpadl_iter_io(GpadlIter *iter, void *buf, uint32_t len)
345 {
346     ssize_t ret = len;
347 
348     assert(iter->active);
349 
350     while (len) {
351         uint32_t off_in_page = iter->off & ~TARGET_PAGE_MASK;
352         uint32_t pgleft = TARGET_PAGE_SIZE - off_in_page;
353         uint32_t cplen = MIN(pgleft, len);
354         void *p;
355 
356         /* try to reuse the cached mapping */
357         if (iter->map) {
358             uint32_t map_start_in_page =
359                 (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
360             uint32_t off_base = iter->off & ~TARGET_PAGE_MASK;
361             uint32_t mapped_base = (iter->last_off - 1) & ~TARGET_PAGE_MASK;
362             if (off_base != mapped_base || off_in_page < map_start_in_page) {
363                 gpadl_iter_cache_unmap(iter);
364                 iter->map = NULL;
365             }
366         }
367 
368         if (!iter->map) {
369             dma_addr_t maddr;
370             dma_addr_t mlen = pgleft;
371             uint32_t idx = iter->off >> TARGET_PAGE_BITS;
372             assert(idx < iter->gpadl->num_gfns);
373 
374             maddr = (iter->gpadl->gfns[idx] << TARGET_PAGE_BITS) | off_in_page;
375 
376             iter->map = dma_memory_map(iter->as, maddr, &mlen, iter->dir);
377             if (mlen != pgleft) {
378                 dma_memory_unmap(iter->as, iter->map, mlen, iter->dir, 0);
379                 iter->map = NULL;
380                 return -EFAULT;
381             }
382         }
383 
384         p = (void *)(uintptr_t)(((uintptr_t)iter->map & TARGET_PAGE_MASK) |
385                 off_in_page);
386         if (iter->dir == DMA_DIRECTION_FROM_DEVICE) {
387             memcpy(p, buf, cplen);
388         } else {
389             memcpy(buf, p, cplen);
390         }
391 
392         buf += cplen;
393         len -= cplen;
394         iter->off += cplen;
395         iter->last_off = iter->off;
396     }
397 
398     return ret;
399 }
400 
401 /*
402  * Position the iterator @iter at new offset @new_off.
403  * If this results in the cached mapping being unusable with the new offset,
404  * unmap it.
405  */
406 static inline void gpadl_iter_seek(GpadlIter *iter, uint32_t new_off)
407 {
408     assert(iter->active);
409     iter->off = new_off;
410 }
411 
412 /*
413  * Start a series of i/o on the GPADL.
414  * After this i/o and seek operations on @iter become legal.
415  */
416 static inline void gpadl_iter_start_io(GpadlIter *iter)
417 {
418     assert(!iter->active);
419     /* mapping is cached lazily on i/o */
420     iter->map = NULL;
421     iter->active = true;
422 }
423 
424 /*
425  * End the eariler started series of i/o on the GPADL and release the cached
426  * mapping if any.
427  */
428 static inline void gpadl_iter_end_io(GpadlIter *iter)
429 {
430     assert(iter->active);
431 
432     if (iter->map) {
433         gpadl_iter_cache_unmap(iter);
434     }
435 
436     iter->active = false;
437 }
438 
439 static void vmbus_resched(VMBus *vmbus);
440 static void vmbus_msg_cb(void *data, int status);
441 
442 ssize_t vmbus_iov_to_gpadl(VMBusChannel *chan, VMBusGpadl *gpadl, uint32_t off,
443                            const struct iovec *iov, size_t iov_cnt)
444 {
445     GpadlIter iter;
446     size_t i;
447     ssize_t ret = 0;
448 
449     gpadl_iter_init(&iter, gpadl, chan->dev->dma_as,
450                     DMA_DIRECTION_FROM_DEVICE);
451     gpadl_iter_start_io(&iter);
452     gpadl_iter_seek(&iter, off);
453     for (i = 0; i < iov_cnt; i++) {
454         ret = gpadl_iter_io(&iter, iov[i].iov_base, iov[i].iov_len);
455         if (ret < 0) {
456             goto out;
457         }
458     }
459 out:
460     gpadl_iter_end_io(&iter);
461     return ret;
462 }
463 
464 int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
465                   unsigned iov_cnt, size_t len, size_t off)
466 {
467     int ret_cnt = 0, ret;
468     unsigned i;
469     QEMUSGList *sgl = &req->sgl;
470     ScatterGatherEntry *sg = sgl->sg;
471 
472     for (i = 0; i < sgl->nsg; i++) {
473         if (sg[i].len > off) {
474             break;
475         }
476         off -= sg[i].len;
477     }
478     for (; len && i < sgl->nsg; i++) {
479         dma_addr_t mlen = MIN(sg[i].len - off, len);
480         dma_addr_t addr = sg[i].base + off;
481         len -= mlen;
482         off = 0;
483 
484         for (; mlen; ret_cnt++) {
485             dma_addr_t l = mlen;
486             dma_addr_t a = addr;
487 
488             if (ret_cnt == iov_cnt) {
489                 ret = -ENOBUFS;
490                 goto err;
491             }
492 
493             iov[ret_cnt].iov_base = dma_memory_map(sgl->as, a, &l, dir);
494             if (!l) {
495                 ret = -EFAULT;
496                 goto err;
497             }
498             iov[ret_cnt].iov_len = l;
499             addr += l;
500             mlen -= l;
501         }
502     }
503 
504     return ret_cnt;
505 err:
506     vmbus_unmap_sgl(req, dir, iov, ret_cnt, 0);
507     return ret;
508 }
509 
510 void vmbus_unmap_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
511                      unsigned iov_cnt, size_t accessed)
512 {
513     QEMUSGList *sgl = &req->sgl;
514     unsigned i;
515 
516     for (i = 0; i < iov_cnt; i++) {
517         size_t acsd = MIN(accessed, iov[i].iov_len);
518         dma_memory_unmap(sgl->as, iov[i].iov_base, iov[i].iov_len, dir, acsd);
519         accessed -= acsd;
520     }
521 }
522 
523 static const VMStateDescription vmstate_gpadl = {
524     .name = "vmbus/gpadl",
525     .version_id = 0,
526     .minimum_version_id = 0,
527     .fields = (VMStateField[]) {
528         VMSTATE_UINT32(id, VMBusGpadl),
529         VMSTATE_UINT32(child_relid, VMBusGpadl),
530         VMSTATE_UINT32(num_gfns, VMBusGpadl),
531         VMSTATE_UINT32(seen_gfns, VMBusGpadl),
532         VMSTATE_VARRAY_UINT32_ALLOC(gfns, VMBusGpadl, num_gfns, 0,
533                                     vmstate_info_uint64, uint64_t),
534         VMSTATE_UINT8(state, VMBusGpadl),
535         VMSTATE_END_OF_LIST()
536     }
537 };
538 
/*
 * Bring index @idx back into a ring buffer of @len bytes.
 * Only one wraparound is handled, i.e. @idx is expected to be less than
 * twice the ring buffer size.
 */
static inline uint32_t rb_idx_wrap(uint32_t idx, uint32_t len)
{
    return idx >= len ? idx - len : idx;
}
551 
/*
 * Circular distance from @idx1 to @idx2 in a ring buffer of @len bytes.
 * @allow_catchup tells whether @idx1 is permitted to catch up with @idx2
 * (the read index may catch up with the write index but not the other way
 * round); when it isn't, the distance is one less.
 */
static inline uint32_t rb_idx_delta(uint32_t idx1, uint32_t idx2, uint32_t len,
                                    bool allow_catchup)
{
    uint32_t span = idx2 + len - idx1;

    if (!allow_catchup) {
        span -= 1;
    }
    return rb_idx_wrap(span, len);
}
562 
563 static vmbus_ring_buffer *ringbuf_map_hdr(VMBusRingBufCommon *ringbuf)
564 {
565     vmbus_ring_buffer *rb;
566     dma_addr_t mlen = sizeof(*rb);
567 
568     rb = dma_memory_map(ringbuf->as, ringbuf->rb_addr, &mlen,
569                         DMA_DIRECTION_FROM_DEVICE);
570     if (mlen != sizeof(*rb)) {
571         dma_memory_unmap(ringbuf->as, rb, mlen,
572                          DMA_DIRECTION_FROM_DEVICE, 0);
573         return NULL;
574     }
575     return rb;
576 }
577 
578 static void ringbuf_unmap_hdr(VMBusRingBufCommon *ringbuf,
579                               vmbus_ring_buffer *rb, bool dirty)
580 {
581     assert(rb);
582 
583     dma_memory_unmap(ringbuf->as, rb, sizeof(*rb), DMA_DIRECTION_FROM_DEVICE,
584                      dirty ? sizeof(*rb) : 0);
585 }
586 
587 static void ringbuf_init_common(VMBusRingBufCommon *ringbuf, VMBusGpadl *gpadl,
588                                 AddressSpace *as, DMADirection dir,
589                                 uint32_t begin, uint32_t end)
590 {
591     ringbuf->as = as;
592     ringbuf->rb_addr = gpadl->gfns[begin] << TARGET_PAGE_BITS;
593     ringbuf->base = (begin + 1) << TARGET_PAGE_BITS;
594     ringbuf->len = (end - begin - 1) << TARGET_PAGE_BITS;
595     gpadl_iter_init(&ringbuf->iter, gpadl, as, dir);
596 }
597 
598 static int ringbufs_init(VMBusChannel *chan)
599 {
600     vmbus_ring_buffer *rb;
601     VMBusSendRingBuf *send_ringbuf = &chan->send_ringbuf;
602     VMBusRecvRingBuf *recv_ringbuf = &chan->recv_ringbuf;
603 
604     if (chan->ringbuf_send_offset <= 1 ||
605         chan->gpadl->num_gfns <= chan->ringbuf_send_offset + 1) {
606         return -EINVAL;
607     }
608 
609     ringbuf_init_common(&recv_ringbuf->common, chan->gpadl, chan->dev->dma_as,
610                         DMA_DIRECTION_TO_DEVICE, 0, chan->ringbuf_send_offset);
611     ringbuf_init_common(&send_ringbuf->common, chan->gpadl, chan->dev->dma_as,
612                         DMA_DIRECTION_FROM_DEVICE, chan->ringbuf_send_offset,
613                         chan->gpadl->num_gfns);
614     send_ringbuf->wanted = 0;
615     send_ringbuf->reserved = 0;
616 
617     rb = ringbuf_map_hdr(&recv_ringbuf->common);
618     if (!rb) {
619         return -EFAULT;
620     }
621     recv_ringbuf->rd_idx = recv_ringbuf->last_rd_idx = rb->read_index;
622     ringbuf_unmap_hdr(&recv_ringbuf->common, rb, false);
623 
624     rb = ringbuf_map_hdr(&send_ringbuf->common);
625     if (!rb) {
626         return -EFAULT;
627     }
628     send_ringbuf->wr_idx = send_ringbuf->last_wr_idx = rb->write_index;
629     send_ringbuf->last_seen_rd_idx = rb->read_index;
630     rb->feature_bits |= VMBUS_RING_BUFFER_FEAT_PENDING_SZ;
631     ringbuf_unmap_hdr(&send_ringbuf->common, rb, true);
632 
633     if (recv_ringbuf->rd_idx >= recv_ringbuf->common.len ||
634         send_ringbuf->wr_idx >= send_ringbuf->common.len) {
635         return -EOVERFLOW;
636     }
637 
638     return 0;
639 }
640 
641 /*
642  * Perform io between the GPADL-backed ringbuffer @ringbuf and @buf, wrapping
643  * around if needed.
644  * @len is assumed not to exceed the size of the ringbuffer, so only single
645  * wraparound is considered.
646  */
647 static ssize_t ringbuf_io(VMBusRingBufCommon *ringbuf, void *buf, uint32_t len)
648 {
649     ssize_t ret1 = 0, ret2 = 0;
650     uint32_t remain = ringbuf->len + ringbuf->base - ringbuf->iter.off;
651 
652     if (len >= remain) {
653         ret1 = gpadl_iter_io(&ringbuf->iter, buf, remain);
654         if (ret1 < 0) {
655             return ret1;
656         }
657         gpadl_iter_seek(&ringbuf->iter, ringbuf->base);
658         buf += remain;
659         len -= remain;
660     }
661     ret2 = gpadl_iter_io(&ringbuf->iter, buf, len);
662     if (ret2 < 0) {
663         return ret2;
664     }
665     return ret1 + ret2;
666 }
667 
668 /*
669  * Position the circular iterator within @ringbuf to offset @new_off, wrapping
670  * around if needed.
671  * @new_off is assumed not to exceed twice the size of the ringbuffer, so only
672  * single wraparound is considered.
673  */
674 static inline void ringbuf_seek(VMBusRingBufCommon *ringbuf, uint32_t new_off)
675 {
676     gpadl_iter_seek(&ringbuf->iter,
677                     ringbuf->base + rb_idx_wrap(new_off, ringbuf->len));
678 }
679 
680 static inline uint32_t ringbuf_tell(VMBusRingBufCommon *ringbuf)
681 {
682     return ringbuf->iter.off - ringbuf->base;
683 }
684 
685 static inline void ringbuf_start_io(VMBusRingBufCommon *ringbuf)
686 {
687     gpadl_iter_start_io(&ringbuf->iter);
688 }
689 
690 static inline void ringbuf_end_io(VMBusRingBufCommon *ringbuf)
691 {
692     gpadl_iter_end_io(&ringbuf->iter);
693 }
694 
695 VMBusDevice *vmbus_channel_device(VMBusChannel *chan)
696 {
697     return chan->dev;
698 }
699 
700 VMBusChannel *vmbus_device_channel(VMBusDevice *dev, uint32_t chan_idx)
701 {
702     if (chan_idx >= dev->num_channels) {
703         return NULL;
704     }
705     return &dev->channels[chan_idx];
706 }
707 
708 uint32_t vmbus_channel_idx(VMBusChannel *chan)
709 {
710     return chan - chan->dev->channels;
711 }
712 
713 void vmbus_channel_notify_host(VMBusChannel *chan)
714 {
715     event_notifier_set(&chan->notifier);
716 }
717 
718 bool vmbus_channel_is_open(VMBusChannel *chan)
719 {
720     return chan->is_open;
721 }
722 
723 /*
724  * Notify the guest side about the data to work on in the channel ring buffer.
725  * The notification is done by signaling a dedicated per-channel SynIC event
726  * flag (more recent guests) or setting a bit in the interrupt page and firing
727  * the VMBus SINT (older guests).
728  */
729 static int vmbus_channel_notify_guest(VMBusChannel *chan)
730 {
731     int res = 0;
732     unsigned long *int_map, mask;
733     unsigned idx;
734     hwaddr addr = chan->vmbus->int_page_gpa;
735     hwaddr len = TARGET_PAGE_SIZE / 2, dirty = 0;
736 
737     trace_vmbus_channel_notify_guest(chan->id);
738 
739     if (!addr) {
740         return hyperv_set_event_flag(chan->notify_route, chan->id);
741     }
742 
743     int_map = cpu_physical_memory_map(addr, &len, 1);
744     if (len != TARGET_PAGE_SIZE / 2) {
745         res = -ENXIO;
746         goto unmap;
747     }
748 
749     idx = BIT_WORD(chan->id);
750     mask = BIT_MASK(chan->id);
751     if ((qatomic_fetch_or(&int_map[idx], mask) & mask) != mask) {
752         res = hyperv_sint_route_set_sint(chan->notify_route);
753         dirty = len;
754     }
755 
756 unmap:
757     cpu_physical_memory_unmap(int_map, len, 1, dirty);
758     return res;
759 }
760 
761 #define VMBUS_PKT_TRAILER      sizeof(uint64_t)
762 
763 static uint32_t vmbus_pkt_hdr_set_offsets(vmbus_packet_hdr *hdr,
764                                           uint32_t desclen, uint32_t msglen)
765 {
766     hdr->offset_qwords = sizeof(*hdr) / sizeof(uint64_t) +
767         DIV_ROUND_UP(desclen, sizeof(uint64_t));
768     hdr->len_qwords = hdr->offset_qwords +
769         DIV_ROUND_UP(msglen, sizeof(uint64_t));
770     return hdr->len_qwords * sizeof(uint64_t) + VMBUS_PKT_TRAILER;
771 }
772 
773 /*
774  * Simplified ring buffer operation with paired barriers annotations in the
775  * producer and consumer loops:
776  *
777  * producer                           * consumer
778  * ~~~~~~~~                           * ~~~~~~~~
779  * write pending_send_sz              * read write_index
780  * smp_mb                       [A]   * smp_mb                       [C]
781  * read read_index                    * read packet
782  * smp_mb                       [B]   * read/write out-of-band data
783  * read/write out-of-band data        * smp_mb                       [B]
784  * write packet                       * write read_index
785  * smp_mb                       [C]   * smp_mb                       [A]
786  * write write_index                  * read pending_send_sz
787  * smp_wmb                      [D]   * smp_rmb                      [D]
788  * write pending_send_sz              * read write_index
789  * ...                                * ...
790  */
791 
792 static inline uint32_t ringbuf_send_avail(VMBusSendRingBuf *ringbuf)
793 {
794     /* don't trust guest data */
795     if (ringbuf->last_seen_rd_idx >= ringbuf->common.len) {
796         return 0;
797     }
798     return rb_idx_delta(ringbuf->wr_idx, ringbuf->last_seen_rd_idx,
799                         ringbuf->common.len, false);
800 }
801 
802 static ssize_t ringbuf_send_update_idx(VMBusChannel *chan)
803 {
804     VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
805     vmbus_ring_buffer *rb;
806     uint32_t written;
807 
808     written = rb_idx_delta(ringbuf->last_wr_idx, ringbuf->wr_idx,
809                            ringbuf->common.len, true);
810     if (!written) {
811         return 0;
812     }
813 
814     rb = ringbuf_map_hdr(&ringbuf->common);
815     if (!rb) {
816         return -EFAULT;
817     }
818 
819     ringbuf->reserved -= written;
820 
821     /* prevent reorder with the data operation and packet write */
822     smp_mb();                   /* barrier pair [C] */
823     rb->write_index = ringbuf->wr_idx;
824 
825     /*
826      * If the producer earlier indicated that it wants to be notified when the
827      * consumer frees certain amount of space in the ring buffer, that amount
828      * is reduced by the size of the completed write.
829      */
830     if (ringbuf->wanted) {
831         /* otherwise reservation would fail */
832         assert(ringbuf->wanted < written);
833         ringbuf->wanted -= written;
834         /* prevent reorder with write_index write */
835         smp_wmb();              /* barrier pair [D] */
836         rb->pending_send_sz = ringbuf->wanted;
837     }
838 
839     /* prevent reorder with write_index or pending_send_sz write */
840     smp_mb();                   /* barrier pair [A] */
841     ringbuf->last_seen_rd_idx = rb->read_index;
842 
843     /*
844      * The consumer may have missed the reduction of pending_send_sz and skip
845      * notification, so re-check the blocking condition, and, if it's no longer
846      * true, ensure processing another iteration by simulating consumer's
847      * notification.
848      */
849     if (ringbuf_send_avail(ringbuf) >= ringbuf->wanted) {
850         vmbus_channel_notify_host(chan);
851     }
852 
853     /* skip notification by consumer's request */
854     if (rb->interrupt_mask) {
855         goto out;
856     }
857 
858     /*
859      * The consumer hasn't caught up with the producer's previous state so it's
860      * not blocked.
861      * (last_seen_rd_idx comes from the guest but it's safe to use w/o
862      * validation here as it only affects notification.)
863      */
864     if (rb_idx_delta(ringbuf->last_seen_rd_idx, ringbuf->wr_idx,
865                      ringbuf->common.len, true) > written) {
866         goto out;
867     }
868 
869     vmbus_channel_notify_guest(chan);
870 out:
871     ringbuf_unmap_hdr(&ringbuf->common, rb, true);
872     ringbuf->last_wr_idx = ringbuf->wr_idx;
873     return written;
874 }
875 
876 int vmbus_channel_reserve(VMBusChannel *chan,
877                           uint32_t desclen, uint32_t msglen)
878 {
879     VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
880     vmbus_ring_buffer *rb = NULL;
881     vmbus_packet_hdr hdr;
882     uint32_t needed = ringbuf->reserved +
883         vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);
884 
885     /* avoid touching the guest memory if possible */
886     if (likely(needed <= ringbuf_send_avail(ringbuf))) {
887         goto success;
888     }
889 
890     rb = ringbuf_map_hdr(&ringbuf->common);
891     if (!rb) {
892         return -EFAULT;
893     }
894 
895     /* fetch read index from guest memory and try again */
896     ringbuf->last_seen_rd_idx = rb->read_index;
897 
898     if (likely(needed <= ringbuf_send_avail(ringbuf))) {
899         goto success;
900     }
901 
902     rb->pending_send_sz = needed;
903 
904     /*
905      * The consumer may have made progress and freed up some space before
906      * seeing updated pending_send_sz, so re-read read_index (preventing
907      * reorder with the pending_send_sz write) and try again.
908      */
909     smp_mb();                   /* barrier pair [A] */
910     ringbuf->last_seen_rd_idx = rb->read_index;
911 
912     if (needed > ringbuf_send_avail(ringbuf)) {
913         goto out;
914     }
915 
916 success:
917     ringbuf->reserved = needed;
918     needed = 0;
919 
920     /* clear pending_send_sz if it was set */
921     if (ringbuf->wanted) {
922         if (!rb) {
923             rb = ringbuf_map_hdr(&ringbuf->common);
924             if (!rb) {
925                 /* failure to clear pending_send_sz is non-fatal */
926                 goto out;
927             }
928         }
929 
930         rb->pending_send_sz = 0;
931     }
932 
933     /* prevent reorder of the following data operation with read_index read */
934     smp_mb();                   /* barrier pair [B] */
935 
936 out:
937     if (rb) {
938         ringbuf_unmap_hdr(&ringbuf->common, rb, ringbuf->wanted == needed);
939     }
940     ringbuf->wanted = needed;
941     return needed ? -ENOSPC : 0;
942 }
943 
944 ssize_t vmbus_channel_send(VMBusChannel *chan, uint16_t pkt_type,
945                            void *desc, uint32_t desclen,
946                            void *msg, uint32_t msglen,
947                            bool need_comp, uint64_t transaction_id)
948 {
949     ssize_t ret = 0;
950     vmbus_packet_hdr hdr;
951     uint32_t totlen;
952     VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
953 
954     if (!vmbus_channel_is_open(chan)) {
955         return -EINVAL;
956     }
957 
958     totlen = vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);
959     hdr.type = pkt_type;
960     hdr.flags = need_comp ? VMBUS_PACKET_FLAG_REQUEST_COMPLETION : 0;
961     hdr.transaction_id = transaction_id;
962 
963     assert(totlen <= ringbuf->reserved);
964 
965     ringbuf_start_io(&ringbuf->common);
966     ringbuf_seek(&ringbuf->common, ringbuf->wr_idx);
967     ret = ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr));
968     if (ret < 0) {
969         goto out;
970     }
971     if (desclen) {
972         assert(desc);
973         ret = ringbuf_io(&ringbuf->common, desc, desclen);
974         if (ret < 0) {
975             goto out;
976         }
977         ringbuf_seek(&ringbuf->common,
978                      ringbuf->wr_idx + hdr.offset_qwords * sizeof(uint64_t));
979     }
980     ret = ringbuf_io(&ringbuf->common, msg, msglen);
981     if (ret < 0) {
982         goto out;
983     }
984     ringbuf_seek(&ringbuf->common, ringbuf->wr_idx + totlen);
985     ringbuf->wr_idx = ringbuf_tell(&ringbuf->common);
986     ret = 0;
987 out:
988     ringbuf_end_io(&ringbuf->common);
989     if (ret) {
990         return ret;
991     }
992     return ringbuf_send_update_idx(chan);
993 }
994 
/*
 * Send a completion packet (VMBUS_PACKET_COMP) for @req on the channel the
 * request belongs to, reusing the request's transaction id.  The packet
 * carries no descriptor and does not itself request a completion.
 *
 * Asserts that the request asked for a completion.  Returns whatever
 * vmbus_channel_send() returns.
 */
ssize_t vmbus_channel_send_completion(VMBusChanReq *req,
                                      void *msg, uint32_t msglen)
{
    assert(req->need_comp);
    return vmbus_channel_send(req->chan, VMBUS_PACKET_COMP, NULL, 0,
                              msg, msglen, false, req->transaction_id);
}
1002 
1003 static int sgl_from_gpa_ranges(QEMUSGList *sgl, VMBusDevice *dev,
1004                                VMBusRingBufCommon *ringbuf, uint32_t len)
1005 {
1006     int ret;
1007     vmbus_pkt_gpa_direct hdr;
1008     hwaddr curaddr = 0;
1009     hwaddr curlen = 0;
1010     int num;
1011 
1012     if (len < sizeof(hdr)) {
1013         return -EIO;
1014     }
1015     ret = ringbuf_io(ringbuf, &hdr, sizeof(hdr));
1016     if (ret < 0) {
1017         return ret;
1018     }
1019     len -= sizeof(hdr);
1020 
1021     num = (len - hdr.rangecount * sizeof(vmbus_gpa_range)) / sizeof(uint64_t);
1022     if (num < 0) {
1023         return -EIO;
1024     }
1025     qemu_sglist_init(sgl, DEVICE(dev), num, ringbuf->as);
1026 
1027     for (; hdr.rangecount; hdr.rangecount--) {
1028         vmbus_gpa_range range;
1029 
1030         if (len < sizeof(range)) {
1031             goto eio;
1032         }
1033         ret = ringbuf_io(ringbuf, &range, sizeof(range));
1034         if (ret < 0) {
1035             goto err;
1036         }
1037         len -= sizeof(range);
1038 
1039         if (range.byte_offset & TARGET_PAGE_MASK) {
1040             goto eio;
1041         }
1042 
1043         for (; range.byte_count; range.byte_offset = 0) {
1044             uint64_t paddr;
1045             uint32_t plen = MIN(range.byte_count,
1046                                 TARGET_PAGE_SIZE - range.byte_offset);
1047 
1048             if (len < sizeof(uint64_t)) {
1049                 goto eio;
1050             }
1051             ret = ringbuf_io(ringbuf, &paddr, sizeof(paddr));
1052             if (ret < 0) {
1053                 goto err;
1054             }
1055             len -= sizeof(uint64_t);
1056             paddr <<= TARGET_PAGE_BITS;
1057             paddr |= range.byte_offset;
1058             range.byte_count -= plen;
1059 
1060             if (curaddr + curlen == paddr) {
1061                 /* consecutive fragments - join */
1062                 curlen += plen;
1063             } else {
1064                 if (curlen) {
1065                     qemu_sglist_add(sgl, curaddr, curlen);
1066                 }
1067 
1068                 curaddr = paddr;
1069                 curlen = plen;
1070             }
1071         }
1072     }
1073 
1074     if (curlen) {
1075         qemu_sglist_add(sgl, curaddr, curlen);
1076     }
1077 
1078     return 0;
1079 eio:
1080     ret = -EIO;
1081 err:
1082     qemu_sglist_destroy(sgl);
1083     return ret;
1084 }
1085 
1086 static VMBusChanReq *vmbus_alloc_req(VMBusChannel *chan,
1087                                      uint32_t size, uint16_t pkt_type,
1088                                      uint32_t msglen, uint64_t transaction_id,
1089                                      bool need_comp)
1090 {
1091     VMBusChanReq *req;
1092     uint32_t msgoff = QEMU_ALIGN_UP(size, __alignof__(*req->msg));
1093     uint32_t totlen = msgoff + msglen;
1094 
1095     req = g_malloc0(totlen);
1096     req->chan = chan;
1097     req->pkt_type = pkt_type;
1098     req->msg = (void *)req + msgoff;
1099     req->msglen = msglen;
1100     req->transaction_id = transaction_id;
1101     req->need_comp = need_comp;
1102     return req;
1103 }
1104 
1105 int vmbus_channel_recv_start(VMBusChannel *chan)
1106 {
1107     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1108     vmbus_ring_buffer *rb;
1109 
1110     rb = ringbuf_map_hdr(&ringbuf->common);
1111     if (!rb) {
1112         return -EFAULT;
1113     }
1114     ringbuf->last_seen_wr_idx = rb->write_index;
1115     ringbuf_unmap_hdr(&ringbuf->common, rb, false);
1116 
1117     if (ringbuf->last_seen_wr_idx >= ringbuf->common.len) {
1118         return -EOVERFLOW;
1119     }
1120 
1121     /* prevent reorder of the following data operation with write_index read */
1122     smp_mb();                   /* barrier pair [C] */
1123     return 0;
1124 }
1125 
1126 void *vmbus_channel_recv_peek(VMBusChannel *chan, uint32_t size)
1127 {
1128     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1129     vmbus_packet_hdr hdr = {};
1130     VMBusChanReq *req;
1131     uint32_t avail;
1132     uint32_t totlen, pktlen, msglen, msgoff, desclen;
1133 
1134     assert(size >= sizeof(*req));
1135 
1136     /* safe as last_seen_wr_idx is validated in vmbus_channel_recv_start */
1137     avail = rb_idx_delta(ringbuf->rd_idx, ringbuf->last_seen_wr_idx,
1138                          ringbuf->common.len, true);
1139     if (avail < sizeof(hdr)) {
1140         return NULL;
1141     }
1142 
1143     ringbuf_seek(&ringbuf->common, ringbuf->rd_idx);
1144     if (ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr)) < 0) {
1145         return NULL;
1146     }
1147 
1148     pktlen = hdr.len_qwords * sizeof(uint64_t);
1149     totlen = pktlen + VMBUS_PKT_TRAILER;
1150     if (totlen > avail) {
1151         return NULL;
1152     }
1153 
1154     msgoff = hdr.offset_qwords * sizeof(uint64_t);
1155     if (msgoff > pktlen || msgoff < sizeof(hdr)) {
1156         error_report("%s: malformed packet: %u %u", __func__, msgoff, pktlen);
1157         return NULL;
1158     }
1159 
1160     msglen = pktlen - msgoff;
1161 
1162     req = vmbus_alloc_req(chan, size, hdr.type, msglen, hdr.transaction_id,
1163                           hdr.flags & VMBUS_PACKET_FLAG_REQUEST_COMPLETION);
1164 
1165     switch (hdr.type) {
1166     case VMBUS_PACKET_DATA_USING_GPA_DIRECT:
1167         desclen = msgoff - sizeof(hdr);
1168         if (sgl_from_gpa_ranges(&req->sgl, chan->dev, &ringbuf->common,
1169                                 desclen) < 0) {
1170             error_report("%s: failed to convert GPA ranges to SGL", __func__);
1171             goto free_req;
1172         }
1173         break;
1174     case VMBUS_PACKET_DATA_INBAND:
1175     case VMBUS_PACKET_COMP:
1176         break;
1177     default:
1178         error_report("%s: unexpected msg type: %x", __func__, hdr.type);
1179         goto free_req;
1180     }
1181 
1182     ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + msgoff);
1183     if (ringbuf_io(&ringbuf->common, req->msg, msglen) < 0) {
1184         goto free_req;
1185     }
1186     ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + totlen);
1187 
1188     return req;
1189 free_req:
1190     vmbus_free_req(req);
1191     return NULL;
1192 }
1193 
1194 void vmbus_channel_recv_pop(VMBusChannel *chan)
1195 {
1196     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1197     ringbuf->rd_idx = ringbuf_tell(&ringbuf->common);
1198 }
1199 
/*
 * Finish a batch of receives on @chan: publish the local read index in the
 * ring buffer header and notify the guest unless that is found unnecessary.
 *
 * Returns 0 without touching the header when rb_idx_delta() reports no
 * distance between last_rd_idx and rd_idx, -EFAULT when the ring buffer
 * header cannot be mapped, otherwise that distance (presumably the number
 * of bytes consumed since the previous call -- confirm against
 * rb_idx_delta()).
 */
ssize_t vmbus_channel_recv_done(VMBusChannel *chan)
{
    VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
    vmbus_ring_buffer *rb;
    uint32_t read;

    read = rb_idx_delta(ringbuf->last_rd_idx, ringbuf->rd_idx,
                        ringbuf->common.len, true);
    if (!read) {
        /* nothing new to publish */
        return 0;
    }

    rb = ringbuf_map_hdr(&ringbuf->common);
    if (!rb) {
        return -EFAULT;
    }

    /* prevent reorder with the data operation and packet read */
    smp_mb();                   /* barrier pair [B] */
    rb->read_index = ringbuf->rd_idx;

    /* prevent reorder of the following pending_send_sz read */
    smp_mb();                   /* barrier pair [A] */

    /* no notification while the interrupt mask in the header is set */
    if (rb->interrupt_mask) {
        goto out;
    }

    /*
     * When the header advertises the pending-size feature, only notify if
     * the space just released is what lets a blocked producer proceed.
     */
    if (rb->feature_bits & VMBUS_RING_BUFFER_FEAT_PENDING_SZ) {
        uint32_t wr_idx, wr_avail;
        uint32_t wanted = rb->pending_send_sz;

        /* pending_send_sz of zero: nothing is recorded as waited for */
        if (!wanted) {
            goto out;
        }

        /* prevent reorder with pending_send_sz read */
        smp_rmb();              /* barrier pair [D] */
        wr_idx = rb->write_index;

        wr_avail = rb_idx_delta(wr_idx, ringbuf->rd_idx, ringbuf->common.len,
                                true);

        /* the producer wasn't blocked on the consumer state */
        if (wr_avail >= read + wanted) {
            goto out;
        }
        /* there's not enough space for the producer to make progress */
        if (wr_avail < wanted) {
            goto out;
        }
    }

    vmbus_channel_notify_guest(chan);
out:
    ringbuf_unmap_hdr(&ringbuf->common, rb, true);
    /* remember what has been published for the next call */
    ringbuf->last_rd_idx = ringbuf->rd_idx;
    return read;
}
1259 
1260 void vmbus_free_req(void *req)
1261 {
1262     VMBusChanReq *r = req;
1263 
1264     if (!req) {
1265         return;
1266     }
1267 
1268     if (r->sgl.dev) {
1269         qemu_sglist_destroy(&r->sgl);
1270     }
1271     g_free(req);
1272 }
1273 
/*
 * Migration description of one ScatterGatherEntry: its @base and @len, both
 * transferred as 64-bit unsigned integers.
 */
static const VMStateDescription vmstate_sgent = {
    .name = "vmbus/sgentry",
    .version_id = 0,
    .minimum_version_id = 0,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(base, ScatterGatherEntry),
        VMSTATE_UINT64(len, ScatterGatherEntry),
        VMSTATE_END_OF_LIST()
    }
};
1284 
/*
 * Flattened form of a VMBusChanReq used for migration: the channel is
 * recorded by its subchannel index and the scatter-gather list as a plain
 * array of entries.
 */
typedef struct VMBusChanReqSave {
    /* subchannel index of the channel the request belongs to */
    uint16_t chan_idx;
    uint16_t pkt_type;
    /* size of @msg in bytes */
    uint32_t msglen;
    void *msg;
    uint64_t transaction_id;
    bool need_comp;
    /* number of entries in @sgl */
    uint32_t num;
    ScatterGatherEntry *sgl;
} VMBusChanReqSave;
1295 
/*
 * Migration description of a VMBusChanReqSave.  @msg is a variable-size
 * buffer of @msglen bytes and @sgl a variable-size array of @num entries,
 * so each length field is listed before the data it sizes.
 */
static const VMStateDescription vmstate_vmbus_chan_req = {
    .name = "vmbus/vmbus_chan_req",
    .version_id = 0,
    .minimum_version_id = 0,
    .fields = (VMStateField[]) {
        VMSTATE_UINT16(chan_idx, VMBusChanReqSave),
        VMSTATE_UINT16(pkt_type, VMBusChanReqSave),
        VMSTATE_UINT32(msglen, VMBusChanReqSave),
        VMSTATE_VBUFFER_ALLOC_UINT32(msg, VMBusChanReqSave, 0, NULL, msglen),
        VMSTATE_UINT64(transaction_id, VMBusChanReqSave),
        VMSTATE_BOOL(need_comp, VMBusChanReqSave),
        VMSTATE_UINT32(num, VMBusChanReqSave),
        VMSTATE_STRUCT_VARRAY_POINTER_UINT32(sgl, VMBusChanReqSave, num,
                                             vmstate_sgent, ScatterGatherEntry),
        VMSTATE_END_OF_LIST()
    }
};
1313 
1314 void vmbus_save_req(QEMUFile *f, VMBusChanReq *req)
1315 {
1316     VMBusChanReqSave req_save;
1317 
1318     req_save.chan_idx = req->chan->subchan_idx;
1319     req_save.pkt_type = req->pkt_type;
1320     req_save.msglen = req->msglen;
1321     req_save.msg = req->msg;
1322     req_save.transaction_id = req->transaction_id;
1323     req_save.need_comp = req->need_comp;
1324     req_save.num = req->sgl.nsg;
1325     req_save.sgl = g_memdup(req->sgl.sg,
1326                             req_save.num * sizeof(ScatterGatherEntry));
1327 
1328     vmstate_save_state(f, &vmstate_vmbus_chan_req, &req_save, NULL);
1329 
1330     g_free(req_save.sgl);
1331 }
1332 
1333 void *vmbus_load_req(QEMUFile *f, VMBusDevice *dev, uint32_t size)
1334 {
1335     VMBusChanReqSave req_save;
1336     VMBusChanReq *req = NULL;
1337     VMBusChannel *chan = NULL;
1338     uint32_t i;
1339 
1340     vmstate_load_state(f, &vmstate_vmbus_chan_req, &req_save, 0);
1341 
1342     if (req_save.chan_idx >= dev->num_channels) {
1343         error_report("%s: %u(chan_idx) > %u(num_channels)", __func__,
1344                      req_save.chan_idx, dev->num_channels);
1345         goto out;
1346     }
1347     chan = &dev->channels[req_save.chan_idx];
1348 
1349     if (vmbus_channel_reserve(chan, 0, req_save.msglen)) {
1350         goto out;
1351     }
1352 
1353     req = vmbus_alloc_req(chan, size, req_save.pkt_type, req_save.msglen,
1354                           req_save.transaction_id, req_save.need_comp);
1355     if (req_save.msglen) {
1356         memcpy(req->msg, req_save.msg, req_save.msglen);
1357     }
1358 
1359     for (i = 0; i < req_save.num; i++) {
1360         qemu_sglist_add(&req->sgl, req_save.sgl[i].base, req_save.sgl[i].len);
1361     }
1362 
1363 out:
1364     if (req_save.msglen) {
1365         g_free(req_save.msg);
1366     }
1367     if (req_save.num) {
1368         g_free(req_save.sgl);
1369     }
1370     return req;
1371 }
1372 
1373 static void channel_event_cb(EventNotifier *e)
1374 {
1375     VMBusChannel *chan = container_of(e, VMBusChannel, notifier);
1376     if (event_notifier_test_and_clear(e)) {
1377         /*
1378          * All receives are supposed to happen within the device worker, so
1379          * bracket it with ringbuf_start/end_io on the receive ringbuffer, and
1380          * potentially reuse the cached mapping throughout the worker.
1381          * Can't do this for sends as they may happen outside the device
1382          * worker.
1383          */
1384         VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1385         ringbuf_start_io(&ringbuf->common);
1386         chan->notify_cb(chan);
1387         ringbuf_end_io(&ringbuf->common);
1388 
1389     }
1390 }
1391 
1392 static int alloc_chan_id(VMBus *vmbus)
1393 {
1394     int ret;
1395 
1396     ret = find_next_zero_bit(vmbus->chanid_bitmap, VMBUS_CHANID_COUNT, 0);
1397     if (ret == VMBUS_CHANID_COUNT) {
1398         return -ENOMEM;
1399     }
1400     return ret + VMBUS_FIRST_CHANID;
1401 }
1402 
1403 static int register_chan_id(VMBusChannel *chan)
1404 {
1405     return test_and_set_bit(chan->id - VMBUS_FIRST_CHANID,
1406                             chan->vmbus->chanid_bitmap) ? -EEXIST : 0;
1407 }
1408 
/*
 * Release the channel's id: clear its bit in the vmbus channel id bitmap
 * (counterpart of register_chan_id()).
 */
static void unregister_chan_id(VMBusChannel *chan)
{
    clear_bit(chan->id - VMBUS_FIRST_CHANID, chan->vmbus->chanid_bitmap);
}
1413 
1414 static uint32_t chan_connection_id(VMBusChannel *chan)
1415 {
1416     return VMBUS_CHAN_CONNECTION_OFFSET + chan->id;
1417 }
1418 
1419 static void init_channel(VMBus *vmbus, VMBusDevice *dev, VMBusDeviceClass *vdc,
1420                          VMBusChannel *chan, uint16_t idx, Error **errp)
1421 {
1422     int res;
1423 
1424     chan->dev = dev;
1425     chan->notify_cb = vdc->chan_notify_cb;
1426     chan->subchan_idx = idx;
1427     chan->vmbus = vmbus;
1428 
1429     res = alloc_chan_id(vmbus);
1430     if (res < 0) {
1431         error_setg(errp, "no spare channel id");
1432         return;
1433     }
1434     chan->id = res;
1435     register_chan_id(chan);
1436 
1437     /*
1438      * The guest drivers depend on the device subchannels (idx #1+) to be
1439      * offered after the primary channel (idx #0) of that device.  To ensure
1440      * that, record the channels on the channel list in the order they appear
1441      * within the device.
1442      */
1443     QTAILQ_INSERT_TAIL(&vmbus->channel_list, chan, link);
1444 }
1445 
/*
 * Undo init_channel(): take the channel off the bus channel list and release
 * its channel id.  The channel is asserted to be in the VMCHAN_INIT state.
 */
static void deinit_channel(VMBusChannel *chan)
{
    assert(chan->state == VMCHAN_INIT);
    QTAILQ_REMOVE(&chan->vmbus->channel_list, chan, link);
    unregister_chan_id(chan);
}
1452 
1453 static void create_channels(VMBus *vmbus, VMBusDevice *dev, Error **errp)
1454 {
1455     uint16_t i;
1456     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(dev);
1457     Error *err = NULL;
1458 
1459     dev->num_channels = vdc->num_channels ? vdc->num_channels(dev) : 1;
1460     if (dev->num_channels < 1) {
1461         error_setg(errp, "invalid #channels: %u", dev->num_channels);
1462         return;
1463     }
1464 
1465     dev->channels = g_new0(VMBusChannel, dev->num_channels);
1466     for (i = 0; i < dev->num_channels; i++) {
1467         init_channel(vmbus, dev, vdc, &dev->channels[i], i, &err);
1468         if (err) {
1469             goto err_init;
1470         }
1471     }
1472 
1473     return;
1474 
1475 err_init:
1476     while (i--) {
1477         deinit_channel(&dev->channels[i]);
1478     }
1479     error_propagate(errp, err);
1480 }
1481 
1482 static void free_channels(VMBusDevice *dev)
1483 {
1484     uint16_t i;
1485     for (i = 0; i < dev->num_channels; i++) {
1486         deinit_channel(&dev->channels[i]);
1487     }
1488     g_free(dev->channels);
1489 }
1490 
1491 static HvSintRoute *make_sint_route(VMBus *vmbus, uint32_t vp_index)
1492 {
1493     VMBusChannel *chan;
1494 
1495     if (vp_index == vmbus->target_vp) {
1496         hyperv_sint_route_ref(vmbus->sint_route);
1497         return vmbus->sint_route;
1498     }
1499 
1500     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1501         if (chan->target_vp == vp_index && vmbus_channel_is_open(chan)) {
1502             hyperv_sint_route_ref(chan->notify_route);
1503             return chan->notify_route;
1504         }
1505     }
1506 
1507     return hyperv_sint_route_new(vp_index, VMBUS_SINT, NULL, NULL);
1508 }
1509 
1510 static void open_channel(VMBusChannel *chan)
1511 {
1512     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
1513 
1514     chan->gpadl = vmbus_get_gpadl(chan, chan->ringbuf_gpadl);
1515     if (!chan->gpadl) {
1516         return;
1517     }
1518 
1519     if (ringbufs_init(chan)) {
1520         goto put_gpadl;
1521     }
1522 
1523     if (event_notifier_init(&chan->notifier, 0)) {
1524         goto put_gpadl;
1525     }
1526 
1527     event_notifier_set_handler(&chan->notifier, channel_event_cb);
1528 
1529     if (hyperv_set_event_flag_handler(chan_connection_id(chan),
1530                                       &chan->notifier)) {
1531         goto cleanup_notifier;
1532     }
1533 
1534     chan->notify_route = make_sint_route(chan->vmbus, chan->target_vp);
1535     if (!chan->notify_route) {
1536         goto clear_event_flag_handler;
1537     }
1538 
1539     if (vdc->open_channel && vdc->open_channel(chan)) {
1540         goto unref_sint_route;
1541     }
1542 
1543     chan->is_open = true;
1544     return;
1545 
1546 unref_sint_route:
1547     hyperv_sint_route_unref(chan->notify_route);
1548 clear_event_flag_handler:
1549     hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
1550 cleanup_notifier:
1551     event_notifier_set_handler(&chan->notifier, NULL);
1552     event_notifier_cleanup(&chan->notifier);
1553 put_gpadl:
1554     vmbus_put_gpadl(chan->gpadl);
1555 }
1556 
1557 static void close_channel(VMBusChannel *chan)
1558 {
1559     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
1560 
1561     if (!chan->is_open) {
1562         return;
1563     }
1564 
1565     if (vdc->close_channel) {
1566         vdc->close_channel(chan);
1567     }
1568 
1569     hyperv_sint_route_unref(chan->notify_route);
1570     hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
1571     event_notifier_set_handler(&chan->notifier, NULL);
1572     event_notifier_cleanup(&chan->notifier);
1573     vmbus_put_gpadl(chan->gpadl);
1574     chan->is_open = false;
1575 }
1576 
/*
 * post_load hook of vmstate_channel: register the channel id that was just
 * loaded.  @opaque is the VMBusChannel; the result of register_chan_id() is
 * returned as is.
 */
static int channel_post_load(void *opaque, int version_id)
{
    return register_chan_id(opaque);
}
1583 
/*
 * Migration description of a VMBusChannel.  After the fields are loaded,
 * channel_post_load() registers the channel id.
 */
static const VMStateDescription vmstate_channel = {
    .name = "vmbus/channel",
    .version_id = 0,
    .minimum_version_id = 0,
    .post_load = channel_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(id, VMBusChannel),
        VMSTATE_UINT16(subchan_idx, VMBusChannel),
        VMSTATE_UINT32(open_id, VMBusChannel),
        VMSTATE_UINT32(target_vp, VMBusChannel),
        VMSTATE_UINT32(ringbuf_gpadl, VMBusChannel),
        VMSTATE_UINT32(ringbuf_send_offset, VMBusChannel),
        VMSTATE_UINT8(offer_state, VMBusChannel),
        VMSTATE_UINT8(state, VMBusChannel),
        VMSTATE_END_OF_LIST()
    }
};
1601 
1602 static VMBusChannel *find_channel(VMBus *vmbus, uint32_t id)
1603 {
1604     VMBusChannel *chan;
1605     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1606         if (chan->id == id) {
1607             return chan;
1608         }
1609     }
1610     return NULL;
1611 }
1612 
1613 static int enqueue_incoming_message(VMBus *vmbus,
1614                                     const struct hyperv_post_message_input *msg)
1615 {
1616     int ret = 0;
1617     uint8_t idx, prev_size;
1618 
1619     qemu_mutex_lock(&vmbus->rx_queue_lock);
1620 
1621     if (vmbus->rx_queue_size == HV_MSG_QUEUE_LEN) {
1622         ret = -ENOBUFS;
1623         goto out;
1624     }
1625 
1626     prev_size = vmbus->rx_queue_size;
1627     idx = (vmbus->rx_queue_head + vmbus->rx_queue_size) % HV_MSG_QUEUE_LEN;
1628     memcpy(&vmbus->rx_queue[idx], msg, sizeof(*msg));
1629     vmbus->rx_queue_size++;
1630 
1631     /* only need to resched if the queue was empty before */
1632     if (!prev_size) {
1633         vmbus_resched(vmbus);
1634     }
1635 out:
1636     qemu_mutex_unlock(&vmbus->rx_queue_lock);
1637     return ret;
1638 }
1639 
1640 static uint16_t vmbus_recv_message(const struct hyperv_post_message_input *msg,
1641                                    void *data)
1642 {
1643     VMBus *vmbus = data;
1644     struct vmbus_message_header *vmbus_msg;
1645 
1646     if (msg->message_type != HV_MESSAGE_VMBUS) {
1647         return HV_STATUS_INVALID_HYPERCALL_INPUT;
1648     }
1649 
1650     if (msg->payload_size < sizeof(struct vmbus_message_header)) {
1651         return HV_STATUS_INVALID_HYPERCALL_INPUT;
1652     }
1653 
1654     vmbus_msg = (struct vmbus_message_header *)msg->payload;
1655 
1656     trace_vmbus_recv_message(vmbus_msg->message_type, msg->payload_size);
1657 
1658     if (vmbus_msg->message_type == VMBUS_MSG_INVALID ||
1659         vmbus_msg->message_type >= VMBUS_MSG_COUNT) {
1660         error_report("vmbus: unknown message type %#x",
1661                      vmbus_msg->message_type);
1662         return HV_STATUS_INVALID_HYPERCALL_INPUT;
1663     }
1664 
1665     if (enqueue_incoming_message(vmbus, msg)) {
1666         return HV_STATUS_INSUFFICIENT_BUFFERS;
1667     }
1668     return HV_STATUS_SUCCESS;
1669 }
1670 
1671 static bool vmbus_initialized(VMBus *vmbus)
1672 {
1673     return vmbus->version > 0 && vmbus->version <= VMBUS_VERSION_CURRENT;
1674 }
1675 
/*
 * Reset the vmbus by running qbus_reset_all() on its bus object.
 */
static void vmbus_reset_all(VMBus *vmbus)
{
    qbus_reset_all(BUS(vmbus));
}
1680 
1681 static void post_msg(VMBus *vmbus, void *msgdata, uint32_t msglen)
1682 {
1683     int ret;
1684     struct hyperv_message msg = {
1685         .header.message_type = HV_MESSAGE_VMBUS,
1686     };
1687 
1688     assert(!vmbus->msg_in_progress);
1689     assert(msglen <= sizeof(msg.payload));
1690     assert(msglen >= sizeof(struct vmbus_message_header));
1691 
1692     vmbus->msg_in_progress = true;
1693 
1694     trace_vmbus_post_msg(((struct vmbus_message_header *)msgdata)->message_type,
1695                          msglen);
1696 
1697     memcpy(msg.payload, msgdata, msglen);
1698     msg.header.payload_size = ROUND_UP(msglen, VMBUS_MESSAGE_SIZE_ALIGN);
1699 
1700     ret = hyperv_post_msg(vmbus->sint_route, &msg);
1701     if (ret == 0 || ret == -EAGAIN) {
1702         return;
1703     }
1704 
1705     error_report("message delivery fatal failure: %d; aborting vmbus", ret);
1706     vmbus_reset_all(vmbus);
1707 }
1708 
1709 static int vmbus_init(VMBus *vmbus)
1710 {
1711     if (vmbus->target_vp != (uint32_t)-1) {
1712         vmbus->sint_route = hyperv_sint_route_new(vmbus->target_vp, VMBUS_SINT,
1713                                                   vmbus_msg_cb, vmbus);
1714         if (!vmbus->sint_route) {
1715             error_report("failed to set up SINT route");
1716             return -ENOMEM;
1717         }
1718     }
1719     return 0;
1720 }
1721 
1722 static void vmbus_deinit(VMBus *vmbus)
1723 {
1724     VMBusGpadl *gpadl, *tmp_gpadl;
1725     VMBusChannel *chan;
1726 
1727     QTAILQ_FOREACH_SAFE(gpadl, &vmbus->gpadl_list, link, tmp_gpadl) {
1728         if (gpadl->state == VMGPADL_TORNDOWN) {
1729             continue;
1730         }
1731         vmbus_put_gpadl(gpadl);
1732     }
1733 
1734     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1735         chan->offer_state = VMOFFER_INIT;
1736     }
1737 
1738     hyperv_sint_route_unref(vmbus->sint_route);
1739     vmbus->sint_route = NULL;
1740     vmbus->int_page_gpa = 0;
1741     vmbus->target_vp = (uint32_t)-1;
1742     vmbus->version = 0;
1743     vmbus->state = VMBUS_LISTEN;
1744     vmbus->msg_in_progress = false;
1745 }
1746 
1747 static void handle_initiate_contact(VMBus *vmbus,
1748                                     vmbus_message_initiate_contact *msg,
1749                                     uint32_t msglen)
1750 {
1751     if (msglen < sizeof(*msg)) {
1752         return;
1753     }
1754 
1755     trace_vmbus_initiate_contact(msg->version_requested >> 16,
1756                                  msg->version_requested & 0xffff,
1757                                  msg->target_vcpu, msg->monitor_page1,
1758                                  msg->monitor_page2, msg->interrupt_page);
1759 
1760     /*
1761      * Reset vmbus on INITIATE_CONTACT regardless of its previous state.
1762      * Useful, in particular, with vmbus-aware BIOS which can't shut vmbus down
1763      * before handing over to OS loader.
1764      */
1765     vmbus_reset_all(vmbus);
1766 
1767     vmbus->target_vp = msg->target_vcpu;
1768     vmbus->version = msg->version_requested;
1769     if (vmbus->version < VMBUS_VERSION_WIN8) {
1770         /* linux passes interrupt page even when it doesn't need it */
1771         vmbus->int_page_gpa = msg->interrupt_page;
1772     }
1773     vmbus->state = VMBUS_HANDSHAKE;
1774 
1775     if (vmbus_init(vmbus)) {
1776         error_report("failed to init vmbus; aborting");
1777         vmbus_deinit(vmbus);
1778         return;
1779     }
1780 }
1781 
1782 static void send_handshake(VMBus *vmbus)
1783 {
1784     struct vmbus_message_version_response msg = {
1785         .header.message_type = VMBUS_MSG_VERSION_RESPONSE,
1786         .version_supported = vmbus_initialized(vmbus),
1787     };
1788 
1789     post_msg(vmbus, &msg, sizeof(msg));
1790 }
1791 
1792 static void handle_request_offers(VMBus *vmbus, void *msgdata, uint32_t msglen)
1793 {
1794     VMBusChannel *chan;
1795 
1796     if (!vmbus_initialized(vmbus)) {
1797         return;
1798     }
1799 
1800     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1801         if (chan->offer_state == VMOFFER_INIT) {
1802             chan->offer_state = VMOFFER_SENDING;
1803             break;
1804         }
1805     }
1806 
1807     vmbus->state = VMBUS_OFFER;
1808 }
1809 
1810 static void send_offer(VMBus *vmbus)
1811 {
1812     VMBusChannel *chan;
1813     struct vmbus_message_header alloffers_msg = {
1814         .message_type = VMBUS_MSG_ALLOFFERS_DELIVERED,
1815     };
1816 
1817     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1818         if (chan->offer_state == VMOFFER_SENDING) {
1819             VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
1820             /* Hyper-V wants LE GUIDs */
1821             QemuUUID classid = qemu_uuid_bswap(vdc->classid);
1822             QemuUUID instanceid = qemu_uuid_bswap(chan->dev->instanceid);
1823             struct vmbus_message_offer_channel msg = {
1824                 .header.message_type = VMBUS_MSG_OFFERCHANNEL,
1825                 .child_relid = chan->id,
1826                 .connection_id = chan_connection_id(chan),
1827                 .channel_flags = vdc->channel_flags,
1828                 .mmio_size_mb = vdc->mmio_size_mb,
1829                 .sub_channel_index = vmbus_channel_idx(chan),
1830                 .interrupt_flags = VMBUS_OFFER_INTERRUPT_DEDICATED,
1831             };
1832 
1833             memcpy(msg.type_uuid, &classid, sizeof(classid));
1834             memcpy(msg.instance_uuid, &instanceid, sizeof(instanceid));
1835 
1836             trace_vmbus_send_offer(chan->id, chan->dev);
1837 
1838             post_msg(vmbus, &msg, sizeof(msg));
1839             return;
1840         }
1841     }
1842 
1843     /* no more offers, send terminator message */
1844     trace_vmbus_terminate_offers();
1845     post_msg(vmbus, &alloffers_msg, sizeof(alloffers_msg));
1846 }
1847 
1848 static bool complete_offer(VMBus *vmbus)
1849 {
1850     VMBusChannel *chan;
1851 
1852     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1853         if (chan->offer_state == VMOFFER_SENDING) {
1854             chan->offer_state = VMOFFER_SENT;
1855             goto next_offer;
1856         }
1857     }
1858     /*
1859      * no transitioning channels found so this is completing the terminator
1860      * message, and vmbus can move to the next state
1861      */
1862     return true;
1863 
1864 next_offer:
1865     /* try to mark another channel for offering */
1866     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1867         if (chan->offer_state == VMOFFER_INIT) {
1868             chan->offer_state = VMOFFER_SENDING;
1869             break;
1870         }
1871     }
1872     /*
1873      * if an offer has been sent there are more offers or the terminator yet to
1874      * send, so no state transition for vmbus
1875      */
1876     return false;
1877 }
1878 
1879 
1880 static void handle_gpadl_header(VMBus *vmbus, vmbus_message_gpadl_header *msg,
1881                                 uint32_t msglen)
1882 {
1883     VMBusGpadl *gpadl;
1884     uint32_t num_gfns, i;
1885 
1886     /* must include at least one gpa range */
1887     if (msglen < sizeof(*msg) + sizeof(msg->range[0]) ||
1888         !vmbus_initialized(vmbus)) {
1889         return;
1890     }
1891 
1892     num_gfns = (msg->range_buflen - msg->rangecount * sizeof(msg->range[0])) /
1893                sizeof(msg->range[0].pfn_array[0]);
1894 
1895     trace_vmbus_gpadl_header(msg->gpadl_id, num_gfns);
1896 
1897     /*
1898      * In theory the GPADL_HEADER message can define a GPADL with multiple GPA
1899      * ranges each with arbitrary size and alignment.  However in practice only
1900      * single-range page-aligned GPADLs have been observed so just ignore
1901      * anything else and simplify things greatly.
1902      */
1903     if (msg->rangecount != 1 || msg->range[0].byte_offset ||
1904         (msg->range[0].byte_count != (num_gfns << TARGET_PAGE_BITS))) {
1905         return;
1906     }
1907 
1908     /* ignore requests to create already existing GPADLs */
1909     if (find_gpadl(vmbus, msg->gpadl_id)) {
1910         return;
1911     }
1912 
1913     gpadl = create_gpadl(vmbus, msg->gpadl_id, msg->child_relid, num_gfns);
1914 
1915     for (i = 0; i < num_gfns &&
1916          (void *)&msg->range[0].pfn_array[i + 1] <= (void *)msg + msglen;
1917          i++) {
1918         gpadl->gfns[gpadl->seen_gfns++] = msg->range[0].pfn_array[i];
1919     }
1920 
1921     if (gpadl_full(gpadl)) {
1922         vmbus->state = VMBUS_CREATE_GPADL;
1923     }
1924 }
1925 
1926 static void handle_gpadl_body(VMBus *vmbus, vmbus_message_gpadl_body *msg,
1927                               uint32_t msglen)
1928 {
1929     VMBusGpadl *gpadl;
1930     uint32_t num_gfns_left, i;
1931 
1932     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
1933         return;
1934     }
1935 
1936     trace_vmbus_gpadl_body(msg->gpadl_id);
1937 
1938     gpadl = find_gpadl(vmbus, msg->gpadl_id);
1939     if (!gpadl) {
1940         return;
1941     }
1942 
1943     num_gfns_left = gpadl->num_gfns - gpadl->seen_gfns;
1944     assert(num_gfns_left);
1945 
1946     for (i = 0; i < num_gfns_left &&
1947          (void *)&msg->pfn_array[i + 1] <= (void *)msg + msglen; i++) {
1948         gpadl->gfns[gpadl->seen_gfns++] = msg->pfn_array[i];
1949     }
1950 
1951     if (gpadl_full(gpadl)) {
1952         vmbus->state = VMBUS_CREATE_GPADL;
1953     }
1954 }
1955 
1956 static void send_create_gpadl(VMBus *vmbus)
1957 {
1958     VMBusGpadl *gpadl;
1959 
1960     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
1961         if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
1962             struct vmbus_message_gpadl_created msg = {
1963                 .header.message_type = VMBUS_MSG_GPADL_CREATED,
1964                 .gpadl_id = gpadl->id,
1965                 .child_relid = gpadl->child_relid,
1966             };
1967 
1968             trace_vmbus_gpadl_created(gpadl->id);
1969             post_msg(vmbus, &msg, sizeof(msg));
1970             return;
1971         }
1972     }
1973 
1974     assert(false);
1975 }
1976 
1977 static bool complete_create_gpadl(VMBus *vmbus)
1978 {
1979     VMBusGpadl *gpadl;
1980 
1981     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
1982         if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
1983             gpadl->state = VMGPADL_ALIVE;
1984 
1985             return true;
1986         }
1987     }
1988 
1989     assert(false);
1990     return false;
1991 }
1992 
1993 static void handle_gpadl_teardown(VMBus *vmbus,
1994                                   vmbus_message_gpadl_teardown *msg,
1995                                   uint32_t msglen)
1996 {
1997     VMBusGpadl *gpadl;
1998 
1999     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
2000         return;
2001     }
2002 
2003     trace_vmbus_gpadl_teardown(msg->gpadl_id);
2004 
2005     gpadl = find_gpadl(vmbus, msg->gpadl_id);
2006     if (!gpadl || gpadl->state == VMGPADL_TORNDOWN) {
2007         return;
2008     }
2009 
2010     gpadl->state = VMGPADL_TEARINGDOWN;
2011     vmbus->state = VMBUS_TEARDOWN_GPADL;
2012 }
2013 
2014 static void send_teardown_gpadl(VMBus *vmbus)
2015 {
2016     VMBusGpadl *gpadl;
2017 
2018     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
2019         if (gpadl->state == VMGPADL_TEARINGDOWN) {
2020             struct vmbus_message_gpadl_torndown msg = {
2021                 .header.message_type = VMBUS_MSG_GPADL_TORNDOWN,
2022                 .gpadl_id = gpadl->id,
2023             };
2024 
2025             trace_vmbus_gpadl_torndown(gpadl->id);
2026             post_msg(vmbus, &msg, sizeof(msg));
2027             return;
2028         }
2029     }
2030 
2031     assert(false);
2032 }
2033 
2034 static bool complete_teardown_gpadl(VMBus *vmbus)
2035 {
2036     VMBusGpadl *gpadl;
2037 
2038     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
2039         if (gpadl->state == VMGPADL_TEARINGDOWN) {
2040             gpadl->state = VMGPADL_TORNDOWN;
2041             vmbus_put_gpadl(gpadl);
2042             return true;
2043         }
2044     }
2045 
2046     assert(false);
2047     return false;
2048 }
2049 
2050 static void handle_open_channel(VMBus *vmbus, vmbus_message_open_channel *msg,
2051                                 uint32_t msglen)
2052 {
2053     VMBusChannel *chan;
2054 
2055     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
2056         return;
2057     }
2058 
2059     trace_vmbus_open_channel(msg->child_relid, msg->ring_buffer_gpadl_id,
2060                              msg->target_vp);
2061     chan = find_channel(vmbus, msg->child_relid);
2062     if (!chan || chan->state != VMCHAN_INIT) {
2063         return;
2064     }
2065 
2066     chan->ringbuf_gpadl = msg->ring_buffer_gpadl_id;
2067     chan->ringbuf_send_offset = msg->ring_buffer_offset;
2068     chan->target_vp = msg->target_vp;
2069     chan->open_id = msg->open_id;
2070 
2071     open_channel(chan);
2072 
2073     chan->state = VMCHAN_OPENING;
2074     vmbus->state = VMBUS_OPEN_CHANNEL;
2075 }
2076 
2077 static void send_open_channel(VMBus *vmbus)
2078 {
2079     VMBusChannel *chan;
2080 
2081     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2082         if (chan->state == VMCHAN_OPENING) {
2083             struct vmbus_message_open_result msg = {
2084                 .header.message_type = VMBUS_MSG_OPENCHANNEL_RESULT,
2085                 .child_relid = chan->id,
2086                 .open_id = chan->open_id,
2087                 .status = !vmbus_channel_is_open(chan),
2088             };
2089 
2090             trace_vmbus_channel_open(chan->id, msg.status);
2091             post_msg(vmbus, &msg, sizeof(msg));
2092             return;
2093         }
2094     }
2095 
2096     assert(false);
2097 }
2098 
2099 static bool complete_open_channel(VMBus *vmbus)
2100 {
2101     VMBusChannel *chan;
2102 
2103     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2104         if (chan->state == VMCHAN_OPENING) {
2105             if (vmbus_channel_is_open(chan)) {
2106                 chan->state = VMCHAN_OPEN;
2107                 /*
2108                  * simulate guest notification of ringbuffer space made
2109                  * available, for the channel protocols where the host
2110                  * initiates the communication
2111                  */
2112                 vmbus_channel_notify_host(chan);
2113             } else {
2114                 chan->state = VMCHAN_INIT;
2115             }
2116             return true;
2117         }
2118     }
2119 
2120     assert(false);
2121     return false;
2122 }
2123 
2124 static void vdev_reset_on_close(VMBusDevice *vdev)
2125 {
2126     uint16_t i;
2127 
2128     for (i = 0; i < vdev->num_channels; i++) {
2129         if (vmbus_channel_is_open(&vdev->channels[i])) {
2130             return;
2131         }
2132     }
2133 
2134     /* all channels closed -- reset device */
2135     qdev_reset_all(DEVICE(vdev));
2136 }
2137 
2138 static void handle_close_channel(VMBus *vmbus, vmbus_message_close_channel *msg,
2139                                  uint32_t msglen)
2140 {
2141     VMBusChannel *chan;
2142 
2143     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
2144         return;
2145     }
2146 
2147     trace_vmbus_close_channel(msg->child_relid);
2148 
2149     chan = find_channel(vmbus, msg->child_relid);
2150     if (!chan) {
2151         return;
2152     }
2153 
2154     close_channel(chan);
2155     chan->state = VMCHAN_INIT;
2156 
2157     vdev_reset_on_close(chan->dev);
2158 }
2159 
2160 static void handle_unload(VMBus *vmbus, void *msg, uint32_t msglen)
2161 {
2162     vmbus->state = VMBUS_UNLOAD;
2163 }
2164 
2165 static void send_unload(VMBus *vmbus)
2166 {
2167     vmbus_message_header msg = {
2168         .message_type = VMBUS_MSG_UNLOAD_RESPONSE,
2169     };
2170 
2171     qemu_mutex_lock(&vmbus->rx_queue_lock);
2172     vmbus->rx_queue_size = 0;
2173     qemu_mutex_unlock(&vmbus->rx_queue_lock);
2174 
2175     post_msg(vmbus, &msg, sizeof(msg));
2176     return;
2177 }
2178 
2179 static bool complete_unload(VMBus *vmbus)
2180 {
2181     vmbus_reset_all(vmbus);
2182     return true;
2183 }
2184 
2185 static void process_message(VMBus *vmbus)
2186 {
2187     struct hyperv_post_message_input *hv_msg;
2188     struct vmbus_message_header *msg;
2189     void *msgdata;
2190     uint32_t msglen;
2191 
2192     qemu_mutex_lock(&vmbus->rx_queue_lock);
2193 
2194     if (!vmbus->rx_queue_size) {
2195         goto unlock;
2196     }
2197 
2198     hv_msg = &vmbus->rx_queue[vmbus->rx_queue_head];
2199     msglen =  hv_msg->payload_size;
2200     if (msglen < sizeof(*msg)) {
2201         goto out;
2202     }
2203     msgdata = hv_msg->payload;
2204     msg = (struct vmbus_message_header *)msgdata;
2205 
2206     trace_vmbus_process_incoming_message(msg->message_type);
2207 
2208     switch (msg->message_type) {
2209     case VMBUS_MSG_INITIATE_CONTACT:
2210         handle_initiate_contact(vmbus, msgdata, msglen);
2211         break;
2212     case VMBUS_MSG_REQUESTOFFERS:
2213         handle_request_offers(vmbus, msgdata, msglen);
2214         break;
2215     case VMBUS_MSG_GPADL_HEADER:
2216         handle_gpadl_header(vmbus, msgdata, msglen);
2217         break;
2218     case VMBUS_MSG_GPADL_BODY:
2219         handle_gpadl_body(vmbus, msgdata, msglen);
2220         break;
2221     case VMBUS_MSG_GPADL_TEARDOWN:
2222         handle_gpadl_teardown(vmbus, msgdata, msglen);
2223         break;
2224     case VMBUS_MSG_OPENCHANNEL:
2225         handle_open_channel(vmbus, msgdata, msglen);
2226         break;
2227     case VMBUS_MSG_CLOSECHANNEL:
2228         handle_close_channel(vmbus, msgdata, msglen);
2229         break;
2230     case VMBUS_MSG_UNLOAD:
2231         handle_unload(vmbus, msgdata, msglen);
2232         break;
2233     default:
2234         error_report("unknown message type %#x", msg->message_type);
2235         break;
2236     }
2237 
2238 out:
2239     vmbus->rx_queue_size--;
2240     vmbus->rx_queue_head++;
2241     vmbus->rx_queue_head %= HV_MSG_QUEUE_LEN;
2242 
2243     vmbus_resched(vmbus);
2244 unlock:
2245     qemu_mutex_unlock(&vmbus->rx_queue_lock);
2246 }
2247 
2248 static const struct {
2249     void (*run)(VMBus *vmbus);
2250     bool (*complete)(VMBus *vmbus);
2251 } state_runner[] = {
2252     [VMBUS_LISTEN]         = {process_message,     NULL},
2253     [VMBUS_HANDSHAKE]      = {send_handshake,      NULL},
2254     [VMBUS_OFFER]          = {send_offer,          complete_offer},
2255     [VMBUS_CREATE_GPADL]   = {send_create_gpadl,   complete_create_gpadl},
2256     [VMBUS_TEARDOWN_GPADL] = {send_teardown_gpadl, complete_teardown_gpadl},
2257     [VMBUS_OPEN_CHANNEL]   = {send_open_channel,   complete_open_channel},
2258     [VMBUS_UNLOAD]         = {send_unload,         complete_unload},
2259 };
2260 
2261 static void vmbus_do_run(VMBus *vmbus)
2262 {
2263     if (vmbus->msg_in_progress) {
2264         return;
2265     }
2266 
2267     assert(vmbus->state < VMBUS_STATE_MAX);
2268     assert(state_runner[vmbus->state].run);
2269     state_runner[vmbus->state].run(vmbus);
2270 }
2271 
2272 static void vmbus_run(void *opaque)
2273 {
2274     VMBus *vmbus = opaque;
2275 
2276     /* make sure no recursion happens (e.g. due to recursive aio_poll()) */
2277     if (vmbus->in_progress) {
2278         return;
2279     }
2280 
2281     vmbus->in_progress = true;
2282     /*
2283      * FIXME: if vmbus_resched() is called from within vmbus_do_run(), it
2284      * should go *after* the code that can result in aio_poll; otherwise
2285      * reschedules can be missed.  No idea how to enforce that.
2286      */
2287     vmbus_do_run(vmbus);
2288     vmbus->in_progress = false;
2289 }
2290 
2291 static void vmbus_msg_cb(void *data, int status)
2292 {
2293     VMBus *vmbus = data;
2294     bool (*complete)(VMBus *vmbus);
2295 
2296     assert(vmbus->msg_in_progress);
2297 
2298     trace_vmbus_msg_cb(status);
2299 
2300     if (status == -EAGAIN) {
2301         goto out;
2302     }
2303     if (status) {
2304         error_report("message delivery fatal failure: %d; aborting vmbus",
2305                      status);
2306         vmbus_reset_all(vmbus);
2307         return;
2308     }
2309 
2310     assert(vmbus->state < VMBUS_STATE_MAX);
2311     complete = state_runner[vmbus->state].complete;
2312     if (!complete || complete(vmbus)) {
2313         vmbus->state = VMBUS_LISTEN;
2314     }
2315 out:
2316     vmbus->msg_in_progress = false;
2317     vmbus_resched(vmbus);
2318 }
2319 
2320 static void vmbus_resched(VMBus *vmbus)
2321 {
2322     aio_bh_schedule_oneshot(qemu_get_aio_context(), vmbus_run, vmbus);
2323 }
2324 
2325 static void vmbus_signal_event(EventNotifier *e)
2326 {
2327     VMBusChannel *chan;
2328     VMBus *vmbus = container_of(e, VMBus, notifier);
2329     unsigned long *int_map;
2330     hwaddr addr, len;
2331     bool is_dirty = false;
2332 
2333     if (!event_notifier_test_and_clear(e)) {
2334         return;
2335     }
2336 
2337     trace_vmbus_signal_event();
2338 
2339     if (!vmbus->int_page_gpa) {
2340         return;
2341     }
2342 
2343     addr = vmbus->int_page_gpa + TARGET_PAGE_SIZE / 2;
2344     len = TARGET_PAGE_SIZE / 2;
2345     int_map = cpu_physical_memory_map(addr, &len, 1);
2346     if (len != TARGET_PAGE_SIZE / 2) {
2347         goto unmap;
2348     }
2349 
2350     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2351         if (bitmap_test_and_clear_atomic(int_map, chan->id, 1)) {
2352             if (!vmbus_channel_is_open(chan)) {
2353                 continue;
2354             }
2355             vmbus_channel_notify_host(chan);
2356             is_dirty = true;
2357         }
2358     }
2359 
2360 unmap:
2361     cpu_physical_memory_unmap(int_map, len, 1, is_dirty);
2362 }
2363 
2364 static void vmbus_dev_realize(DeviceState *dev, Error **errp)
2365 {
2366     VMBusDevice *vdev = VMBUS_DEVICE(dev);
2367     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2368     VMBus *vmbus = VMBUS(qdev_get_parent_bus(dev));
2369     BusChild *child;
2370     Error *err = NULL;
2371     char idstr[UUID_FMT_LEN + 1];
2372 
2373     assert(!qemu_uuid_is_null(&vdev->instanceid));
2374 
2375     if (!qemu_uuid_is_null(&vdc->instanceid)) {
2376         /* Class wants to only have a single instance with a fixed UUID */
2377         if (!qemu_uuid_is_equal(&vdev->instanceid, &vdc->instanceid)) {
2378             error_setg(&err, "instance id can't be changed");
2379             goto error_out;
2380         }
2381     }
2382 
2383     /* Check for instance id collision for this class id */
2384     QTAILQ_FOREACH(child, &BUS(vmbus)->children, sibling) {
2385         VMBusDevice *child_dev = VMBUS_DEVICE(child->child);
2386 
2387         if (child_dev == vdev) {
2388             continue;
2389         }
2390 
2391         if (qemu_uuid_is_equal(&child_dev->instanceid, &vdev->instanceid)) {
2392             qemu_uuid_unparse(&vdev->instanceid, idstr);
2393             error_setg(&err, "duplicate vmbus device instance id %s", idstr);
2394             goto error_out;
2395         }
2396     }
2397 
2398     vdev->dma_as = &address_space_memory;
2399 
2400     create_channels(vmbus, vdev, &err);
2401     if (err) {
2402         goto error_out;
2403     }
2404 
2405     if (vdc->vmdev_realize) {
2406         vdc->vmdev_realize(vdev, &err);
2407         if (err) {
2408             goto err_vdc_realize;
2409         }
2410     }
2411     return;
2412 
2413 err_vdc_realize:
2414     free_channels(vdev);
2415 error_out:
2416     error_propagate(errp, err);
2417 }
2418 
2419 static void vmbus_dev_reset(DeviceState *dev)
2420 {
2421     uint16_t i;
2422     VMBusDevice *vdev = VMBUS_DEVICE(dev);
2423     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2424 
2425     if (vdev->channels) {
2426         for (i = 0; i < vdev->num_channels; i++) {
2427             VMBusChannel *chan = &vdev->channels[i];
2428             close_channel(chan);
2429             chan->state = VMCHAN_INIT;
2430         }
2431     }
2432 
2433     if (vdc->vmdev_reset) {
2434         vdc->vmdev_reset(vdev);
2435     }
2436 }
2437 
2438 static void vmbus_dev_unrealize(DeviceState *dev)
2439 {
2440     VMBusDevice *vdev = VMBUS_DEVICE(dev);
2441     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2442 
2443     if (vdc->vmdev_unrealize) {
2444         vdc->vmdev_unrealize(vdev);
2445     }
2446     free_channels(vdev);
2447 }
2448 
/*
 * Properties of the VMBus device base type: "instanceid" is the UUID stored
 * in VMBusDevice::instanceid.
 */
static Property vmbus_dev_props[] = {
    DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid),
    DEFINE_PROP_END_OF_LIST()
};
2453 
2454 
2455 static void vmbus_dev_class_init(ObjectClass *klass, void *data)
2456 {
2457     DeviceClass *kdev = DEVICE_CLASS(klass);
2458     device_class_set_props(kdev, vmbus_dev_props);
2459     kdev->bus_type = TYPE_VMBUS;
2460     kdev->realize = vmbus_dev_realize;
2461     kdev->unrealize = vmbus_dev_unrealize;
2462     kdev->reset = vmbus_dev_reset;
2463 }
2464 
2465 static void vmbus_dev_instance_init(Object *obj)
2466 {
2467     VMBusDevice *vdev = VMBUS_DEVICE(obj);
2468     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2469 
2470     if (!qemu_uuid_is_null(&vdc->instanceid)) {
2471         /* Class wants to only have a single instance with a fixed UUID */
2472         vdev->instanceid = vdc->instanceid;
2473     }
2474 }
2475 
/*
 * Migration description of the VMBus device base state: the 16 bytes of the
 * instance id, the number of channels, and the channels themselves (a
 * variable-length array sized by @num_channels, each described by
 * vmstate_channel).
 */
const VMStateDescription vmstate_vmbus_dev = {
    .name = TYPE_VMBUS_DEVICE,
    .version_id = 0,
    .minimum_version_id = 0,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(instanceid.data, VMBusDevice, 16),
        VMSTATE_UINT16(num_channels, VMBusDevice),
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(channels, VMBusDevice,
                                             num_channels, vmstate_channel,
                                             VMBusChannel),
        VMSTATE_END_OF_LIST()
    }
};
2489 
2490 /* vmbus generic device base */
2491 static const TypeInfo vmbus_dev_type_info = {
2492     .name = TYPE_VMBUS_DEVICE,
2493     .parent = TYPE_DEVICE,
2494     .abstract = true,
2495     .instance_size = sizeof(VMBusDevice),
2496     .class_size = sizeof(VMBusDeviceClass),
2497     .class_init = vmbus_dev_class_init,
2498     .instance_init = vmbus_dev_instance_init,
2499 };
2500 
2501 static void vmbus_realize(BusState *bus, Error **errp)
2502 {
2503     int ret = 0;
2504     Error *local_err = NULL;
2505     VMBus *vmbus = VMBUS(bus);
2506 
2507     qemu_mutex_init(&vmbus->rx_queue_lock);
2508 
2509     QTAILQ_INIT(&vmbus->gpadl_list);
2510     QTAILQ_INIT(&vmbus->channel_list);
2511 
2512     ret = hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID,
2513                                  vmbus_recv_message, vmbus);
2514     if (ret != 0) {
2515         error_setg(&local_err, "hyperv set message handler failed: %d", ret);
2516         goto error_out;
2517     }
2518 
2519     ret = event_notifier_init(&vmbus->notifier, 0);
2520     if (ret != 0) {
2521         error_setg(&local_err, "event notifier failed to init with %d", ret);
2522         goto remove_msg_handler;
2523     }
2524 
2525     event_notifier_set_handler(&vmbus->notifier, vmbus_signal_event);
2526     ret = hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID,
2527                                         &vmbus->notifier);
2528     if (ret != 0) {
2529         error_setg(&local_err, "hyperv set event handler failed with %d", ret);
2530         goto clear_event_notifier;
2531     }
2532 
2533     return;
2534 
2535 clear_event_notifier:
2536     event_notifier_cleanup(&vmbus->notifier);
2537 remove_msg_handler:
2538     hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
2539 error_out:
2540     qemu_mutex_destroy(&vmbus->rx_queue_lock);
2541     error_propagate(errp, local_err);
2542 }
2543 
2544 static void vmbus_unrealize(BusState *bus)
2545 {
2546     VMBus *vmbus = VMBUS(bus);
2547 
2548     hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
2549     hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID, NULL);
2550     event_notifier_cleanup(&vmbus->notifier);
2551 
2552     qemu_mutex_destroy(&vmbus->rx_queue_lock);
2553 }
2554 
2555 static void vmbus_reset(BusState *bus)
2556 {
2557     vmbus_deinit(VMBUS(bus));
2558 }
2559 
2560 static char *vmbus_get_dev_path(DeviceState *dev)
2561 {
2562     BusState *bus = qdev_get_parent_bus(dev);
2563     return qdev_get_dev_path(bus->parent);
2564 }
2565 
2566 static char *vmbus_get_fw_dev_path(DeviceState *dev)
2567 {
2568     VMBusDevice *vdev = VMBUS_DEVICE(dev);
2569     char uuid[UUID_FMT_LEN + 1];
2570 
2571     qemu_uuid_unparse(&vdev->instanceid, uuid);
2572     return g_strdup_printf("%s@%s", qdev_fw_name(dev), uuid);
2573 }
2574 
2575 static void vmbus_class_init(ObjectClass *klass, void *data)
2576 {
2577     BusClass *k = BUS_CLASS(klass);
2578 
2579     k->get_dev_path = vmbus_get_dev_path;
2580     k->get_fw_dev_path = vmbus_get_fw_dev_path;
2581     k->realize = vmbus_realize;
2582     k->unrealize = vmbus_unrealize;
2583     k->reset = vmbus_reset;
2584 }
2585 
2586 static int vmbus_pre_load(void *opaque)
2587 {
2588     VMBusChannel *chan;
2589     VMBus *vmbus = VMBUS(opaque);
2590 
2591     /*
2592      * channel IDs allocated by the source will come in the migration stream
2593      * for each channel, so clean up the ones allocated at realize
2594      */
2595     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2596         unregister_chan_id(chan);
2597     }
2598 
2599     return 0;
2600 }
2601 static int vmbus_post_load(void *opaque, int version_id)
2602 {
2603     int ret;
2604     VMBus *vmbus = VMBUS(opaque);
2605     VMBusGpadl *gpadl;
2606     VMBusChannel *chan;
2607 
2608     ret = vmbus_init(vmbus);
2609     if (ret) {
2610         return ret;
2611     }
2612 
2613     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
2614         gpadl->vmbus = vmbus;
2615         gpadl->refcount = 1;
2616     }
2617 
2618     /*
2619      * reopening channels depends on initialized vmbus so it's done here
2620      * instead of channel_post_load()
2621      */
2622     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2623 
2624         if (chan->state == VMCHAN_OPENING || chan->state == VMCHAN_OPEN) {
2625             open_channel(chan);
2626         }
2627 
2628         if (chan->state != VMCHAN_OPEN) {
2629             continue;
2630         }
2631 
2632         if (!vmbus_channel_is_open(chan)) {
2633             /* reopen failed, abort loading */
2634             return -1;
2635         }
2636 
2637         /* resume processing on the guest side if it missed the notification */
2638         hyperv_sint_route_set_sint(chan->notify_route);
2639         /* ditto on the host side */
2640         vmbus_channel_notify_host(chan);
2641     }
2642 
2643     vmbus_resched(vmbus);
2644     return 0;
2645 }
2646 
/*
 * Migration description of a single receive queue entry: only the payload
 * size and the payload bytes are transferred.
 */
static const VMStateDescription vmstate_post_message_input = {
    .name = "vmbus/hyperv_post_message_input",
    .version_id = 0,
    .minimum_version_id = 0,
    .fields = (VMStateField[]) {
        /*
         * skip connection_id and message_type as they are validated before
         * queueing and ignored on dequeueing
         */
        VMSTATE_UINT32(payload_size, struct hyperv_post_message_input),
        VMSTATE_UINT8_ARRAY(payload, struct hyperv_post_message_input,
                            HV_MESSAGE_PAYLOAD_SIZE),
        VMSTATE_END_OF_LIST()
    }
};
2662 
2663 static bool vmbus_rx_queue_needed(void *opaque)
2664 {
2665     VMBus *vmbus = VMBUS(opaque);
2666     return vmbus->rx_queue_size;
2667 }
2668 
/*
 * Optional migration subsection carrying the receive queue: head index,
 * number of queued messages, and all HV_MSG_QUEUE_LEN slots.  Only sent
 * when vmbus_rx_queue_needed() says so.
 */
static const VMStateDescription vmstate_rx_queue = {
    .name = "vmbus/rx_queue",
    .version_id = 0,
    .minimum_version_id = 0,
    .needed = vmbus_rx_queue_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(rx_queue_head, VMBus),
        VMSTATE_UINT8(rx_queue_size, VMBus),
        VMSTATE_STRUCT_ARRAY(rx_queue, VMBus,
                             HV_MSG_QUEUE_LEN, 0,
                             vmstate_post_message_input,
                             struct hyperv_post_message_input),
        VMSTATE_END_OF_LIST()
    }
};
2684 
/*
 * Migration description of the VMBus itself: state machine state, version,
 * target VP, interrupt page address and the list of GPADLs, plus the
 * optional receive queue subsection.  vmbus_pre_load() and
 * vmbus_post_load() run before and after loading, respectively.
 */
static const VMStateDescription vmstate_vmbus = {
    .name = TYPE_VMBUS,
    .version_id = 0,
    .minimum_version_id = 0,
    .pre_load = vmbus_pre_load,
    .post_load = vmbus_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(state, VMBus),
        VMSTATE_UINT32(version, VMBus),
        VMSTATE_UINT32(target_vp, VMBus),
        VMSTATE_UINT64(int_page_gpa, VMBus),
        VMSTATE_QTAILQ_V(gpadl_list, VMBus, 0,
                         vmstate_gpadl, VMBusGpadl, link),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * []) {
        &vmstate_rx_queue,
        NULL
    }
};
2705 
2706 static const TypeInfo vmbus_type_info = {
2707     .name = TYPE_VMBUS,
2708     .parent = TYPE_BUS,
2709     .instance_size = sizeof(VMBus),
2710     .class_init = vmbus_class_init,
2711 };
2712 
2713 static void vmbus_bridge_realize(DeviceState *dev, Error **errp)
2714 {
2715     VMBusBridge *bridge = VMBUS_BRIDGE(dev);
2716 
2717     /*
2718      * here there's at least one vmbus bridge that is being realized, so
2719      * vmbus_bridge_find can only return NULL if it's not unique
2720      */
2721     if (!vmbus_bridge_find()) {
2722         error_setg(errp, "there can be at most one %s in the system",
2723                    TYPE_VMBUS_BRIDGE);
2724         return;
2725     }
2726 
2727     if (!hyperv_is_synic_enabled()) {
2728         error_report("VMBus requires usable Hyper-V SynIC and VP_INDEX");
2729         return;
2730     }
2731 
2732     bridge->bus = VMBUS(qbus_new(TYPE_VMBUS, dev, "vmbus"));
2733 }
2734 
2735 static char *vmbus_bridge_ofw_unit_address(const SysBusDevice *dev)
2736 {
2737     /* there can be only one VMBus */
2738     return g_strdup("0");
2739 }
2740 
/*
 * Migration description of the bridge: its only state is the VMBus it
 * points to, described by vmstate_vmbus.
 */
static const VMStateDescription vmstate_vmbus_bridge = {
    .name = TYPE_VMBUS_BRIDGE,
    .version_id = 0,
    .minimum_version_id = 0,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_POINTER(bus, VMBusBridge, vmstate_vmbus, VMBus),
        VMSTATE_END_OF_LIST()
    },
};
2750 
/* Bridge properties: "irq" is stored in VMBusBridge::irq and defaults to 7 */
static Property vmbus_bridge_props[] = {
    DEFINE_PROP_UINT8("irq", VMBusBridge, irq, 7),
    DEFINE_PROP_END_OF_LIST()
};
2755 
2756 static void vmbus_bridge_class_init(ObjectClass *klass, void *data)
2757 {
2758     DeviceClass *k = DEVICE_CLASS(klass);
2759     SysBusDeviceClass *sk = SYS_BUS_DEVICE_CLASS(klass);
2760 
2761     k->realize = vmbus_bridge_realize;
2762     k->fw_name = "vmbus";
2763     sk->explicit_ofw_unit_address = vmbus_bridge_ofw_unit_address;
2764     set_bit(DEVICE_CATEGORY_BRIDGE, k->categories);
2765     k->vmsd = &vmstate_vmbus_bridge;
2766     device_class_set_props(k, vmbus_bridge_props);
2767     /* override SysBusDevice's default */
2768     k->user_creatable = true;
2769 }
2770 
2771 static const TypeInfo vmbus_bridge_type_info = {
2772     .name = TYPE_VMBUS_BRIDGE,
2773     .parent = TYPE_SYS_BUS_DEVICE,
2774     .instance_size = sizeof(VMBusBridge),
2775     .class_init = vmbus_bridge_class_init,
2776 };
2777 
2778 static void vmbus_register_types(void)
2779 {
2780     type_register_static(&vmbus_bridge_type_info);
2781     type_register_static(&vmbus_dev_type_info);
2782     type_register_static(&vmbus_type_info);
2783 }
2784 
2785 type_init(vmbus_register_types)
2786