xref: /openbmc/qemu/hw/hyperv/vmbus.c (revision db0f08df)
1 /*
2  * QEMU Hyper-V VMBus
3  *
4  * Copyright (c) 2017-2018 Virtuozzo International GmbH.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/error-report.h"
12 #include "qemu/main-loop.h"
13 #include "qapi/error.h"
14 #include "migration/vmstate.h"
15 #include "hw/qdev-properties.h"
16 #include "hw/hyperv/hyperv.h"
17 #include "hw/hyperv/vmbus.h"
18 #include "hw/hyperv/vmbus-bridge.h"
19 #include "hw/sysbus.h"
20 #include "cpu.h"
21 #include "trace.h"
22 
/* GPADL lifecycle stages (presumably the values kept in VMBusGpadl::state). */
enum {
    VMGPADL_INIT = 0,
    VMGPADL_ALIVE = 1,
    VMGPADL_TEARINGDOWN = 2,
    VMGPADL_TORNDOWN = 3,
};
29 
30 struct VMBusGpadl {
31     /* GPADL id */
32     uint32_t id;
33     /* associated channel id (rudimentary?) */
34     uint32_t child_relid;
35 
36     /* number of pages in the GPADL as declared in GPADL_HEADER message */
37     uint32_t num_gfns;
38     /*
39      * Due to limited message size, GPADL may not fit fully in a single
40      * GPADL_HEADER message, and is further popluated using GPADL_BODY
41      * messages.  @seen_gfns is the number of pages seen so far; once it
42      * reaches @num_gfns, the GPADL is ready to use.
43      */
44     uint32_t seen_gfns;
45     /* array of GFNs (of size @num_gfns once allocated) */
46     uint64_t *gfns;
47 
48     uint8_t state;
49 
50     QTAILQ_ENTRY(VMBusGpadl) link;
51     VMBus *vmbus;
52     unsigned refcount;
53 };
54 
55 /*
56  * Wrap sequential read from / write to GPADL.
57  */
58 typedef struct GpadlIter {
59     VMBusGpadl *gpadl;
60     AddressSpace *as;
61     DMADirection dir;
62     /* offset into GPADL where the next i/o will be performed */
63     uint32_t off;
64     /*
65      * Cached mapping of the currently accessed page, up to page boundary.
66      * Updated lazily on i/o.
67      * Note: MemoryRegionCache can not be used here because pages in the GPADL
68      * are non-contiguous and may belong to different memory regions.
69      */
70     void *map;
71     /* offset after last i/o (i.e. not affected by seek) */
72     uint32_t last_off;
73     /*
74      * Indicator that the iterator is active and may have a cached mapping.
75      * Allows to enforce bracketing of all i/o (which may create cached
76      * mappings) and thus exclude mapping leaks.
77      */
78     bool active;
79 } GpadlIter;
80 
81 /*
82  * Ring buffer.  There are two of them, sitting in the same GPADL, for each
83  * channel.
84  * Each ring buffer consists of a set of pages, with the first page containing
85  * the ring buffer header, and the remaining pages being for data packets.
86  */
87 typedef struct VMBusRingBufCommon {
88     AddressSpace *as;
89     /* GPA of the ring buffer header */
90     dma_addr_t rb_addr;
91     /* start and length of the ring buffer data area within GPADL */
92     uint32_t base;
93     uint32_t len;
94 
95     GpadlIter iter;
96 } VMBusRingBufCommon;
97 
98 typedef struct VMBusSendRingBuf {
99     VMBusRingBufCommon common;
100     /* current write index, to be committed at the end of send */
101     uint32_t wr_idx;
102     /* write index at the start of send */
103     uint32_t last_wr_idx;
104     /* space to be requested from the guest */
105     uint32_t wanted;
106     /* space reserved for planned sends */
107     uint32_t reserved;
108     /* last seen read index */
109     uint32_t last_seen_rd_idx;
110 } VMBusSendRingBuf;
111 
112 typedef struct VMBusRecvRingBuf {
113     VMBusRingBufCommon common;
114     /* current read index, to be committed at the end of receive */
115     uint32_t rd_idx;
116     /* read index at the start of receive */
117     uint32_t last_rd_idx;
118     /* last seen write index */
119     uint32_t last_seen_wr_idx;
120 } VMBusRecvRingBuf;
121 
122 
/* Offer stages (presumably the values kept in VMBusChannel::offer_state). */
enum {
    VMOFFER_INIT = 0,
    VMOFFER_SENDING = 1,
    VMOFFER_SENT = 2,
};
128 
/* Channel stages (presumably the values kept in VMBusChannel::state). */
enum {
    VMCHAN_INIT = 0,
    VMCHAN_OPENING = 1,
    VMCHAN_OPEN = 2,
};
134 
135 struct VMBusChannel {
136     VMBusDevice *dev;
137 
138     /* channel id */
139     uint32_t id;
140     /*
141      * subchannel index within the device; subchannel #0 is "primary" and
142      * always exists
143      */
144     uint16_t subchan_idx;
145     uint32_t open_id;
146     /* VP_INDEX of the vCPU to notify with (synthetic) interrupts */
147     uint32_t target_vp;
148     /* GPADL id to use for the ring buffers */
149     uint32_t ringbuf_gpadl;
150     /* start (in pages) of the send ring buffer within @ringbuf_gpadl */
151     uint32_t ringbuf_send_offset;
152 
153     uint8_t offer_state;
154     uint8_t state;
155     bool is_open;
156 
157     /* main device worker; copied from the device class */
158     VMBusChannelNotifyCb notify_cb;
159     /*
160      * guest->host notifications, either sent directly or dispatched via
161      * interrupt page (older VMBus)
162      */
163     EventNotifier notifier;
164 
165     VMBus *vmbus;
166     /*
167      * SINT route to signal with host->guest notifications; may be shared with
168      * the main VMBus SINT route
169      */
170     HvSintRoute *notify_route;
171     VMBusGpadl *gpadl;
172 
173     VMBusSendRingBuf send_ringbuf;
174     VMBusRecvRingBuf recv_ringbuf;
175 
176     QTAILQ_ENTRY(VMBusChannel) link;
177 };
178 
/*
 * According to the Hyper-V spec every message port has 16 buffers, so the
 * guest may post that many messages without blocking; hence incoming messages
 * need a queue of this depth.
 * Outgoing (host->guest) messages are not queued: the VMBus simply stays in
 * its current state until the message is known to have reached the respective
 * SynIC message slot.
 */
#define HV_MSG_QUEUE_LEN     16

/* Channel #0 is never used by Hyper-V devices.  Must be something special. */
#define VMBUS_FIRST_CHANID      1
/* A channel takes up one bit in a single event page sint slot. */
#define VMBUS_CHANID_COUNT      (HV_EVENT_FLAGS_COUNT - VMBUS_FIRST_CHANID)
/* Keep a few connection numbers free for other uses. */
#define VMBUS_CHAN_CONNECTION_OFFSET     16
195 
/*
 * Message delivery success or failure is reported asynchronously, so the
 * VMBus state machine effectively has two entry points: vmbus_run and
 * vmbus_msg_cb (the latter runs once the status of a host->guest message
 * delivery is known).  Both execute as oneshot BHs on the main aio context,
 * which serializes them.
 */
enum {
    VMBUS_LISTEN = 0,
    VMBUS_HANDSHAKE = 1,
    VMBUS_OFFER = 2,
    VMBUS_CREATE_GPADL = 3,
    VMBUS_TEARDOWN_GPADL = 4,
    VMBUS_OPEN_CHANNEL = 5,
    VMBUS_UNLOAD = 6,
    VMBUS_STATE_MAX = 7
};
213 
214 struct VMBus {
215     BusState parent;
216 
217     uint8_t state;
218     /* protection against recursive aio_poll (see vmbus_run) */
219     bool in_progress;
220     /* whether there's a message being delivered to the guest */
221     bool msg_in_progress;
222     uint32_t version;
223     /* VP_INDEX of the vCPU to send messages and interrupts to */
224     uint32_t target_vp;
225     HvSintRoute *sint_route;
226     /*
227      * interrupt page for older protocol versions; newer ones use SynIC event
228      * flags directly
229      */
230     hwaddr int_page_gpa;
231 
232     DECLARE_BITMAP(chanid_bitmap, VMBUS_CHANID_COUNT);
233 
234     /* incoming message queue */
235     struct hyperv_post_message_input rx_queue[HV_MSG_QUEUE_LEN];
236     uint8_t rx_queue_head;
237     uint8_t rx_queue_size;
238     QemuMutex rx_queue_lock;
239 
240     QTAILQ_HEAD(, VMBusGpadl) gpadl_list;
241     QTAILQ_HEAD(, VMBusChannel) channel_list;
242 
243     /*
244      * guest->host notifications for older VMBus, to be dispatched via
245      * interrupt page
246      */
247     EventNotifier notifier;
248 };
249 
250 static bool gpadl_full(VMBusGpadl *gpadl)
251 {
252     return gpadl->seen_gfns == gpadl->num_gfns;
253 }
254 
255 static VMBusGpadl *create_gpadl(VMBus *vmbus, uint32_t id,
256                                 uint32_t child_relid, uint32_t num_gfns)
257 {
258     VMBusGpadl *gpadl = g_new0(VMBusGpadl, 1);
259 
260     gpadl->id = id;
261     gpadl->child_relid = child_relid;
262     gpadl->num_gfns = num_gfns;
263     gpadl->gfns = g_new(uint64_t, num_gfns);
264     QTAILQ_INSERT_HEAD(&vmbus->gpadl_list, gpadl, link);
265     gpadl->vmbus = vmbus;
266     gpadl->refcount = 1;
267     return gpadl;
268 }
269 
270 static void free_gpadl(VMBusGpadl *gpadl)
271 {
272     QTAILQ_REMOVE(&gpadl->vmbus->gpadl_list, gpadl, link);
273     g_free(gpadl->gfns);
274     g_free(gpadl);
275 }
276 
277 static VMBusGpadl *find_gpadl(VMBus *vmbus, uint32_t gpadl_id)
278 {
279     VMBusGpadl *gpadl;
280     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
281         if (gpadl->id == gpadl_id) {
282             return gpadl;
283         }
284     }
285     return NULL;
286 }
287 
288 VMBusGpadl *vmbus_get_gpadl(VMBusChannel *chan, uint32_t gpadl_id)
289 {
290     VMBusGpadl *gpadl = find_gpadl(chan->vmbus, gpadl_id);
291     if (!gpadl || !gpadl_full(gpadl)) {
292         return NULL;
293     }
294     gpadl->refcount++;
295     return gpadl;
296 }
297 
298 void vmbus_put_gpadl(VMBusGpadl *gpadl)
299 {
300     if (!gpadl) {
301         return;
302     }
303     if (--gpadl->refcount) {
304         return;
305     }
306     free_gpadl(gpadl);
307 }
308 
309 uint32_t vmbus_gpadl_len(VMBusGpadl *gpadl)
310 {
311     return gpadl->num_gfns * TARGET_PAGE_SIZE;
312 }
313 
314 static void gpadl_iter_init(GpadlIter *iter, VMBusGpadl *gpadl,
315                             AddressSpace *as, DMADirection dir)
316 {
317     iter->gpadl = gpadl;
318     iter->as = as;
319     iter->dir = dir;
320     iter->active = false;
321 }
322 
323 static inline void gpadl_iter_cache_unmap(GpadlIter *iter)
324 {
325     uint32_t map_start_in_page = (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
326     uint32_t io_end_in_page = ((iter->last_off - 1) & ~TARGET_PAGE_MASK) + 1;
327 
328     /* mapping is only done to do non-zero amount of i/o */
329     assert(iter->last_off > 0);
330     assert(map_start_in_page < io_end_in_page);
331 
332     dma_memory_unmap(iter->as, iter->map, TARGET_PAGE_SIZE - map_start_in_page,
333                      iter->dir, io_end_in_page - map_start_in_page);
334 }
335 
336 /*
337  * Copy exactly @len bytes between the GPADL pointed to by @iter and @buf.
338  * The direction of the copy is determined by @iter->dir.
339  * The caller must ensure the operation overflows neither @buf nor the GPADL
340  * (there's an assert for the latter).
341  * Reuse the currently mapped page in the GPADL if possible.
342  */
343 static ssize_t gpadl_iter_io(GpadlIter *iter, void *buf, uint32_t len)
344 {
345     ssize_t ret = len;
346 
347     assert(iter->active);
348 
349     while (len) {
350         uint32_t off_in_page = iter->off & ~TARGET_PAGE_MASK;
351         uint32_t pgleft = TARGET_PAGE_SIZE - off_in_page;
352         uint32_t cplen = MIN(pgleft, len);
353         void *p;
354 
355         /* try to reuse the cached mapping */
356         if (iter->map) {
357             uint32_t map_start_in_page =
358                 (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
359             uint32_t off_base = iter->off & ~TARGET_PAGE_MASK;
360             uint32_t mapped_base = (iter->last_off - 1) & ~TARGET_PAGE_MASK;
361             if (off_base != mapped_base || off_in_page < map_start_in_page) {
362                 gpadl_iter_cache_unmap(iter);
363                 iter->map = NULL;
364             }
365         }
366 
367         if (!iter->map) {
368             dma_addr_t maddr;
369             dma_addr_t mlen = pgleft;
370             uint32_t idx = iter->off >> TARGET_PAGE_BITS;
371             assert(idx < iter->gpadl->num_gfns);
372 
373             maddr = (iter->gpadl->gfns[idx] << TARGET_PAGE_BITS) | off_in_page;
374 
375             iter->map = dma_memory_map(iter->as, maddr, &mlen, iter->dir);
376             if (mlen != pgleft) {
377                 dma_memory_unmap(iter->as, iter->map, mlen, iter->dir, 0);
378                 iter->map = NULL;
379                 return -EFAULT;
380             }
381         }
382 
383         p = (void *)(uintptr_t)(((uintptr_t)iter->map & TARGET_PAGE_MASK) |
384                 off_in_page);
385         if (iter->dir == DMA_DIRECTION_FROM_DEVICE) {
386             memcpy(p, buf, cplen);
387         } else {
388             memcpy(buf, p, cplen);
389         }
390 
391         buf += cplen;
392         len -= cplen;
393         iter->off += cplen;
394         iter->last_off = iter->off;
395     }
396 
397     return ret;
398 }
399 
400 /*
401  * Position the iterator @iter at new offset @new_off.
402  * If this results in the cached mapping being unusable with the new offset,
403  * unmap it.
404  */
405 static inline void gpadl_iter_seek(GpadlIter *iter, uint32_t new_off)
406 {
407     assert(iter->active);
408     iter->off = new_off;
409 }
410 
411 /*
412  * Start a series of i/o on the GPADL.
413  * After this i/o and seek operations on @iter become legal.
414  */
415 static inline void gpadl_iter_start_io(GpadlIter *iter)
416 {
417     assert(!iter->active);
418     /* mapping is cached lazily on i/o */
419     iter->map = NULL;
420     iter->active = true;
421 }
422 
423 /*
424  * End the eariler started series of i/o on the GPADL and release the cached
425  * mapping if any.
426  */
427 static inline void gpadl_iter_end_io(GpadlIter *iter)
428 {
429     assert(iter->active);
430 
431     if (iter->map) {
432         gpadl_iter_cache_unmap(iter);
433     }
434 
435     iter->active = false;
436 }
437 
438 static void vmbus_resched(VMBus *vmbus);
439 static void vmbus_msg_cb(void *data, int status);
440 
441 ssize_t vmbus_iov_to_gpadl(VMBusChannel *chan, VMBusGpadl *gpadl, uint32_t off,
442                            const struct iovec *iov, size_t iov_cnt)
443 {
444     GpadlIter iter;
445     size_t i;
446     ssize_t ret = 0;
447 
448     gpadl_iter_init(&iter, gpadl, chan->dev->dma_as,
449                     DMA_DIRECTION_FROM_DEVICE);
450     gpadl_iter_start_io(&iter);
451     gpadl_iter_seek(&iter, off);
452     for (i = 0; i < iov_cnt; i++) {
453         ret = gpadl_iter_io(&iter, iov[i].iov_base, iov[i].iov_len);
454         if (ret < 0) {
455             goto out;
456         }
457     }
458 out:
459     gpadl_iter_end_io(&iter);
460     return ret;
461 }
462 
463 int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
464                   unsigned iov_cnt, size_t len, size_t off)
465 {
466     int ret_cnt = 0, ret;
467     unsigned i;
468     QEMUSGList *sgl = &req->sgl;
469     ScatterGatherEntry *sg = sgl->sg;
470 
471     for (i = 0; i < sgl->nsg; i++) {
472         if (sg[i].len > off) {
473             break;
474         }
475         off -= sg[i].len;
476     }
477     for (; len && i < sgl->nsg; i++) {
478         dma_addr_t mlen = MIN(sg[i].len - off, len);
479         dma_addr_t addr = sg[i].base + off;
480         len -= mlen;
481         off = 0;
482 
483         for (; mlen; ret_cnt++) {
484             dma_addr_t l = mlen;
485             dma_addr_t a = addr;
486 
487             if (ret_cnt == iov_cnt) {
488                 ret = -ENOBUFS;
489                 goto err;
490             }
491 
492             iov[ret_cnt].iov_base = dma_memory_map(sgl->as, a, &l, dir);
493             if (!l) {
494                 ret = -EFAULT;
495                 goto err;
496             }
497             iov[ret_cnt].iov_len = l;
498             addr += l;
499             mlen -= l;
500         }
501     }
502 
503     return ret_cnt;
504 err:
505     vmbus_unmap_sgl(req, dir, iov, ret_cnt, 0);
506     return ret;
507 }
508 
509 void vmbus_unmap_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
510                      unsigned iov_cnt, size_t accessed)
511 {
512     QEMUSGList *sgl = &req->sgl;
513     unsigned i;
514 
515     for (i = 0; i < iov_cnt; i++) {
516         size_t acsd = MIN(accessed, iov[i].iov_len);
517         dma_memory_unmap(sgl->as, iov[i].iov_base, iov[i].iov_len, dir, acsd);
518         accessed -= acsd;
519     }
520 }
521 
522 static const VMStateDescription vmstate_gpadl = {
523     .name = "vmbus/gpadl",
524     .version_id = 0,
525     .minimum_version_id = 0,
526     .fields = (VMStateField[]) {
527         VMSTATE_UINT32(id, VMBusGpadl),
528         VMSTATE_UINT32(child_relid, VMBusGpadl),
529         VMSTATE_UINT32(num_gfns, VMBusGpadl),
530         VMSTATE_UINT32(seen_gfns, VMBusGpadl),
531         VMSTATE_VARRAY_UINT32_ALLOC(gfns, VMBusGpadl, num_gfns, 0,
532                                     vmstate_info_uint64, uint64_t),
533         VMSTATE_UINT8(state, VMBusGpadl),
534         VMSTATE_END_OF_LIST()
535     }
536 };
537 
/*
 * Fold index @idx back into a ring buffer that is @len bytes long.
 * Only one wraparound is handled: @idx is expected to be below twice the
 * ring buffer size.
 */
static inline uint32_t rb_idx_wrap(uint32_t idx, uint32_t len)
{
    return idx >= len ? idx - len : idx;
}
550 
/*
 * Circular distance from @idx1 to @idx2 in a ring buffer of @len bytes.
 * @allow_catchup tells whether @idx1 is allowed to catch up with @idx2 (the
 * read index may catch up with the write index, but not the other way
 * round); when it is not, the distance is reduced by one.
 */
static inline uint32_t rb_idx_delta(uint32_t idx1, uint32_t idx2, uint32_t len,
                                    bool allow_catchup)
{
    uint32_t gap = allow_catchup ? 0 : 1;

    return rb_idx_wrap(idx2 + len - idx1 - gap, len);
}
561 
562 static vmbus_ring_buffer *ringbuf_map_hdr(VMBusRingBufCommon *ringbuf)
563 {
564     vmbus_ring_buffer *rb;
565     dma_addr_t mlen = sizeof(*rb);
566 
567     rb = dma_memory_map(ringbuf->as, ringbuf->rb_addr, &mlen,
568                         DMA_DIRECTION_FROM_DEVICE);
569     if (mlen != sizeof(*rb)) {
570         dma_memory_unmap(ringbuf->as, rb, mlen,
571                          DMA_DIRECTION_FROM_DEVICE, 0);
572         return NULL;
573     }
574     return rb;
575 }
576 
577 static void ringbuf_unmap_hdr(VMBusRingBufCommon *ringbuf,
578                               vmbus_ring_buffer *rb, bool dirty)
579 {
580     assert(rb);
581 
582     dma_memory_unmap(ringbuf->as, rb, sizeof(*rb), DMA_DIRECTION_FROM_DEVICE,
583                      dirty ? sizeof(*rb) : 0);
584 }
585 
586 static void ringbuf_init_common(VMBusRingBufCommon *ringbuf, VMBusGpadl *gpadl,
587                                 AddressSpace *as, DMADirection dir,
588                                 uint32_t begin, uint32_t end)
589 {
590     ringbuf->as = as;
591     ringbuf->rb_addr = gpadl->gfns[begin] << TARGET_PAGE_BITS;
592     ringbuf->base = (begin + 1) << TARGET_PAGE_BITS;
593     ringbuf->len = (end - begin - 1) << TARGET_PAGE_BITS;
594     gpadl_iter_init(&ringbuf->iter, gpadl, as, dir);
595 }
596 
597 static int ringbufs_init(VMBusChannel *chan)
598 {
599     vmbus_ring_buffer *rb;
600     VMBusSendRingBuf *send_ringbuf = &chan->send_ringbuf;
601     VMBusRecvRingBuf *recv_ringbuf = &chan->recv_ringbuf;
602 
603     if (chan->ringbuf_send_offset <= 1 ||
604         chan->gpadl->num_gfns <= chan->ringbuf_send_offset + 1) {
605         return -EINVAL;
606     }
607 
608     ringbuf_init_common(&recv_ringbuf->common, chan->gpadl, chan->dev->dma_as,
609                         DMA_DIRECTION_TO_DEVICE, 0, chan->ringbuf_send_offset);
610     ringbuf_init_common(&send_ringbuf->common, chan->gpadl, chan->dev->dma_as,
611                         DMA_DIRECTION_FROM_DEVICE, chan->ringbuf_send_offset,
612                         chan->gpadl->num_gfns);
613     send_ringbuf->wanted = 0;
614     send_ringbuf->reserved = 0;
615 
616     rb = ringbuf_map_hdr(&recv_ringbuf->common);
617     if (!rb) {
618         return -EFAULT;
619     }
620     recv_ringbuf->rd_idx = recv_ringbuf->last_rd_idx = rb->read_index;
621     ringbuf_unmap_hdr(&recv_ringbuf->common, rb, false);
622 
623     rb = ringbuf_map_hdr(&send_ringbuf->common);
624     if (!rb) {
625         return -EFAULT;
626     }
627     send_ringbuf->wr_idx = send_ringbuf->last_wr_idx = rb->write_index;
628     send_ringbuf->last_seen_rd_idx = rb->read_index;
629     rb->feature_bits |= VMBUS_RING_BUFFER_FEAT_PENDING_SZ;
630     ringbuf_unmap_hdr(&send_ringbuf->common, rb, true);
631 
632     if (recv_ringbuf->rd_idx >= recv_ringbuf->common.len ||
633         send_ringbuf->wr_idx >= send_ringbuf->common.len) {
634         return -EOVERFLOW;
635     }
636 
637     return 0;
638 }
639 
640 /*
641  * Perform io between the GPADL-backed ringbuffer @ringbuf and @buf, wrapping
642  * around if needed.
643  * @len is assumed not to exceed the size of the ringbuffer, so only single
644  * wraparound is considered.
645  */
646 static ssize_t ringbuf_io(VMBusRingBufCommon *ringbuf, void *buf, uint32_t len)
647 {
648     ssize_t ret1 = 0, ret2 = 0;
649     uint32_t remain = ringbuf->len + ringbuf->base - ringbuf->iter.off;
650 
651     if (len >= remain) {
652         ret1 = gpadl_iter_io(&ringbuf->iter, buf, remain);
653         if (ret1 < 0) {
654             return ret1;
655         }
656         gpadl_iter_seek(&ringbuf->iter, ringbuf->base);
657         buf += remain;
658         len -= remain;
659     }
660     ret2 = gpadl_iter_io(&ringbuf->iter, buf, len);
661     if (ret2 < 0) {
662         return ret2;
663     }
664     return ret1 + ret2;
665 }
666 
667 /*
668  * Position the circular iterator within @ringbuf to offset @new_off, wrapping
669  * around if needed.
670  * @new_off is assumed not to exceed twice the size of the ringbuffer, so only
671  * single wraparound is considered.
672  */
673 static inline void ringbuf_seek(VMBusRingBufCommon *ringbuf, uint32_t new_off)
674 {
675     gpadl_iter_seek(&ringbuf->iter,
676                     ringbuf->base + rb_idx_wrap(new_off, ringbuf->len));
677 }
678 
679 static inline uint32_t ringbuf_tell(VMBusRingBufCommon *ringbuf)
680 {
681     return ringbuf->iter.off - ringbuf->base;
682 }
683 
684 static inline void ringbuf_start_io(VMBusRingBufCommon *ringbuf)
685 {
686     gpadl_iter_start_io(&ringbuf->iter);
687 }
688 
689 static inline void ringbuf_end_io(VMBusRingBufCommon *ringbuf)
690 {
691     gpadl_iter_end_io(&ringbuf->iter);
692 }
693 
694 VMBusDevice *vmbus_channel_device(VMBusChannel *chan)
695 {
696     return chan->dev;
697 }
698 
699 VMBusChannel *vmbus_device_channel(VMBusDevice *dev, uint32_t chan_idx)
700 {
701     if (chan_idx >= dev->num_channels) {
702         return NULL;
703     }
704     return &dev->channels[chan_idx];
705 }
706 
707 uint32_t vmbus_channel_idx(VMBusChannel *chan)
708 {
709     return chan - chan->dev->channels;
710 }
711 
712 void vmbus_channel_notify_host(VMBusChannel *chan)
713 {
714     event_notifier_set(&chan->notifier);
715 }
716 
717 bool vmbus_channel_is_open(VMBusChannel *chan)
718 {
719     return chan->is_open;
720 }
721 
722 /*
723  * Notify the guest side about the data to work on in the channel ring buffer.
724  * The notification is done by signaling a dedicated per-channel SynIC event
725  * flag (more recent guests) or setting a bit in the interrupt page and firing
726  * the VMBus SINT (older guests).
727  */
728 static int vmbus_channel_notify_guest(VMBusChannel *chan)
729 {
730     int res = 0;
731     unsigned long *int_map, mask;
732     unsigned idx;
733     hwaddr addr = chan->vmbus->int_page_gpa;
734     hwaddr len = TARGET_PAGE_SIZE / 2, dirty = 0;
735 
736     trace_vmbus_channel_notify_guest(chan->id);
737 
738     if (!addr) {
739         return hyperv_set_event_flag(chan->notify_route, chan->id);
740     }
741 
742     int_map = cpu_physical_memory_map(addr, &len, 1);
743     if (len != TARGET_PAGE_SIZE / 2) {
744         res = -ENXIO;
745         goto unmap;
746     }
747 
748     idx = BIT_WORD(chan->id);
749     mask = BIT_MASK(chan->id);
750     if ((qatomic_fetch_or(&int_map[idx], mask) & mask) != mask) {
751         res = hyperv_sint_route_set_sint(chan->notify_route);
752         dirty = len;
753     }
754 
755 unmap:
756     cpu_physical_memory_unmap(int_map, len, 1, dirty);
757     return res;
758 }
759 
760 #define VMBUS_PKT_TRAILER      sizeof(uint64_t)
761 
762 static uint32_t vmbus_pkt_hdr_set_offsets(vmbus_packet_hdr *hdr,
763                                           uint32_t desclen, uint32_t msglen)
764 {
765     hdr->offset_qwords = sizeof(*hdr) / sizeof(uint64_t) +
766         DIV_ROUND_UP(desclen, sizeof(uint64_t));
767     hdr->len_qwords = hdr->offset_qwords +
768         DIV_ROUND_UP(msglen, sizeof(uint64_t));
769     return hdr->len_qwords * sizeof(uint64_t) + VMBUS_PKT_TRAILER;
770 }
771 
772 /*
773  * Simplified ring buffer operation with paired barriers annotations in the
774  * producer and consumer loops:
775  *
776  * producer                           * consumer
777  * ~~~~~~~~                           * ~~~~~~~~
778  * write pending_send_sz              * read write_index
779  * smp_mb                       [A]   * smp_mb                       [C]
780  * read read_index                    * read packet
781  * smp_mb                       [B]   * read/write out-of-band data
782  * read/write out-of-band data        * smp_mb                       [B]
783  * write packet                       * write read_index
784  * smp_mb                       [C]   * smp_mb                       [A]
785  * write write_index                  * read pending_send_sz
786  * smp_wmb                      [D]   * smp_rmb                      [D]
787  * write pending_send_sz              * read write_index
788  * ...                                * ...
789  */
790 
791 static inline uint32_t ringbuf_send_avail(VMBusSendRingBuf *ringbuf)
792 {
793     /* don't trust guest data */
794     if (ringbuf->last_seen_rd_idx >= ringbuf->common.len) {
795         return 0;
796     }
797     return rb_idx_delta(ringbuf->wr_idx, ringbuf->last_seen_rd_idx,
798                         ringbuf->common.len, false);
799 }
800 
801 static ssize_t ringbuf_send_update_idx(VMBusChannel *chan)
802 {
803     VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
804     vmbus_ring_buffer *rb;
805     uint32_t written;
806 
807     written = rb_idx_delta(ringbuf->last_wr_idx, ringbuf->wr_idx,
808                            ringbuf->common.len, true);
809     if (!written) {
810         return 0;
811     }
812 
813     rb = ringbuf_map_hdr(&ringbuf->common);
814     if (!rb) {
815         return -EFAULT;
816     }
817 
818     ringbuf->reserved -= written;
819 
820     /* prevent reorder with the data operation and packet write */
821     smp_mb();                   /* barrier pair [C] */
822     rb->write_index = ringbuf->wr_idx;
823 
824     /*
825      * If the producer earlier indicated that it wants to be notified when the
826      * consumer frees certain amount of space in the ring buffer, that amount
827      * is reduced by the size of the completed write.
828      */
829     if (ringbuf->wanted) {
830         /* otherwise reservation would fail */
831         assert(ringbuf->wanted < written);
832         ringbuf->wanted -= written;
833         /* prevent reorder with write_index write */
834         smp_wmb();              /* barrier pair [D] */
835         rb->pending_send_sz = ringbuf->wanted;
836     }
837 
838     /* prevent reorder with write_index or pending_send_sz write */
839     smp_mb();                   /* barrier pair [A] */
840     ringbuf->last_seen_rd_idx = rb->read_index;
841 
842     /*
843      * The consumer may have missed the reduction of pending_send_sz and skip
844      * notification, so re-check the blocking condition, and, if it's no longer
845      * true, ensure processing another iteration by simulating consumer's
846      * notification.
847      */
848     if (ringbuf_send_avail(ringbuf) >= ringbuf->wanted) {
849         vmbus_channel_notify_host(chan);
850     }
851 
852     /* skip notification by consumer's request */
853     if (rb->interrupt_mask) {
854         goto out;
855     }
856 
857     /*
858      * The consumer hasn't caught up with the producer's previous state so it's
859      * not blocked.
860      * (last_seen_rd_idx comes from the guest but it's safe to use w/o
861      * validation here as it only affects notification.)
862      */
863     if (rb_idx_delta(ringbuf->last_seen_rd_idx, ringbuf->wr_idx,
864                      ringbuf->common.len, true) > written) {
865         goto out;
866     }
867 
868     vmbus_channel_notify_guest(chan);
869 out:
870     ringbuf_unmap_hdr(&ringbuf->common, rb, true);
871     ringbuf->last_wr_idx = ringbuf->wr_idx;
872     return written;
873 }
874 
875 int vmbus_channel_reserve(VMBusChannel *chan,
876                           uint32_t desclen, uint32_t msglen)
877 {
878     VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
879     vmbus_ring_buffer *rb = NULL;
880     vmbus_packet_hdr hdr;
881     uint32_t needed = ringbuf->reserved +
882         vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);
883 
884     /* avoid touching the guest memory if possible */
885     if (likely(needed <= ringbuf_send_avail(ringbuf))) {
886         goto success;
887     }
888 
889     rb = ringbuf_map_hdr(&ringbuf->common);
890     if (!rb) {
891         return -EFAULT;
892     }
893 
894     /* fetch read index from guest memory and try again */
895     ringbuf->last_seen_rd_idx = rb->read_index;
896 
897     if (likely(needed <= ringbuf_send_avail(ringbuf))) {
898         goto success;
899     }
900 
901     rb->pending_send_sz = needed;
902 
903     /*
904      * The consumer may have made progress and freed up some space before
905      * seeing updated pending_send_sz, so re-read read_index (preventing
906      * reorder with the pending_send_sz write) and try again.
907      */
908     smp_mb();                   /* barrier pair [A] */
909     ringbuf->last_seen_rd_idx = rb->read_index;
910 
911     if (needed > ringbuf_send_avail(ringbuf)) {
912         goto out;
913     }
914 
915 success:
916     ringbuf->reserved = needed;
917     needed = 0;
918 
919     /* clear pending_send_sz if it was set */
920     if (ringbuf->wanted) {
921         if (!rb) {
922             rb = ringbuf_map_hdr(&ringbuf->common);
923             if (!rb) {
924                 /* failure to clear pending_send_sz is non-fatal */
925                 goto out;
926             }
927         }
928 
929         rb->pending_send_sz = 0;
930     }
931 
932     /* prevent reorder of the following data operation with read_index read */
933     smp_mb();                   /* barrier pair [B] */
934 
935 out:
936     if (rb) {
937         ringbuf_unmap_hdr(&ringbuf->common, rb, ringbuf->wanted == needed);
938     }
939     ringbuf->wanted = needed;
940     return needed ? -ENOSPC : 0;
941 }
942 
943 ssize_t vmbus_channel_send(VMBusChannel *chan, uint16_t pkt_type,
944                            void *desc, uint32_t desclen,
945                            void *msg, uint32_t msglen,
946                            bool need_comp, uint64_t transaction_id)
947 {
948     ssize_t ret = 0;
949     vmbus_packet_hdr hdr;
950     uint32_t totlen;
951     VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
952 
953     if (!vmbus_channel_is_open(chan)) {
954         return -EINVAL;
955     }
956 
957     totlen = vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);
958     hdr.type = pkt_type;
959     hdr.flags = need_comp ? VMBUS_PACKET_FLAG_REQUEST_COMPLETION : 0;
960     hdr.transaction_id = transaction_id;
961 
962     assert(totlen <= ringbuf->reserved);
963 
964     ringbuf_start_io(&ringbuf->common);
965     ringbuf_seek(&ringbuf->common, ringbuf->wr_idx);
966     ret = ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr));
967     if (ret < 0) {
968         goto out;
969     }
970     if (desclen) {
971         assert(desc);
972         ret = ringbuf_io(&ringbuf->common, desc, desclen);
973         if (ret < 0) {
974             goto out;
975         }
976         ringbuf_seek(&ringbuf->common,
977                      ringbuf->wr_idx + hdr.offset_qwords * sizeof(uint64_t));
978     }
979     ret = ringbuf_io(&ringbuf->common, msg, msglen);
980     if (ret < 0) {
981         goto out;
982     }
983     ringbuf_seek(&ringbuf->common, ringbuf->wr_idx + totlen);
984     ringbuf->wr_idx = ringbuf_tell(&ringbuf->common);
985     ret = 0;
986 out:
987     ringbuf_end_io(&ringbuf->common);
988     if (ret) {
989         return ret;
990     }
991     return ringbuf_send_update_idx(chan);
992 }
993 
994 ssize_t vmbus_channel_send_completion(VMBusChanReq *req,
995                                       void *msg, uint32_t msglen)
996 {
997     assert(req->need_comp);
998     return vmbus_channel_send(req->chan, VMBUS_PACKET_COMP, NULL, 0,
999                               msg, msglen, false, req->transaction_id);
1000 }
1001 
1002 static int sgl_from_gpa_ranges(QEMUSGList *sgl, VMBusDevice *dev,
1003                                VMBusRingBufCommon *ringbuf, uint32_t len)
1004 {
1005     int ret;
1006     vmbus_pkt_gpa_direct hdr;
1007     hwaddr curaddr = 0;
1008     hwaddr curlen = 0;
1009     int num;
1010 
1011     if (len < sizeof(hdr)) {
1012         return -EIO;
1013     }
1014     ret = ringbuf_io(ringbuf, &hdr, sizeof(hdr));
1015     if (ret < 0) {
1016         return ret;
1017     }
1018     len -= sizeof(hdr);
1019 
1020     num = (len - hdr.rangecount * sizeof(vmbus_gpa_range)) / sizeof(uint64_t);
1021     if (num < 0) {
1022         return -EIO;
1023     }
1024     qemu_sglist_init(sgl, DEVICE(dev), num, ringbuf->as);
1025 
1026     for (; hdr.rangecount; hdr.rangecount--) {
1027         vmbus_gpa_range range;
1028 
1029         if (len < sizeof(range)) {
1030             goto eio;
1031         }
1032         ret = ringbuf_io(ringbuf, &range, sizeof(range));
1033         if (ret < 0) {
1034             goto err;
1035         }
1036         len -= sizeof(range);
1037 
1038         if (range.byte_offset & TARGET_PAGE_MASK) {
1039             goto eio;
1040         }
1041 
1042         for (; range.byte_count; range.byte_offset = 0) {
1043             uint64_t paddr;
1044             uint32_t plen = MIN(range.byte_count,
1045                                 TARGET_PAGE_SIZE - range.byte_offset);
1046 
1047             if (len < sizeof(uint64_t)) {
1048                 goto eio;
1049             }
1050             ret = ringbuf_io(ringbuf, &paddr, sizeof(paddr));
1051             if (ret < 0) {
1052                 goto err;
1053             }
1054             len -= sizeof(uint64_t);
1055             paddr <<= TARGET_PAGE_BITS;
1056             paddr |= range.byte_offset;
1057             range.byte_count -= plen;
1058 
1059             if (curaddr + curlen == paddr) {
1060                 /* consecutive fragments - join */
1061                 curlen += plen;
1062             } else {
1063                 if (curlen) {
1064                     qemu_sglist_add(sgl, curaddr, curlen);
1065                 }
1066 
1067                 curaddr = paddr;
1068                 curlen = plen;
1069             }
1070         }
1071     }
1072 
1073     if (curlen) {
1074         qemu_sglist_add(sgl, curaddr, curlen);
1075     }
1076 
1077     return 0;
1078 eio:
1079     ret = -EIO;
1080 err:
1081     qemu_sglist_destroy(sgl);
1082     return ret;
1083 }
1084 
1085 static VMBusChanReq *vmbus_alloc_req(VMBusChannel *chan,
1086                                      uint32_t size, uint16_t pkt_type,
1087                                      uint32_t msglen, uint64_t transaction_id,
1088                                      bool need_comp)
1089 {
1090     VMBusChanReq *req;
1091     uint32_t msgoff = QEMU_ALIGN_UP(size, __alignof__(*req->msg));
1092     uint32_t totlen = msgoff + msglen;
1093 
1094     req = g_malloc0(totlen);
1095     req->chan = chan;
1096     req->pkt_type = pkt_type;
1097     req->msg = (void *)req + msgoff;
1098     req->msglen = msglen;
1099     req->transaction_id = transaction_id;
1100     req->need_comp = need_comp;
1101     return req;
1102 }
1103 
1104 int vmbus_channel_recv_start(VMBusChannel *chan)
1105 {
1106     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1107     vmbus_ring_buffer *rb;
1108 
1109     rb = ringbuf_map_hdr(&ringbuf->common);
1110     if (!rb) {
1111         return -EFAULT;
1112     }
1113     ringbuf->last_seen_wr_idx = rb->write_index;
1114     ringbuf_unmap_hdr(&ringbuf->common, rb, false);
1115 
1116     if (ringbuf->last_seen_wr_idx >= ringbuf->common.len) {
1117         return -EOVERFLOW;
1118     }
1119 
1120     /* prevent reorder of the following data operation with write_index read */
1121     smp_mb();                   /* barrier pair [C] */
1122     return 0;
1123 }
1124 
1125 void *vmbus_channel_recv_peek(VMBusChannel *chan, uint32_t size)
1126 {
1127     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1128     vmbus_packet_hdr hdr = {};
1129     VMBusChanReq *req;
1130     uint32_t avail;
1131     uint32_t totlen, pktlen, msglen, msgoff, desclen;
1132 
1133     assert(size >= sizeof(*req));
1134 
1135     /* safe as last_seen_wr_idx is validated in vmbus_channel_recv_start */
1136     avail = rb_idx_delta(ringbuf->rd_idx, ringbuf->last_seen_wr_idx,
1137                          ringbuf->common.len, true);
1138     if (avail < sizeof(hdr)) {
1139         return NULL;
1140     }
1141 
1142     ringbuf_seek(&ringbuf->common, ringbuf->rd_idx);
1143     if (ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr)) < 0) {
1144         return NULL;
1145     }
1146 
1147     pktlen = hdr.len_qwords * sizeof(uint64_t);
1148     totlen = pktlen + VMBUS_PKT_TRAILER;
1149     if (totlen > avail) {
1150         return NULL;
1151     }
1152 
1153     msgoff = hdr.offset_qwords * sizeof(uint64_t);
1154     if (msgoff > pktlen || msgoff < sizeof(hdr)) {
1155         error_report("%s: malformed packet: %u %u", __func__, msgoff, pktlen);
1156         return NULL;
1157     }
1158 
1159     msglen = pktlen - msgoff;
1160 
1161     req = vmbus_alloc_req(chan, size, hdr.type, msglen, hdr.transaction_id,
1162                           hdr.flags & VMBUS_PACKET_FLAG_REQUEST_COMPLETION);
1163 
1164     switch (hdr.type) {
1165     case VMBUS_PACKET_DATA_USING_GPA_DIRECT:
1166         desclen = msgoff - sizeof(hdr);
1167         if (sgl_from_gpa_ranges(&req->sgl, chan->dev, &ringbuf->common,
1168                                 desclen) < 0) {
1169             error_report("%s: failed to convert GPA ranges to SGL", __func__);
1170             goto free_req;
1171         }
1172         break;
1173     case VMBUS_PACKET_DATA_INBAND:
1174     case VMBUS_PACKET_COMP:
1175         break;
1176     default:
1177         error_report("%s: unexpected msg type: %x", __func__, hdr.type);
1178         goto free_req;
1179     }
1180 
1181     ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + msgoff);
1182     if (ringbuf_io(&ringbuf->common, req->msg, msglen) < 0) {
1183         goto free_req;
1184     }
1185     ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + totlen);
1186 
1187     return req;
1188 free_req:
1189     vmbus_free_req(req);
1190     return NULL;
1191 }
1192 
1193 void vmbus_channel_recv_pop(VMBusChannel *chan)
1194 {
1195     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1196     ringbuf->rd_idx = ringbuf_tell(&ringbuf->common);
1197 }
1198 
1199 ssize_t vmbus_channel_recv_done(VMBusChannel *chan)
1200 {
1201     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1202     vmbus_ring_buffer *rb;
1203     uint32_t read;
1204 
1205     read = rb_idx_delta(ringbuf->last_rd_idx, ringbuf->rd_idx,
1206                         ringbuf->common.len, true);
1207     if (!read) {
1208         return 0;
1209     }
1210 
1211     rb = ringbuf_map_hdr(&ringbuf->common);
1212     if (!rb) {
1213         return -EFAULT;
1214     }
1215 
1216     /* prevent reorder with the data operation and packet read */
1217     smp_mb();                   /* barrier pair [B] */
1218     rb->read_index = ringbuf->rd_idx;
1219 
1220     /* prevent reorder of the following pending_send_sz read */
1221     smp_mb();                   /* barrier pair [A] */
1222 
1223     if (rb->interrupt_mask) {
1224         goto out;
1225     }
1226 
1227     if (rb->feature_bits & VMBUS_RING_BUFFER_FEAT_PENDING_SZ) {
1228         uint32_t wr_idx, wr_avail;
1229         uint32_t wanted = rb->pending_send_sz;
1230 
1231         if (!wanted) {
1232             goto out;
1233         }
1234 
1235         /* prevent reorder with pending_send_sz read */
1236         smp_rmb();              /* barrier pair [D] */
1237         wr_idx = rb->write_index;
1238 
1239         wr_avail = rb_idx_delta(wr_idx, ringbuf->rd_idx, ringbuf->common.len,
1240                                 true);
1241 
1242         /* the producer wasn't blocked on the consumer state */
1243         if (wr_avail >= read + wanted) {
1244             goto out;
1245         }
1246         /* there's not enough space for the producer to make progress */
1247         if (wr_avail < wanted) {
1248             goto out;
1249         }
1250     }
1251 
1252     vmbus_channel_notify_guest(chan);
1253 out:
1254     ringbuf_unmap_hdr(&ringbuf->common, rb, true);
1255     ringbuf->last_rd_idx = ringbuf->rd_idx;
1256     return read;
1257 }
1258 
1259 void vmbus_free_req(void *req)
1260 {
1261     VMBusChanReq *r = req;
1262 
1263     if (!req) {
1264         return;
1265     }
1266 
1267     if (r->sgl.dev) {
1268         qemu_sglist_destroy(&r->sgl);
1269     }
1270     g_free(req);
1271 }
1272 
1273 static const VMStateDescription vmstate_sgent = {
1274     .name = "vmbus/sgentry",
1275     .version_id = 0,
1276     .minimum_version_id = 0,
1277     .fields = (VMStateField[]) {
1278         VMSTATE_UINT64(base, ScatterGatherEntry),
1279         VMSTATE_UINT64(len, ScatterGatherEntry),
1280         VMSTATE_END_OF_LIST()
1281     }
1282 };
1283 
1284 typedef struct VMBusChanReqSave {
1285     uint16_t chan_idx;
1286     uint16_t pkt_type;
1287     uint32_t msglen;
1288     void *msg;
1289     uint64_t transaction_id;
1290     bool need_comp;
1291     uint32_t num;
1292     ScatterGatherEntry *sgl;
1293 } VMBusChanReqSave;
1294 
1295 static const VMStateDescription vmstate_vmbus_chan_req = {
1296     .name = "vmbus/vmbus_chan_req",
1297     .version_id = 0,
1298     .minimum_version_id = 0,
1299     .fields = (VMStateField[]) {
1300         VMSTATE_UINT16(chan_idx, VMBusChanReqSave),
1301         VMSTATE_UINT16(pkt_type, VMBusChanReqSave),
1302         VMSTATE_UINT32(msglen, VMBusChanReqSave),
1303         VMSTATE_VBUFFER_ALLOC_UINT32(msg, VMBusChanReqSave, 0, NULL, msglen),
1304         VMSTATE_UINT64(transaction_id, VMBusChanReqSave),
1305         VMSTATE_BOOL(need_comp, VMBusChanReqSave),
1306         VMSTATE_UINT32(num, VMBusChanReqSave),
1307         VMSTATE_STRUCT_VARRAY_POINTER_UINT32(sgl, VMBusChanReqSave, num,
1308                                              vmstate_sgent, ScatterGatherEntry),
1309         VMSTATE_END_OF_LIST()
1310     }
1311 };
1312 
1313 void vmbus_save_req(QEMUFile *f, VMBusChanReq *req)
1314 {
1315     VMBusChanReqSave req_save;
1316 
1317     req_save.chan_idx = req->chan->subchan_idx;
1318     req_save.pkt_type = req->pkt_type;
1319     req_save.msglen = req->msglen;
1320     req_save.msg = req->msg;
1321     req_save.transaction_id = req->transaction_id;
1322     req_save.need_comp = req->need_comp;
1323     req_save.num = req->sgl.nsg;
1324     req_save.sgl = g_memdup(req->sgl.sg,
1325                             req_save.num * sizeof(ScatterGatherEntry));
1326 
1327     vmstate_save_state(f, &vmstate_vmbus_chan_req, &req_save, NULL);
1328 
1329     g_free(req_save.sgl);
1330 }
1331 
1332 void *vmbus_load_req(QEMUFile *f, VMBusDevice *dev, uint32_t size)
1333 {
1334     VMBusChanReqSave req_save;
1335     VMBusChanReq *req = NULL;
1336     VMBusChannel *chan = NULL;
1337     uint32_t i;
1338 
1339     vmstate_load_state(f, &vmstate_vmbus_chan_req, &req_save, 0);
1340 
1341     if (req_save.chan_idx >= dev->num_channels) {
1342         error_report("%s: %u(chan_idx) > %u(num_channels)", __func__,
1343                      req_save.chan_idx, dev->num_channels);
1344         goto out;
1345     }
1346     chan = &dev->channels[req_save.chan_idx];
1347 
1348     if (vmbus_channel_reserve(chan, 0, req_save.msglen)) {
1349         goto out;
1350     }
1351 
1352     req = vmbus_alloc_req(chan, size, req_save.pkt_type, req_save.msglen,
1353                           req_save.transaction_id, req_save.need_comp);
1354     if (req_save.msglen) {
1355         memcpy(req->msg, req_save.msg, req_save.msglen);
1356     }
1357 
1358     for (i = 0; i < req_save.num; i++) {
1359         qemu_sglist_add(&req->sgl, req_save.sgl[i].base, req_save.sgl[i].len);
1360     }
1361 
1362 out:
1363     if (req_save.msglen) {
1364         g_free(req_save.msg);
1365     }
1366     if (req_save.num) {
1367         g_free(req_save.sgl);
1368     }
1369     return req;
1370 }
1371 
1372 static void channel_event_cb(EventNotifier *e)
1373 {
1374     VMBusChannel *chan = container_of(e, VMBusChannel, notifier);
1375     if (event_notifier_test_and_clear(e)) {
1376         /*
1377          * All receives are supposed to happen within the device worker, so
1378          * bracket it with ringbuf_start/end_io on the receive ringbuffer, and
1379          * potentially reuse the cached mapping throughout the worker.
1380          * Can't do this for sends as they may happen outside the device
1381          * worker.
1382          */
1383         VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1384         ringbuf_start_io(&ringbuf->common);
1385         chan->notify_cb(chan);
1386         ringbuf_end_io(&ringbuf->common);
1387 
1388     }
1389 }
1390 
1391 static int alloc_chan_id(VMBus *vmbus)
1392 {
1393     int ret;
1394 
1395     ret = find_next_zero_bit(vmbus->chanid_bitmap, VMBUS_CHANID_COUNT, 0);
1396     if (ret == VMBUS_CHANID_COUNT) {
1397         return -ENOMEM;
1398     }
1399     return ret + VMBUS_FIRST_CHANID;
1400 }
1401 
1402 static int register_chan_id(VMBusChannel *chan)
1403 {
1404     return test_and_set_bit(chan->id - VMBUS_FIRST_CHANID,
1405                             chan->vmbus->chanid_bitmap) ? -EEXIST : 0;
1406 }
1407 
1408 static void unregister_chan_id(VMBusChannel *chan)
1409 {
1410     clear_bit(chan->id - VMBUS_FIRST_CHANID, chan->vmbus->chanid_bitmap);
1411 }
1412 
1413 static uint32_t chan_connection_id(VMBusChannel *chan)
1414 {
1415     return VMBUS_CHAN_CONNECTION_OFFSET + chan->id;
1416 }
1417 
1418 static void init_channel(VMBus *vmbus, VMBusDevice *dev, VMBusDeviceClass *vdc,
1419                          VMBusChannel *chan, uint16_t idx, Error **errp)
1420 {
1421     int res;
1422 
1423     chan->dev = dev;
1424     chan->notify_cb = vdc->chan_notify_cb;
1425     chan->subchan_idx = idx;
1426     chan->vmbus = vmbus;
1427 
1428     res = alloc_chan_id(vmbus);
1429     if (res < 0) {
1430         error_setg(errp, "no spare channel id");
1431         return;
1432     }
1433     chan->id = res;
1434     register_chan_id(chan);
1435 
1436     /*
1437      * The guest drivers depend on the device subchannels (idx #1+) to be
1438      * offered after the primary channel (idx #0) of that device.  To ensure
1439      * that, record the channels on the channel list in the order they appear
1440      * within the device.
1441      */
1442     QTAILQ_INSERT_TAIL(&vmbus->channel_list, chan, link);
1443 }
1444 
1445 static void deinit_channel(VMBusChannel *chan)
1446 {
1447     assert(chan->state == VMCHAN_INIT);
1448     QTAILQ_REMOVE(&chan->vmbus->channel_list, chan, link);
1449     unregister_chan_id(chan);
1450 }
1451 
1452 static void create_channels(VMBus *vmbus, VMBusDevice *dev, Error **errp)
1453 {
1454     uint16_t i;
1455     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(dev);
1456     Error *err = NULL;
1457 
1458     dev->num_channels = vdc->num_channels ? vdc->num_channels(dev) : 1;
1459     if (dev->num_channels < 1) {
1460         error_setg(errp, "invalid #channels: %u", dev->num_channels);
1461         return;
1462     }
1463 
1464     dev->channels = g_new0(VMBusChannel, dev->num_channels);
1465     for (i = 0; i < dev->num_channels; i++) {
1466         init_channel(vmbus, dev, vdc, &dev->channels[i], i, &err);
1467         if (err) {
1468             goto err_init;
1469         }
1470     }
1471 
1472     return;
1473 
1474 err_init:
1475     while (i--) {
1476         deinit_channel(&dev->channels[i]);
1477     }
1478     error_propagate(errp, err);
1479 }
1480 
1481 static void free_channels(VMBusDevice *dev)
1482 {
1483     uint16_t i;
1484     for (i = 0; i < dev->num_channels; i++) {
1485         deinit_channel(&dev->channels[i]);
1486     }
1487     g_free(dev->channels);
1488 }
1489 
1490 static HvSintRoute *make_sint_route(VMBus *vmbus, uint32_t vp_index)
1491 {
1492     VMBusChannel *chan;
1493 
1494     if (vp_index == vmbus->target_vp) {
1495         hyperv_sint_route_ref(vmbus->sint_route);
1496         return vmbus->sint_route;
1497     }
1498 
1499     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1500         if (chan->target_vp == vp_index && vmbus_channel_is_open(chan)) {
1501             hyperv_sint_route_ref(chan->notify_route);
1502             return chan->notify_route;
1503         }
1504     }
1505 
1506     return hyperv_sint_route_new(vp_index, VMBUS_SINT, NULL, NULL);
1507 }
1508 
1509 static void open_channel(VMBusChannel *chan)
1510 {
1511     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
1512 
1513     chan->gpadl = vmbus_get_gpadl(chan, chan->ringbuf_gpadl);
1514     if (!chan->gpadl) {
1515         return;
1516     }
1517 
1518     if (ringbufs_init(chan)) {
1519         goto put_gpadl;
1520     }
1521 
1522     if (event_notifier_init(&chan->notifier, 0)) {
1523         goto put_gpadl;
1524     }
1525 
1526     event_notifier_set_handler(&chan->notifier, channel_event_cb);
1527 
1528     if (hyperv_set_event_flag_handler(chan_connection_id(chan),
1529                                       &chan->notifier)) {
1530         goto cleanup_notifier;
1531     }
1532 
1533     chan->notify_route = make_sint_route(chan->vmbus, chan->target_vp);
1534     if (!chan->notify_route) {
1535         goto clear_event_flag_handler;
1536     }
1537 
1538     if (vdc->open_channel && vdc->open_channel(chan)) {
1539         goto unref_sint_route;
1540     }
1541 
1542     chan->is_open = true;
1543     return;
1544 
1545 unref_sint_route:
1546     hyperv_sint_route_unref(chan->notify_route);
1547 clear_event_flag_handler:
1548     hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
1549 cleanup_notifier:
1550     event_notifier_set_handler(&chan->notifier, NULL);
1551     event_notifier_cleanup(&chan->notifier);
1552 put_gpadl:
1553     vmbus_put_gpadl(chan->gpadl);
1554 }
1555 
1556 static void close_channel(VMBusChannel *chan)
1557 {
1558     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
1559 
1560     if (!chan->is_open) {
1561         return;
1562     }
1563 
1564     if (vdc->close_channel) {
1565         vdc->close_channel(chan);
1566     }
1567 
1568     hyperv_sint_route_unref(chan->notify_route);
1569     hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
1570     event_notifier_set_handler(&chan->notifier, NULL);
1571     event_notifier_cleanup(&chan->notifier);
1572     vmbus_put_gpadl(chan->gpadl);
1573     chan->is_open = false;
1574 }
1575 
1576 static int channel_post_load(void *opaque, int version_id)
1577 {
1578     VMBusChannel *chan = opaque;
1579 
1580     return register_chan_id(chan);
1581 }
1582 
1583 static const VMStateDescription vmstate_channel = {
1584     .name = "vmbus/channel",
1585     .version_id = 0,
1586     .minimum_version_id = 0,
1587     .post_load = channel_post_load,
1588     .fields = (VMStateField[]) {
1589         VMSTATE_UINT32(id, VMBusChannel),
1590         VMSTATE_UINT16(subchan_idx, VMBusChannel),
1591         VMSTATE_UINT32(open_id, VMBusChannel),
1592         VMSTATE_UINT32(target_vp, VMBusChannel),
1593         VMSTATE_UINT32(ringbuf_gpadl, VMBusChannel),
1594         VMSTATE_UINT32(ringbuf_send_offset, VMBusChannel),
1595         VMSTATE_UINT8(offer_state, VMBusChannel),
1596         VMSTATE_UINT8(state, VMBusChannel),
1597         VMSTATE_END_OF_LIST()
1598     }
1599 };
1600 
1601 static VMBusChannel *find_channel(VMBus *vmbus, uint32_t id)
1602 {
1603     VMBusChannel *chan;
1604     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1605         if (chan->id == id) {
1606             return chan;
1607         }
1608     }
1609     return NULL;
1610 }
1611 
1612 static int enqueue_incoming_message(VMBus *vmbus,
1613                                     const struct hyperv_post_message_input *msg)
1614 {
1615     int ret = 0;
1616     uint8_t idx, prev_size;
1617 
1618     qemu_mutex_lock(&vmbus->rx_queue_lock);
1619 
1620     if (vmbus->rx_queue_size == HV_MSG_QUEUE_LEN) {
1621         ret = -ENOBUFS;
1622         goto out;
1623     }
1624 
1625     prev_size = vmbus->rx_queue_size;
1626     idx = (vmbus->rx_queue_head + vmbus->rx_queue_size) % HV_MSG_QUEUE_LEN;
1627     memcpy(&vmbus->rx_queue[idx], msg, sizeof(*msg));
1628     vmbus->rx_queue_size++;
1629 
1630     /* only need to resched if the queue was empty before */
1631     if (!prev_size) {
1632         vmbus_resched(vmbus);
1633     }
1634 out:
1635     qemu_mutex_unlock(&vmbus->rx_queue_lock);
1636     return ret;
1637 }
1638 
1639 static uint16_t vmbus_recv_message(const struct hyperv_post_message_input *msg,
1640                                    void *data)
1641 {
1642     VMBus *vmbus = data;
1643     struct vmbus_message_header *vmbus_msg;
1644 
1645     if (msg->message_type != HV_MESSAGE_VMBUS) {
1646         return HV_STATUS_INVALID_HYPERCALL_INPUT;
1647     }
1648 
1649     if (msg->payload_size < sizeof(struct vmbus_message_header)) {
1650         return HV_STATUS_INVALID_HYPERCALL_INPUT;
1651     }
1652 
1653     vmbus_msg = (struct vmbus_message_header *)msg->payload;
1654 
1655     trace_vmbus_recv_message(vmbus_msg->message_type, msg->payload_size);
1656 
1657     if (vmbus_msg->message_type == VMBUS_MSG_INVALID ||
1658         vmbus_msg->message_type >= VMBUS_MSG_COUNT) {
1659         error_report("vmbus: unknown message type %#x",
1660                      vmbus_msg->message_type);
1661         return HV_STATUS_INVALID_HYPERCALL_INPUT;
1662     }
1663 
1664     if (enqueue_incoming_message(vmbus, msg)) {
1665         return HV_STATUS_INSUFFICIENT_BUFFERS;
1666     }
1667     return HV_STATUS_SUCCESS;
1668 }
1669 
1670 static bool vmbus_initialized(VMBus *vmbus)
1671 {
1672     return vmbus->version > 0 && vmbus->version <= VMBUS_VERSION_CURRENT;
1673 }
1674 
1675 static void vmbus_reset_all(VMBus *vmbus)
1676 {
1677     qbus_reset_all(BUS(vmbus));
1678 }
1679 
1680 static void post_msg(VMBus *vmbus, void *msgdata, uint32_t msglen)
1681 {
1682     int ret;
1683     struct hyperv_message msg = {
1684         .header.message_type = HV_MESSAGE_VMBUS,
1685     };
1686 
1687     assert(!vmbus->msg_in_progress);
1688     assert(msglen <= sizeof(msg.payload));
1689     assert(msglen >= sizeof(struct vmbus_message_header));
1690 
1691     vmbus->msg_in_progress = true;
1692 
1693     trace_vmbus_post_msg(((struct vmbus_message_header *)msgdata)->message_type,
1694                          msglen);
1695 
1696     memcpy(msg.payload, msgdata, msglen);
1697     msg.header.payload_size = ROUND_UP(msglen, VMBUS_MESSAGE_SIZE_ALIGN);
1698 
1699     ret = hyperv_post_msg(vmbus->sint_route, &msg);
1700     if (ret == 0 || ret == -EAGAIN) {
1701         return;
1702     }
1703 
1704     error_report("message delivery fatal failure: %d; aborting vmbus", ret);
1705     vmbus_reset_all(vmbus);
1706 }
1707 
1708 static int vmbus_init(VMBus *vmbus)
1709 {
1710     if (vmbus->target_vp != (uint32_t)-1) {
1711         vmbus->sint_route = hyperv_sint_route_new(vmbus->target_vp, VMBUS_SINT,
1712                                                   vmbus_msg_cb, vmbus);
1713         if (!vmbus->sint_route) {
1714             error_report("failed to set up SINT route");
1715             return -ENOMEM;
1716         }
1717     }
1718     return 0;
1719 }
1720 
1721 static void vmbus_deinit(VMBus *vmbus)
1722 {
1723     VMBusGpadl *gpadl, *tmp_gpadl;
1724     VMBusChannel *chan;
1725 
1726     QTAILQ_FOREACH_SAFE(gpadl, &vmbus->gpadl_list, link, tmp_gpadl) {
1727         if (gpadl->state == VMGPADL_TORNDOWN) {
1728             continue;
1729         }
1730         vmbus_put_gpadl(gpadl);
1731     }
1732 
1733     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1734         chan->offer_state = VMOFFER_INIT;
1735     }
1736 
1737     hyperv_sint_route_unref(vmbus->sint_route);
1738     vmbus->sint_route = NULL;
1739     vmbus->int_page_gpa = 0;
1740     vmbus->target_vp = (uint32_t)-1;
1741     vmbus->version = 0;
1742     vmbus->state = VMBUS_LISTEN;
1743     vmbus->msg_in_progress = false;
1744 }
1745 
1746 static void handle_initiate_contact(VMBus *vmbus,
1747                                     vmbus_message_initiate_contact *msg,
1748                                     uint32_t msglen)
1749 {
1750     if (msglen < sizeof(*msg)) {
1751         return;
1752     }
1753 
1754     trace_vmbus_initiate_contact(msg->version_requested >> 16,
1755                                  msg->version_requested & 0xffff,
1756                                  msg->target_vcpu, msg->monitor_page1,
1757                                  msg->monitor_page2, msg->interrupt_page);
1758 
1759     /*
1760      * Reset vmbus on INITIATE_CONTACT regardless of its previous state.
1761      * Useful, in particular, with vmbus-aware BIOS which can't shut vmbus down
1762      * before handing over to OS loader.
1763      */
1764     vmbus_reset_all(vmbus);
1765 
1766     vmbus->target_vp = msg->target_vcpu;
1767     vmbus->version = msg->version_requested;
1768     if (vmbus->version < VMBUS_VERSION_WIN8) {
1769         /* linux passes interrupt page even when it doesn't need it */
1770         vmbus->int_page_gpa = msg->interrupt_page;
1771     }
1772     vmbus->state = VMBUS_HANDSHAKE;
1773 
1774     if (vmbus_init(vmbus)) {
1775         error_report("failed to init vmbus; aborting");
1776         vmbus_deinit(vmbus);
1777         return;
1778     }
1779 }
1780 
1781 static void send_handshake(VMBus *vmbus)
1782 {
1783     struct vmbus_message_version_response msg = {
1784         .header.message_type = VMBUS_MSG_VERSION_RESPONSE,
1785         .version_supported = vmbus_initialized(vmbus),
1786     };
1787 
1788     post_msg(vmbus, &msg, sizeof(msg));
1789 }
1790 
1791 static void handle_request_offers(VMBus *vmbus, void *msgdata, uint32_t msglen)
1792 {
1793     VMBusChannel *chan;
1794 
1795     if (!vmbus_initialized(vmbus)) {
1796         return;
1797     }
1798 
1799     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1800         if (chan->offer_state == VMOFFER_INIT) {
1801             chan->offer_state = VMOFFER_SENDING;
1802             break;
1803         }
1804     }
1805 
1806     vmbus->state = VMBUS_OFFER;
1807 }
1808 
1809 static void send_offer(VMBus *vmbus)
1810 {
1811     VMBusChannel *chan;
1812     struct vmbus_message_header alloffers_msg = {
1813         .message_type = VMBUS_MSG_ALLOFFERS_DELIVERED,
1814     };
1815 
1816     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1817         if (chan->offer_state == VMOFFER_SENDING) {
1818             VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
1819             /* Hyper-V wants LE GUIDs */
1820             QemuUUID classid = qemu_uuid_bswap(vdc->classid);
1821             QemuUUID instanceid = qemu_uuid_bswap(chan->dev->instanceid);
1822             struct vmbus_message_offer_channel msg = {
1823                 .header.message_type = VMBUS_MSG_OFFERCHANNEL,
1824                 .child_relid = chan->id,
1825                 .connection_id = chan_connection_id(chan),
1826                 .channel_flags = vdc->channel_flags,
1827                 .mmio_size_mb = vdc->mmio_size_mb,
1828                 .sub_channel_index = vmbus_channel_idx(chan),
1829                 .interrupt_flags = VMBUS_OFFER_INTERRUPT_DEDICATED,
1830             };
1831 
1832             memcpy(msg.type_uuid, &classid, sizeof(classid));
1833             memcpy(msg.instance_uuid, &instanceid, sizeof(instanceid));
1834 
1835             trace_vmbus_send_offer(chan->id, chan->dev);
1836 
1837             post_msg(vmbus, &msg, sizeof(msg));
1838             return;
1839         }
1840     }
1841 
1842     /* no more offers, send terminator message */
1843     trace_vmbus_terminate_offers();
1844     post_msg(vmbus, &alloffers_msg, sizeof(alloffers_msg));
1845 }
1846 
1847 static bool complete_offer(VMBus *vmbus)
1848 {
1849     VMBusChannel *chan;
1850 
1851     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1852         if (chan->offer_state == VMOFFER_SENDING) {
1853             chan->offer_state = VMOFFER_SENT;
1854             goto next_offer;
1855         }
1856     }
1857     /*
1858      * no transitioning channels found so this is completing the terminator
1859      * message, and vmbus can move to the next state
1860      */
1861     return true;
1862 
1863 next_offer:
1864     /* try to mark another channel for offering */
1865     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1866         if (chan->offer_state == VMOFFER_INIT) {
1867             chan->offer_state = VMOFFER_SENDING;
1868             break;
1869         }
1870     }
1871     /*
1872      * if an offer has been sent there are more offers or the terminator yet to
1873      * send, so no state transition for vmbus
1874      */
1875     return false;
1876 }
1877 
1878 
1879 static void handle_gpadl_header(VMBus *vmbus, vmbus_message_gpadl_header *msg,
1880                                 uint32_t msglen)
1881 {
1882     VMBusGpadl *gpadl;
1883     uint32_t num_gfns, i;
1884 
1885     /* must include at least one gpa range */
1886     if (msglen < sizeof(*msg) + sizeof(msg->range[0]) ||
1887         !vmbus_initialized(vmbus)) {
1888         return;
1889     }
1890 
1891     num_gfns = (msg->range_buflen - msg->rangecount * sizeof(msg->range[0])) /
1892                sizeof(msg->range[0].pfn_array[0]);
1893 
1894     trace_vmbus_gpadl_header(msg->gpadl_id, num_gfns);
1895 
1896     /*
1897      * In theory the GPADL_HEADER message can define a GPADL with multiple GPA
1898      * ranges each with arbitrary size and alignment.  However in practice only
1899      * single-range page-aligned GPADLs have been observed so just ignore
1900      * anything else and simplify things greatly.
1901      */
1902     if (msg->rangecount != 1 || msg->range[0].byte_offset ||
1903         (msg->range[0].byte_count != (num_gfns << TARGET_PAGE_BITS))) {
1904         return;
1905     }
1906 
1907     /* ignore requests to create already existing GPADLs */
1908     if (find_gpadl(vmbus, msg->gpadl_id)) {
1909         return;
1910     }
1911 
1912     gpadl = create_gpadl(vmbus, msg->gpadl_id, msg->child_relid, num_gfns);
1913 
1914     for (i = 0; i < num_gfns &&
1915          (void *)&msg->range[0].pfn_array[i + 1] <= (void *)msg + msglen;
1916          i++) {
1917         gpadl->gfns[gpadl->seen_gfns++] = msg->range[0].pfn_array[i];
1918     }
1919 
1920     if (gpadl_full(gpadl)) {
1921         vmbus->state = VMBUS_CREATE_GPADL;
1922     }
1923 }
1924 
1925 static void handle_gpadl_body(VMBus *vmbus, vmbus_message_gpadl_body *msg,
1926                               uint32_t msglen)
1927 {
1928     VMBusGpadl *gpadl;
1929     uint32_t num_gfns_left, i;
1930 
1931     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
1932         return;
1933     }
1934 
1935     trace_vmbus_gpadl_body(msg->gpadl_id);
1936 
1937     gpadl = find_gpadl(vmbus, msg->gpadl_id);
1938     if (!gpadl) {
1939         return;
1940     }
1941 
1942     num_gfns_left = gpadl->num_gfns - gpadl->seen_gfns;
1943     assert(num_gfns_left);
1944 
1945     for (i = 0; i < num_gfns_left &&
1946          (void *)&msg->pfn_array[i + 1] <= (void *)msg + msglen; i++) {
1947         gpadl->gfns[gpadl->seen_gfns++] = msg->pfn_array[i];
1948     }
1949 
1950     if (gpadl_full(gpadl)) {
1951         vmbus->state = VMBUS_CREATE_GPADL;
1952     }
1953 }
1954 
1955 static void send_create_gpadl(VMBus *vmbus)
1956 {
1957     VMBusGpadl *gpadl;
1958 
1959     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
1960         if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
1961             struct vmbus_message_gpadl_created msg = {
1962                 .header.message_type = VMBUS_MSG_GPADL_CREATED,
1963                 .gpadl_id = gpadl->id,
1964                 .child_relid = gpadl->child_relid,
1965             };
1966 
1967             trace_vmbus_gpadl_created(gpadl->id);
1968             post_msg(vmbus, &msg, sizeof(msg));
1969             return;
1970         }
1971     }
1972 
1973     assert(false);
1974 }
1975 
1976 static bool complete_create_gpadl(VMBus *vmbus)
1977 {
1978     VMBusGpadl *gpadl;
1979 
1980     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
1981         if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
1982             gpadl->state = VMGPADL_ALIVE;
1983 
1984             return true;
1985         }
1986     }
1987 
1988     assert(false);
1989     return false;
1990 }
1991 
1992 static void handle_gpadl_teardown(VMBus *vmbus,
1993                                   vmbus_message_gpadl_teardown *msg,
1994                                   uint32_t msglen)
1995 {
1996     VMBusGpadl *gpadl;
1997 
1998     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
1999         return;
2000     }
2001 
2002     trace_vmbus_gpadl_teardown(msg->gpadl_id);
2003 
2004     gpadl = find_gpadl(vmbus, msg->gpadl_id);
2005     if (!gpadl || gpadl->state == VMGPADL_TORNDOWN) {
2006         return;
2007     }
2008 
2009     gpadl->state = VMGPADL_TEARINGDOWN;
2010     vmbus->state = VMBUS_TEARDOWN_GPADL;
2011 }
2012 
2013 static void send_teardown_gpadl(VMBus *vmbus)
2014 {
2015     VMBusGpadl *gpadl;
2016 
2017     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
2018         if (gpadl->state == VMGPADL_TEARINGDOWN) {
2019             struct vmbus_message_gpadl_torndown msg = {
2020                 .header.message_type = VMBUS_MSG_GPADL_TORNDOWN,
2021                 .gpadl_id = gpadl->id,
2022             };
2023 
2024             trace_vmbus_gpadl_torndown(gpadl->id);
2025             post_msg(vmbus, &msg, sizeof(msg));
2026             return;
2027         }
2028     }
2029 
2030     assert(false);
2031 }
2032 
2033 static bool complete_teardown_gpadl(VMBus *vmbus)
2034 {
2035     VMBusGpadl *gpadl;
2036 
2037     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
2038         if (gpadl->state == VMGPADL_TEARINGDOWN) {
2039             gpadl->state = VMGPADL_TORNDOWN;
2040             vmbus_put_gpadl(gpadl);
2041             return true;
2042         }
2043     }
2044 
2045     assert(false);
2046     return false;
2047 }
2048 
2049 static void handle_open_channel(VMBus *vmbus, vmbus_message_open_channel *msg,
2050                                 uint32_t msglen)
2051 {
2052     VMBusChannel *chan;
2053 
2054     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
2055         return;
2056     }
2057 
2058     trace_vmbus_open_channel(msg->child_relid, msg->ring_buffer_gpadl_id,
2059                              msg->target_vp);
2060     chan = find_channel(vmbus, msg->child_relid);
2061     if (!chan || chan->state != VMCHAN_INIT) {
2062         return;
2063     }
2064 
2065     chan->ringbuf_gpadl = msg->ring_buffer_gpadl_id;
2066     chan->ringbuf_send_offset = msg->ring_buffer_offset;
2067     chan->target_vp = msg->target_vp;
2068     chan->open_id = msg->open_id;
2069 
2070     open_channel(chan);
2071 
2072     chan->state = VMCHAN_OPENING;
2073     vmbus->state = VMBUS_OPEN_CHANNEL;
2074 }
2075 
2076 static void send_open_channel(VMBus *vmbus)
2077 {
2078     VMBusChannel *chan;
2079 
2080     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2081         if (chan->state == VMCHAN_OPENING) {
2082             struct vmbus_message_open_result msg = {
2083                 .header.message_type = VMBUS_MSG_OPENCHANNEL_RESULT,
2084                 .child_relid = chan->id,
2085                 .open_id = chan->open_id,
2086                 .status = !vmbus_channel_is_open(chan),
2087             };
2088 
2089             trace_vmbus_channel_open(chan->id, msg.status);
2090             post_msg(vmbus, &msg, sizeof(msg));
2091             return;
2092         }
2093     }
2094 
2095     assert(false);
2096 }
2097 
2098 static bool complete_open_channel(VMBus *vmbus)
2099 {
2100     VMBusChannel *chan;
2101 
2102     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2103         if (chan->state == VMCHAN_OPENING) {
2104             if (vmbus_channel_is_open(chan)) {
2105                 chan->state = VMCHAN_OPEN;
2106                 /*
2107                  * simulate guest notification of ringbuffer space made
2108                  * available, for the channel protocols where the host
2109                  * initiates the communication
2110                  */
2111                 vmbus_channel_notify_host(chan);
2112             } else {
2113                 chan->state = VMCHAN_INIT;
2114             }
2115             return true;
2116         }
2117     }
2118 
2119     assert(false);
2120     return false;
2121 }
2122 
2123 static void vdev_reset_on_close(VMBusDevice *vdev)
2124 {
2125     uint16_t i;
2126 
2127     for (i = 0; i < vdev->num_channels; i++) {
2128         if (vmbus_channel_is_open(&vdev->channels[i])) {
2129             return;
2130         }
2131     }
2132 
2133     /* all channels closed -- reset device */
2134     qdev_reset_all(DEVICE(vdev));
2135 }
2136 
2137 static void handle_close_channel(VMBus *vmbus, vmbus_message_close_channel *msg,
2138                                  uint32_t msglen)
2139 {
2140     VMBusChannel *chan;
2141 
2142     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
2143         return;
2144     }
2145 
2146     trace_vmbus_close_channel(msg->child_relid);
2147 
2148     chan = find_channel(vmbus, msg->child_relid);
2149     if (!chan) {
2150         return;
2151     }
2152 
2153     close_channel(chan);
2154     chan->state = VMCHAN_INIT;
2155 
2156     vdev_reset_on_close(chan->dev);
2157 }
2158 
2159 static void handle_unload(VMBus *vmbus, void *msg, uint32_t msglen)
2160 {
2161     vmbus->state = VMBUS_UNLOAD;
2162 }
2163 
2164 static void send_unload(VMBus *vmbus)
2165 {
2166     vmbus_message_header msg = {
2167         .message_type = VMBUS_MSG_UNLOAD_RESPONSE,
2168     };
2169 
2170     qemu_mutex_lock(&vmbus->rx_queue_lock);
2171     vmbus->rx_queue_size = 0;
2172     qemu_mutex_unlock(&vmbus->rx_queue_lock);
2173 
2174     post_msg(vmbus, &msg, sizeof(msg));
2175     return;
2176 }
2177 
2178 static bool complete_unload(VMBus *vmbus)
2179 {
2180     vmbus_reset_all(vmbus);
2181     return true;
2182 }
2183 
2184 static void process_message(VMBus *vmbus)
2185 {
2186     struct hyperv_post_message_input *hv_msg;
2187     struct vmbus_message_header *msg;
2188     void *msgdata;
2189     uint32_t msglen;
2190 
2191     qemu_mutex_lock(&vmbus->rx_queue_lock);
2192 
2193     if (!vmbus->rx_queue_size) {
2194         goto unlock;
2195     }
2196 
2197     hv_msg = &vmbus->rx_queue[vmbus->rx_queue_head];
2198     msglen =  hv_msg->payload_size;
2199     if (msglen < sizeof(*msg)) {
2200         goto out;
2201     }
2202     msgdata = hv_msg->payload;
2203     msg = (struct vmbus_message_header *)msgdata;
2204 
2205     trace_vmbus_process_incoming_message(msg->message_type);
2206 
2207     switch (msg->message_type) {
2208     case VMBUS_MSG_INITIATE_CONTACT:
2209         handle_initiate_contact(vmbus, msgdata, msglen);
2210         break;
2211     case VMBUS_MSG_REQUESTOFFERS:
2212         handle_request_offers(vmbus, msgdata, msglen);
2213         break;
2214     case VMBUS_MSG_GPADL_HEADER:
2215         handle_gpadl_header(vmbus, msgdata, msglen);
2216         break;
2217     case VMBUS_MSG_GPADL_BODY:
2218         handle_gpadl_body(vmbus, msgdata, msglen);
2219         break;
2220     case VMBUS_MSG_GPADL_TEARDOWN:
2221         handle_gpadl_teardown(vmbus, msgdata, msglen);
2222         break;
2223     case VMBUS_MSG_OPENCHANNEL:
2224         handle_open_channel(vmbus, msgdata, msglen);
2225         break;
2226     case VMBUS_MSG_CLOSECHANNEL:
2227         handle_close_channel(vmbus, msgdata, msglen);
2228         break;
2229     case VMBUS_MSG_UNLOAD:
2230         handle_unload(vmbus, msgdata, msglen);
2231         break;
2232     default:
2233         error_report("unknown message type %#x", msg->message_type);
2234         break;
2235     }
2236 
2237 out:
2238     vmbus->rx_queue_size--;
2239     vmbus->rx_queue_head++;
2240     vmbus->rx_queue_head %= HV_MSG_QUEUE_LEN;
2241 
2242     vmbus_resched(vmbus);
2243 unlock:
2244     qemu_mutex_unlock(&vmbus->rx_queue_lock);
2245 }
2246 
2247 static const struct {
2248     void (*run)(VMBus *vmbus);
2249     bool (*complete)(VMBus *vmbus);
2250 } state_runner[] = {
2251     [VMBUS_LISTEN]         = {process_message,     NULL},
2252     [VMBUS_HANDSHAKE]      = {send_handshake,      NULL},
2253     [VMBUS_OFFER]          = {send_offer,          complete_offer},
2254     [VMBUS_CREATE_GPADL]   = {send_create_gpadl,   complete_create_gpadl},
2255     [VMBUS_TEARDOWN_GPADL] = {send_teardown_gpadl, complete_teardown_gpadl},
2256     [VMBUS_OPEN_CHANNEL]   = {send_open_channel,   complete_open_channel},
2257     [VMBUS_UNLOAD]         = {send_unload,         complete_unload},
2258 };
2259 
2260 static void vmbus_do_run(VMBus *vmbus)
2261 {
2262     if (vmbus->msg_in_progress) {
2263         return;
2264     }
2265 
2266     assert(vmbus->state < VMBUS_STATE_MAX);
2267     assert(state_runner[vmbus->state].run);
2268     state_runner[vmbus->state].run(vmbus);
2269 }
2270 
2271 static void vmbus_run(void *opaque)
2272 {
2273     VMBus *vmbus = opaque;
2274 
2275     /* make sure no recursion happens (e.g. due to recursive aio_poll()) */
2276     if (vmbus->in_progress) {
2277         return;
2278     }
2279 
2280     vmbus->in_progress = true;
2281     /*
2282      * FIXME: if vmbus_resched() is called from within vmbus_do_run(), it
2283      * should go *after* the code that can result in aio_poll; otherwise
2284      * reschedules can be missed.  No idea how to enforce that.
2285      */
2286     vmbus_do_run(vmbus);
2287     vmbus->in_progress = false;
2288 }
2289 
2290 static void vmbus_msg_cb(void *data, int status)
2291 {
2292     VMBus *vmbus = data;
2293     bool (*complete)(VMBus *vmbus);
2294 
2295     assert(vmbus->msg_in_progress);
2296 
2297     trace_vmbus_msg_cb(status);
2298 
2299     if (status == -EAGAIN) {
2300         goto out;
2301     }
2302     if (status) {
2303         error_report("message delivery fatal failure: %d; aborting vmbus",
2304                      status);
2305         vmbus_reset_all(vmbus);
2306         return;
2307     }
2308 
2309     assert(vmbus->state < VMBUS_STATE_MAX);
2310     complete = state_runner[vmbus->state].complete;
2311     if (!complete || complete(vmbus)) {
2312         vmbus->state = VMBUS_LISTEN;
2313     }
2314 out:
2315     vmbus->msg_in_progress = false;
2316     vmbus_resched(vmbus);
2317 }
2318 
2319 static void vmbus_resched(VMBus *vmbus)
2320 {
2321     aio_bh_schedule_oneshot(qemu_get_aio_context(), vmbus_run, vmbus);
2322 }
2323 
2324 static void vmbus_signal_event(EventNotifier *e)
2325 {
2326     VMBusChannel *chan;
2327     VMBus *vmbus = container_of(e, VMBus, notifier);
2328     unsigned long *int_map;
2329     hwaddr addr, len;
2330     bool is_dirty = false;
2331 
2332     if (!event_notifier_test_and_clear(e)) {
2333         return;
2334     }
2335 
2336     trace_vmbus_signal_event();
2337 
2338     if (!vmbus->int_page_gpa) {
2339         return;
2340     }
2341 
2342     addr = vmbus->int_page_gpa + TARGET_PAGE_SIZE / 2;
2343     len = TARGET_PAGE_SIZE / 2;
2344     int_map = cpu_physical_memory_map(addr, &len, 1);
2345     if (len != TARGET_PAGE_SIZE / 2) {
2346         goto unmap;
2347     }
2348 
2349     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2350         if (bitmap_test_and_clear_atomic(int_map, chan->id, 1)) {
2351             if (!vmbus_channel_is_open(chan)) {
2352                 continue;
2353             }
2354             vmbus_channel_notify_host(chan);
2355             is_dirty = true;
2356         }
2357     }
2358 
2359 unmap:
2360     cpu_physical_memory_unmap(int_map, len, 1, is_dirty);
2361 }
2362 
2363 static void vmbus_dev_realize(DeviceState *dev, Error **errp)
2364 {
2365     VMBusDevice *vdev = VMBUS_DEVICE(dev);
2366     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2367     VMBus *vmbus = VMBUS(qdev_get_parent_bus(dev));
2368     BusChild *child;
2369     Error *err = NULL;
2370     char idstr[UUID_FMT_LEN + 1];
2371 
2372     assert(!qemu_uuid_is_null(&vdev->instanceid));
2373 
2374     /* Check for instance id collision for this class id */
2375     QTAILQ_FOREACH(child, &BUS(vmbus)->children, sibling) {
2376         VMBusDevice *child_dev = VMBUS_DEVICE(child->child);
2377 
2378         if (child_dev == vdev) {
2379             continue;
2380         }
2381 
2382         if (qemu_uuid_is_equal(&child_dev->instanceid, &vdev->instanceid)) {
2383             qemu_uuid_unparse(&vdev->instanceid, idstr);
2384             error_setg(&err, "duplicate vmbus device instance id %s", idstr);
2385             goto error_out;
2386         }
2387     }
2388 
2389     vdev->dma_as = &address_space_memory;
2390 
2391     create_channels(vmbus, vdev, &err);
2392     if (err) {
2393         goto error_out;
2394     }
2395 
2396     if (vdc->vmdev_realize) {
2397         vdc->vmdev_realize(vdev, &err);
2398         if (err) {
2399             goto err_vdc_realize;
2400         }
2401     }
2402     return;
2403 
2404 err_vdc_realize:
2405     free_channels(vdev);
2406 error_out:
2407     error_propagate(errp, err);
2408 }
2409 
2410 static void vmbus_dev_reset(DeviceState *dev)
2411 {
2412     uint16_t i;
2413     VMBusDevice *vdev = VMBUS_DEVICE(dev);
2414     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2415 
2416     if (vdev->channels) {
2417         for (i = 0; i < vdev->num_channels; i++) {
2418             VMBusChannel *chan = &vdev->channels[i];
2419             close_channel(chan);
2420             chan->state = VMCHAN_INIT;
2421         }
2422     }
2423 
2424     if (vdc->vmdev_reset) {
2425         vdc->vmdev_reset(vdev);
2426     }
2427 }
2428 
2429 static void vmbus_dev_unrealize(DeviceState *dev)
2430 {
2431     VMBusDevice *vdev = VMBUS_DEVICE(dev);
2432     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2433 
2434     if (vdc->vmdev_unrealize) {
2435         vdc->vmdev_unrealize(vdev);
2436     }
2437     free_channels(vdev);
2438 }
2439 
2440 static void vmbus_dev_class_init(ObjectClass *klass, void *data)
2441 {
2442     DeviceClass *kdev = DEVICE_CLASS(klass);
2443     kdev->bus_type = TYPE_VMBUS;
2444     kdev->realize = vmbus_dev_realize;
2445     kdev->unrealize = vmbus_dev_unrealize;
2446     kdev->reset = vmbus_dev_reset;
2447 }
2448 
2449 static Property vmbus_dev_instanceid =
2450                         DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid);
2451 
2452 static void vmbus_dev_instance_init(Object *obj)
2453 {
2454     VMBusDevice *vdev = VMBUS_DEVICE(obj);
2455     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2456 
2457     if (!qemu_uuid_is_null(&vdc->instanceid)) {
2458         /* Class wants to only have a single instance with a fixed UUID */
2459         vdev->instanceid = vdc->instanceid;
2460     } else {
2461         qdev_property_add_static(DEVICE(vdev), &vmbus_dev_instanceid);
2462     }
2463 }
2464 
2465 const VMStateDescription vmstate_vmbus_dev = {
2466     .name = TYPE_VMBUS_DEVICE,
2467     .version_id = 0,
2468     .minimum_version_id = 0,
2469     .fields = (VMStateField[]) {
2470         VMSTATE_UINT8_ARRAY(instanceid.data, VMBusDevice, 16),
2471         VMSTATE_UINT16(num_channels, VMBusDevice),
2472         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(channels, VMBusDevice,
2473                                              num_channels, vmstate_channel,
2474                                              VMBusChannel),
2475         VMSTATE_END_OF_LIST()
2476     }
2477 };
2478 
2479 /* vmbus generic device base */
2480 static const TypeInfo vmbus_dev_type_info = {
2481     .name = TYPE_VMBUS_DEVICE,
2482     .parent = TYPE_DEVICE,
2483     .abstract = true,
2484     .instance_size = sizeof(VMBusDevice),
2485     .class_size = sizeof(VMBusDeviceClass),
2486     .class_init = vmbus_dev_class_init,
2487     .instance_init = vmbus_dev_instance_init,
2488 };
2489 
2490 static void vmbus_realize(BusState *bus, Error **errp)
2491 {
2492     int ret = 0;
2493     Error *local_err = NULL;
2494     VMBus *vmbus = VMBUS(bus);
2495 
2496     qemu_mutex_init(&vmbus->rx_queue_lock);
2497 
2498     QTAILQ_INIT(&vmbus->gpadl_list);
2499     QTAILQ_INIT(&vmbus->channel_list);
2500 
2501     ret = hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID,
2502                                  vmbus_recv_message, vmbus);
2503     if (ret != 0) {
2504         error_setg(&local_err, "hyperv set message handler failed: %d", ret);
2505         goto error_out;
2506     }
2507 
2508     ret = event_notifier_init(&vmbus->notifier, 0);
2509     if (ret != 0) {
2510         error_setg(&local_err, "event notifier failed to init with %d", ret);
2511         goto remove_msg_handler;
2512     }
2513 
2514     event_notifier_set_handler(&vmbus->notifier, vmbus_signal_event);
2515     ret = hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID,
2516                                         &vmbus->notifier);
2517     if (ret != 0) {
2518         error_setg(&local_err, "hyperv set event handler failed with %d", ret);
2519         goto clear_event_notifier;
2520     }
2521 
2522     return;
2523 
2524 clear_event_notifier:
2525     event_notifier_cleanup(&vmbus->notifier);
2526 remove_msg_handler:
2527     hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
2528 error_out:
2529     qemu_mutex_destroy(&vmbus->rx_queue_lock);
2530     error_propagate(errp, local_err);
2531 }
2532 
2533 static void vmbus_unrealize(BusState *bus)
2534 {
2535     VMBus *vmbus = VMBUS(bus);
2536 
2537     hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
2538     hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID, NULL);
2539     event_notifier_cleanup(&vmbus->notifier);
2540 
2541     qemu_mutex_destroy(&vmbus->rx_queue_lock);
2542 }
2543 
2544 static void vmbus_reset(BusState *bus)
2545 {
2546     vmbus_deinit(VMBUS(bus));
2547 }
2548 
2549 static char *vmbus_get_dev_path(DeviceState *dev)
2550 {
2551     BusState *bus = qdev_get_parent_bus(dev);
2552     return qdev_get_dev_path(bus->parent);
2553 }
2554 
2555 static char *vmbus_get_fw_dev_path(DeviceState *dev)
2556 {
2557     VMBusDevice *vdev = VMBUS_DEVICE(dev);
2558     char uuid[UUID_FMT_LEN + 1];
2559 
2560     qemu_uuid_unparse(&vdev->instanceid, uuid);
2561     return g_strdup_printf("%s@%s", qdev_fw_name(dev), uuid);
2562 }
2563 
2564 static void vmbus_class_init(ObjectClass *klass, void *data)
2565 {
2566     BusClass *k = BUS_CLASS(klass);
2567 
2568     k->get_dev_path = vmbus_get_dev_path;
2569     k->get_fw_dev_path = vmbus_get_fw_dev_path;
2570     k->realize = vmbus_realize;
2571     k->unrealize = vmbus_unrealize;
2572     k->reset = vmbus_reset;
2573 }
2574 
2575 static int vmbus_pre_load(void *opaque)
2576 {
2577     VMBusChannel *chan;
2578     VMBus *vmbus = VMBUS(opaque);
2579 
2580     /*
2581      * channel IDs allocated by the source will come in the migration stream
2582      * for each channel, so clean up the ones allocated at realize
2583      */
2584     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2585         unregister_chan_id(chan);
2586     }
2587 
2588     return 0;
2589 }
2590 static int vmbus_post_load(void *opaque, int version_id)
2591 {
2592     int ret;
2593     VMBus *vmbus = VMBUS(opaque);
2594     VMBusGpadl *gpadl;
2595     VMBusChannel *chan;
2596 
2597     ret = vmbus_init(vmbus);
2598     if (ret) {
2599         return ret;
2600     }
2601 
2602     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
2603         gpadl->vmbus = vmbus;
2604         gpadl->refcount = 1;
2605     }
2606 
2607     /*
2608      * reopening channels depends on initialized vmbus so it's done here
2609      * instead of channel_post_load()
2610      */
2611     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2612 
2613         if (chan->state == VMCHAN_OPENING || chan->state == VMCHAN_OPEN) {
2614             open_channel(chan);
2615         }
2616 
2617         if (chan->state != VMCHAN_OPEN) {
2618             continue;
2619         }
2620 
2621         if (!vmbus_channel_is_open(chan)) {
2622             /* reopen failed, abort loading */
2623             return -1;
2624         }
2625 
2626         /* resume processing on the guest side if it missed the notification */
2627         hyperv_sint_route_set_sint(chan->notify_route);
2628         /* ditto on the host side */
2629         vmbus_channel_notify_host(chan);
2630     }
2631 
2632     vmbus_resched(vmbus);
2633     return 0;
2634 }
2635 
2636 static const VMStateDescription vmstate_post_message_input = {
2637     .name = "vmbus/hyperv_post_message_input",
2638     .version_id = 0,
2639     .minimum_version_id = 0,
2640     .fields = (VMStateField[]) {
2641         /*
2642          * skip connection_id and message_type as they are validated before
2643          * queueing and ignored on dequeueing
2644          */
2645         VMSTATE_UINT32(payload_size, struct hyperv_post_message_input),
2646         VMSTATE_UINT8_ARRAY(payload, struct hyperv_post_message_input,
2647                             HV_MESSAGE_PAYLOAD_SIZE),
2648         VMSTATE_END_OF_LIST()
2649     }
2650 };
2651 
2652 static bool vmbus_rx_queue_needed(void *opaque)
2653 {
2654     VMBus *vmbus = VMBUS(opaque);
2655     return vmbus->rx_queue_size;
2656 }
2657 
2658 static const VMStateDescription vmstate_rx_queue = {
2659     .name = "vmbus/rx_queue",
2660     .version_id = 0,
2661     .minimum_version_id = 0,
2662     .needed = vmbus_rx_queue_needed,
2663     .fields = (VMStateField[]) {
2664         VMSTATE_UINT8(rx_queue_head, VMBus),
2665         VMSTATE_UINT8(rx_queue_size, VMBus),
2666         VMSTATE_STRUCT_ARRAY(rx_queue, VMBus,
2667                              HV_MSG_QUEUE_LEN, 0,
2668                              vmstate_post_message_input,
2669                              struct hyperv_post_message_input),
2670         VMSTATE_END_OF_LIST()
2671     }
2672 };
2673 
2674 static const VMStateDescription vmstate_vmbus = {
2675     .name = TYPE_VMBUS,
2676     .version_id = 0,
2677     .minimum_version_id = 0,
2678     .pre_load = vmbus_pre_load,
2679     .post_load = vmbus_post_load,
2680     .fields = (VMStateField[]) {
2681         VMSTATE_UINT8(state, VMBus),
2682         VMSTATE_UINT32(version, VMBus),
2683         VMSTATE_UINT32(target_vp, VMBus),
2684         VMSTATE_UINT64(int_page_gpa, VMBus),
2685         VMSTATE_QTAILQ_V(gpadl_list, VMBus, 0,
2686                          vmstate_gpadl, VMBusGpadl, link),
2687         VMSTATE_END_OF_LIST()
2688     },
2689     .subsections = (const VMStateDescription * []) {
2690         &vmstate_rx_queue,
2691         NULL
2692     }
2693 };
2694 
2695 static const TypeInfo vmbus_type_info = {
2696     .name = TYPE_VMBUS,
2697     .parent = TYPE_BUS,
2698     .instance_size = sizeof(VMBus),
2699     .class_init = vmbus_class_init,
2700 };
2701 
2702 static void vmbus_bridge_realize(DeviceState *dev, Error **errp)
2703 {
2704     VMBusBridge *bridge = VMBUS_BRIDGE(dev);
2705 
2706     /*
2707      * here there's at least one vmbus bridge that is being realized, so
2708      * vmbus_bridge_find can only return NULL if it's not unique
2709      */
2710     if (!vmbus_bridge_find()) {
2711         error_setg(errp, "there can be at most one %s in the system",
2712                    TYPE_VMBUS_BRIDGE);
2713         return;
2714     }
2715 
2716     if (!hyperv_is_synic_enabled()) {
2717         error_report("VMBus requires usable Hyper-V SynIC and VP_INDEX");
2718         return;
2719     }
2720 
2721     bridge->bus = VMBUS(qbus_create(TYPE_VMBUS, dev, "vmbus"));
2722 }
2723 
2724 static char *vmbus_bridge_ofw_unit_address(const SysBusDevice *dev)
2725 {
2726     /* there can be only one VMBus */
2727     return g_strdup("0");
2728 }
2729 
2730 static const VMStateDescription vmstate_vmbus_bridge = {
2731     .name = TYPE_VMBUS_BRIDGE,
2732     .version_id = 0,
2733     .minimum_version_id = 0,
2734     .fields = (VMStateField[]) {
2735         VMSTATE_STRUCT_POINTER(bus, VMBusBridge, vmstate_vmbus, VMBus),
2736         VMSTATE_END_OF_LIST()
2737     },
2738 };
2739 
2740 static Property vmbus_bridge_props[] = {
2741     DEFINE_PROP_UINT8("irq", VMBusBridge, irq, 7),
2742     DEFINE_PROP_END_OF_LIST()
2743 };
2744 
2745 static void vmbus_bridge_class_init(ObjectClass *klass, void *data)
2746 {
2747     DeviceClass *k = DEVICE_CLASS(klass);
2748     SysBusDeviceClass *sk = SYS_BUS_DEVICE_CLASS(klass);
2749 
2750     k->realize = vmbus_bridge_realize;
2751     k->fw_name = "vmbus";
2752     sk->explicit_ofw_unit_address = vmbus_bridge_ofw_unit_address;
2753     set_bit(DEVICE_CATEGORY_BRIDGE, k->categories);
2754     k->vmsd = &vmstate_vmbus_bridge;
2755     device_class_set_props(k, vmbus_bridge_props);
2756     /* override SysBusDevice's default */
2757     k->user_creatable = true;
2758 }
2759 
2760 static const TypeInfo vmbus_bridge_type_info = {
2761     .name = TYPE_VMBUS_BRIDGE,
2762     .parent = TYPE_SYS_BUS_DEVICE,
2763     .instance_size = sizeof(VMBusBridge),
2764     .class_init = vmbus_bridge_class_init,
2765 };
2766 
2767 static void vmbus_register_types(void)
2768 {
2769     type_register_static(&vmbus_bridge_type_info);
2770     type_register_static(&vmbus_dev_type_info);
2771     type_register_static(&vmbus_type_info);
2772 }
2773 
2774 type_init(vmbus_register_types)
2775