1 /*
2 * QEMU Hyper-V VMBus
3 *
4 * Copyright (c) 2017-2018 Virtuozzo International GmbH.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 */
9
10 #include "qemu/osdep.h"
11 #include "qemu/error-report.h"
12 #include "qemu/main-loop.h"
13 #include "qapi/error.h"
14 #include "migration/vmstate.h"
15 #include "hw/qdev-properties.h"
16 #include "hw/qdev-properties-system.h"
17 #include "hw/hyperv/hyperv.h"
18 #include "hw/hyperv/vmbus.h"
19 #include "hw/hyperv/vmbus-bridge.h"
20 #include "hw/sysbus.h"
21 #include "cpu.h"
22 #include "trace.h"
23
/*
 * GPADL lifecycle states.
 * NOTE(review): presumably the values stored in VMBusGpadl::state; the code
 * that assigns them is not in this part of the file - confirm.
 */
enum {
    VMGPADL_INIT,
    VMGPADL_ALIVE,
    VMGPADL_TEARINGDOWN,
    VMGPADL_TORNDOWN,
};
30
/*
 * A guest physical address descriptor list: a set of guest pages, identified
 * by their frame numbers, that the guest shares with the host.
 */
struct VMBusGpadl {
    /* GPADL id */
    uint32_t id;
    /* associated channel id (rudimentary?) */
    uint32_t child_relid;

    /* number of pages in the GPADL as declared in GPADL_HEADER message */
    uint32_t num_gfns;
    /*
     * Due to limited message size, GPADL may not fit fully in a single
     * GPADL_HEADER message, and is further populated using GPADL_BODY
     * messages.  @seen_gfns is the number of pages seen so far; once it
     * reaches @num_gfns, the GPADL is ready to use.
     */
    uint32_t seen_gfns;
    /* array of GFNs (of size @num_gfns once allocated) */
    uint64_t *gfns;

    /* presumably one of the VMGPADL_* values - confirm against the users */
    uint8_t state;

    /* entry in the owning bus's gpadl_list */
    QTAILQ_ENTRY(VMBusGpadl) link;
    /* the bus this GPADL was created on */
    VMBus *vmbus;
    /*
     * Starts at 1 on creation, is bumped by vmbus_get_gpadl() and dropped by
     * vmbus_put_gpadl(); the GPADL is freed when it reaches zero.
     */
    unsigned refcount;
};
55
/*
 * Wrap sequential read from / write to GPADL.
 */
typedef struct GpadlIter {
    /* the GPADL being accessed */
    VMBusGpadl *gpadl;
    /* address space the GPADL pages are mapped from */
    AddressSpace *as;
    /* direction of every copy done through this iterator */
    DMADirection dir;
    /* offset into GPADL where the next i/o will be performed */
    uint32_t off;
    /*
     * Cached mapping of the currently accessed page, up to page boundary.
     * Updated lazily on i/o.
     * Note: MemoryRegionCache can not be used here because pages in the GPADL
     * are non-contiguous and may belong to different memory regions.
     */
    void *map;
    /* offset after last i/o (i.e. not affected by seek) */
    uint32_t last_off;
    /*
     * Indicator that the iterator is active and may have a cached mapping.
     * Allows to enforce bracketing of all i/o (which may create cached
     * mappings) and thus exclude mapping leaks.
     */
    bool active;
} GpadlIter;
81
/*
 * Ring buffer.  There are two of them, sitting in the same GPADL, for each
 * channel.
 * Each ring buffer consists of a set of pages, with the first page containing
 * the ring buffer header, and the remaining pages being for data packets.
 */
typedef struct VMBusRingBufCommon {
    /* address space used to map the header and the data pages */
    AddressSpace *as;
    /* GPA of the ring buffer header */
    dma_addr_t rb_addr;
    /* start and length of the ring buffer data area within GPADL */
    uint32_t base;
    uint32_t len;

    /* iterator over the GPADL used for all data-area i/o */
    GpadlIter iter;
} VMBusRingBufCommon;
98
/*
 * Ring buffer the host writes packets into; it is initialized with
 * DMA_DIRECTION_FROM_DEVICE.
 */
typedef struct VMBusSendRingBuf {
    VMBusRingBufCommon common;
    /* current write index, to be committed at the end of send */
    uint32_t wr_idx;
    /* write index at the start of send */
    uint32_t last_wr_idx;
    /* space to be requested from the guest */
    uint32_t wanted;
    /* space reserved for planned sends */
    uint32_t reserved;
    /* last seen read index */
    uint32_t last_seen_rd_idx;
} VMBusSendRingBuf;
112
/*
 * Ring buffer the host reads packets from; it is initialized with
 * DMA_DIRECTION_TO_DEVICE.
 */
typedef struct VMBusRecvRingBuf {
    VMBusRingBufCommon common;
    /* current read index, to be committed at the end of receive */
    uint32_t rd_idx;
    /* read index at the start of receive */
    uint32_t last_rd_idx;
    /* last seen write index */
    uint32_t last_seen_wr_idx;
} VMBusRecvRingBuf;
122
123
/*
 * Progress of offering a channel to the guest.
 * NOTE(review): presumably the values of VMBusChannel::offer_state - confirm.
 */
enum {
    VMOFFER_INIT,
    VMOFFER_SENDING,
    VMOFFER_SENT,
};
129
/*
 * Progress of opening a channel.
 * NOTE(review): presumably the values of VMBusChannel::state - confirm.
 */
enum {
    VMCHAN_INIT,
    VMCHAN_OPENING,
    VMCHAN_OPEN,
};
135
/* A single VMBus channel of a device, with its pair of ring buffers. */
struct VMBusChannel {
    /* the device this channel belongs to */
    VMBusDevice *dev;

    /* channel id */
    uint32_t id;
    /*
     * subchannel index within the device; subchannel #0 is "primary" and
     * always exists
     */
    uint16_t subchan_idx;
    uint32_t open_id;
    /* VP_INDEX of the vCPU to notify with (synthetic) interrupts */
    uint32_t target_vp;
    /* GPADL id to use for the ring buffers */
    uint32_t ringbuf_gpadl;
    /* start (in pages) of the send ring buffer within @ringbuf_gpadl */
    uint32_t ringbuf_send_offset;

    /* presumably VMOFFER_* and VMCHAN_* values respectively - confirm */
    uint8_t offer_state;
    uint8_t state;
    /* reported by vmbus_channel_is_open(); sending requires it to be set */
    bool is_open;

    /* main device worker; copied from the device class */
    VMBusChannelNotifyCb notify_cb;
    /*
     * guest->host notifications, either sent directly or dispatched via
     * interrupt page (older VMBus)
     */
    EventNotifier notifier;

    /* the bus the channel sits on */
    VMBus *vmbus;
    /*
     * SINT route to signal with host->guest notifications; may be shared with
     * the main VMBus SINT route
     */
    HvSintRoute *notify_route;
    /* GPADL backing both ring buffers */
    VMBusGpadl *gpadl;

    VMBusSendRingBuf send_ringbuf;
    VMBusRecvRingBuf recv_ringbuf;

    /* list linkage; presumably in VMBus::channel_list - confirm */
    QTAILQ_ENTRY(VMBusChannel) link;
};
179
/*
 * Hyper-V spec mandates that every message port has 16 buffers, which means
 * that the guest can post up to this many messages without blocking.
 * Therefore a queue for incoming messages has to be provided.
 * For outgoing (i.e. host->guest) messages there's no queue; the VMBus just
 * doesn't transition to a new state until the message is known to have been
 * successfully delivered to the respective SynIC message slot.
 */
#define HV_MSG_QUEUE_LEN     16

/* Hyper-V devices never use channel #0.  Must be something special. */
#define VMBUS_FIRST_CHANID      1
/*
 * Each channel occupies one bit within a single event page sint slot, so the
 * number of usable ids is the number of event flags less the ids below
 * VMBUS_FIRST_CHANID.
 */
#define VMBUS_CHANID_COUNT      (HV_EVENT_FLAGS_COUNT - VMBUS_FIRST_CHANID)
/* Leave a few connection numbers for other purposes. */
#define VMBUS_CHAN_CONNECTION_OFFSET     16
196
/*
 * Since the success or failure of sending a message is reported
 * asynchronously, the VMBus state machine has effectively two entry points:
 * vmbus_run and vmbus_msg_cb (the latter is called when the host->guest
 * message delivery status becomes known).  Both are run as oneshot BHs on the
 * main aio context, ensuring serialization.
 *
 * The enumerators below are the states of that machine; VMBUS_STATE_MAX comes
 * last and thus equals the number of states.
 */
enum {
    VMBUS_LISTEN,
    VMBUS_HANDSHAKE,
    VMBUS_OFFER,
    VMBUS_CREATE_GPADL,
    VMBUS_TEARDOWN_GPADL,
    VMBUS_OPEN_CHANNEL,
    VMBUS_UNLOAD,
    VMBUS_STATE_MAX
};
214
/* The VMBus itself: protocol state, message queue, GPADLs and channels. */
struct VMBus {
    BusState parent;

    /* presumably one of the VMBUS_* state machine values - confirm */
    uint8_t state;
    /* protection against recursive aio_poll (see vmbus_run) */
    bool in_progress;
    /* whether there's a message being delivered to the guest */
    bool msg_in_progress;
    uint32_t version;
    /* VP_INDEX of the vCPU to send messages and interrupts to */
    uint32_t target_vp;
    HvSintRoute *sint_route;
    /*
     * interrupt page for older protocol versions; newer ones use SynIC event
     * flags directly
     */
    hwaddr int_page_gpa;

    /* one bit per channel id, VMBUS_CHANID_COUNT bits in total */
    DECLARE_BITMAP(chanid_bitmap, VMBUS_CHANID_COUNT);

    /* incoming message queue */
    struct hyperv_post_message_input rx_queue[HV_MSG_QUEUE_LEN];
    /* presumably index of the oldest entry and number of entries - confirm */
    uint8_t rx_queue_head;
    uint8_t rx_queue_size;
    QemuMutex rx_queue_lock;

    /* GPADLs created on this bus (see create_gpadl / free_gpadl) */
    QTAILQ_HEAD(, VMBusGpadl) gpadl_list;
    QTAILQ_HEAD(, VMBusChannel) channel_list;

    /*
     * guest->host notifications for older VMBus, to be dispatched via
     * interrupt page
     */
    EventNotifier notifier;
};
250
/* Whether every page declared for @gpadl has already been seen. */
static bool gpadl_full(VMBusGpadl *gpadl)
{
    return gpadl->num_gfns == gpadl->seen_gfns;
}
255
create_gpadl(VMBus * vmbus,uint32_t id,uint32_t child_relid,uint32_t num_gfns)256 static VMBusGpadl *create_gpadl(VMBus *vmbus, uint32_t id,
257 uint32_t child_relid, uint32_t num_gfns)
258 {
259 VMBusGpadl *gpadl = g_new0(VMBusGpadl, 1);
260
261 gpadl->id = id;
262 gpadl->child_relid = child_relid;
263 gpadl->num_gfns = num_gfns;
264 gpadl->gfns = g_new(uint64_t, num_gfns);
265 QTAILQ_INSERT_HEAD(&vmbus->gpadl_list, gpadl, link);
266 gpadl->vmbus = vmbus;
267 gpadl->refcount = 1;
268 return gpadl;
269 }
270
/* Take @gpadl off its bus's GPADL list and release its memory. */
static void free_gpadl(VMBusGpadl *gpadl)
{
    VMBus *bus = gpadl->vmbus;

    QTAILQ_REMOVE(&bus->gpadl_list, gpadl, link);
    g_free(gpadl->gfns);
    g_free(gpadl);
}
277
find_gpadl(VMBus * vmbus,uint32_t gpadl_id)278 static VMBusGpadl *find_gpadl(VMBus *vmbus, uint32_t gpadl_id)
279 {
280 VMBusGpadl *gpadl;
281 QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
282 if (gpadl->id == gpadl_id) {
283 return gpadl;
284 }
285 }
286 return NULL;
287 }
288
vmbus_get_gpadl(VMBusChannel * chan,uint32_t gpadl_id)289 VMBusGpadl *vmbus_get_gpadl(VMBusChannel *chan, uint32_t gpadl_id)
290 {
291 VMBusGpadl *gpadl = find_gpadl(chan->vmbus, gpadl_id);
292 if (!gpadl || !gpadl_full(gpadl)) {
293 return NULL;
294 }
295 gpadl->refcount++;
296 return gpadl;
297 }
298
/*
 * Drop a reference to @gpadl, freeing it once the last one is gone.
 * A NULL @gpadl is silently ignored.
 */
void vmbus_put_gpadl(VMBusGpadl *gpadl)
{
    if (gpadl && --gpadl->refcount == 0) {
        free_gpadl(gpadl);
    }
}
309
/* Size of @gpadl in bytes: its declared page count times the page size. */
uint32_t vmbus_gpadl_len(VMBusGpadl *gpadl)
{
    uint32_t pages = gpadl->num_gfns;

    return pages * TARGET_PAGE_SIZE;
}
314
/*
 * Set up @iter for i/o on @gpadl through address space @as in direction
 * @dir.  The iterator starts out inactive; the offset and the cached mapping
 * are not touched here.
 */
static void gpadl_iter_init(GpadlIter *iter, VMBusGpadl *gpadl,
                            AddressSpace *as, DMADirection dir)
{
    iter->active = false;
    iter->gpadl = gpadl;
    iter->dir = dir;
    iter->as = as;
}
323
/*
 * Release the mapping cached in @iter.
 * The mapping runs from its start offset to the end of the page; the part
 * reported as accessed runs from the mapping start to the end of the last
 * i/o.  @iter->map itself is left for the caller to reset.
 */
static inline void gpadl_iter_cache_unmap(GpadlIter *iter)
{
    uint32_t start_in_page = (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
    uint32_t end_in_page = ((iter->last_off - 1) & ~TARGET_PAGE_MASK) + 1;
    uint32_t mapped_len = TARGET_PAGE_SIZE - start_in_page;
    uint32_t accessed_len = end_in_page - start_in_page;

    /* mapping is only done to do non-zero amount of i/o */
    assert(iter->last_off > 0);
    assert(start_in_page < end_in_page);

    dma_memory_unmap(iter->as, iter->map, mapped_len, iter->dir,
                     accessed_len);
}
336
337 /*
338 * Copy exactly @len bytes between the GPADL pointed to by @iter and @buf.
339 * The direction of the copy is determined by @iter->dir.
340 * The caller must ensure the operation overflows neither @buf nor the GPADL
341 * (there's an assert for the latter).
342 * Reuse the currently mapped page in the GPADL if possible.
343 */
gpadl_iter_io(GpadlIter * iter,void * buf,uint32_t len)344 static ssize_t gpadl_iter_io(GpadlIter *iter, void *buf, uint32_t len)
345 {
346 ssize_t ret = len;
347
348 assert(iter->active);
349
350 while (len) {
351 uint32_t off_in_page = iter->off & ~TARGET_PAGE_MASK;
352 uint32_t pgleft = TARGET_PAGE_SIZE - off_in_page;
353 uint32_t cplen = MIN(pgleft, len);
354 void *p;
355
356 /* try to reuse the cached mapping */
357 if (iter->map) {
358 uint32_t map_start_in_page =
359 (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
360 uint32_t off_base = iter->off & ~TARGET_PAGE_MASK;
361 uint32_t mapped_base = (iter->last_off - 1) & ~TARGET_PAGE_MASK;
362 if (off_base != mapped_base || off_in_page < map_start_in_page) {
363 gpadl_iter_cache_unmap(iter);
364 iter->map = NULL;
365 }
366 }
367
368 if (!iter->map) {
369 dma_addr_t maddr;
370 dma_addr_t mlen = pgleft;
371 uint32_t idx = iter->off >> TARGET_PAGE_BITS;
372 assert(idx < iter->gpadl->num_gfns);
373
374 maddr = (iter->gpadl->gfns[idx] << TARGET_PAGE_BITS) | off_in_page;
375
376 iter->map = dma_memory_map(iter->as, maddr, &mlen, iter->dir,
377 MEMTXATTRS_UNSPECIFIED);
378 if (mlen != pgleft) {
379 dma_memory_unmap(iter->as, iter->map, mlen, iter->dir, 0);
380 iter->map = NULL;
381 return -EFAULT;
382 }
383 }
384
385 p = (void *)(uintptr_t)(((uintptr_t)iter->map & TARGET_PAGE_MASK) |
386 off_in_page);
387 if (iter->dir == DMA_DIRECTION_FROM_DEVICE) {
388 memcpy(p, buf, cplen);
389 } else {
390 memcpy(buf, p, cplen);
391 }
392
393 buf += cplen;
394 len -= cplen;
395 iter->off += cplen;
396 iter->last_off = iter->off;
397 }
398
399 return ret;
400 }
401
402 /*
403 * Position the iterator @iter at new offset @new_off.
404 * If this results in the cached mapping being unusable with the new offset,
405 * unmap it.
406 */
gpadl_iter_seek(GpadlIter * iter,uint32_t new_off)407 static inline void gpadl_iter_seek(GpadlIter *iter, uint32_t new_off)
408 {
409 assert(iter->active);
410 iter->off = new_off;
411 }
412
413 /*
414 * Start a series of i/o on the GPADL.
415 * After this i/o and seek operations on @iter become legal.
416 */
gpadl_iter_start_io(GpadlIter * iter)417 static inline void gpadl_iter_start_io(GpadlIter *iter)
418 {
419 assert(!iter->active);
420 /* mapping is cached lazily on i/o */
421 iter->map = NULL;
422 iter->active = true;
423 }
424
425 /*
426 * End the eariler started series of i/o on the GPADL and release the cached
427 * mapping if any.
428 */
gpadl_iter_end_io(GpadlIter * iter)429 static inline void gpadl_iter_end_io(GpadlIter *iter)
430 {
431 assert(iter->active);
432
433 if (iter->map) {
434 gpadl_iter_cache_unmap(iter);
435 }
436
437 iter->active = false;
438 }
439
/* Forward declarations; both functions are defined further down the file. */
static void vmbus_resched(VMBus *vmbus);
static void vmbus_msg_cb(void *data, int status);
442
/*
 * Copy the contents of the @iov_cnt elements of @iov, back to back, into
 * @gpadl starting at byte offset @off, using the DMA address space of the
 * device of @chan.
 * Stops at the first element that fails to be copied.
 *
 * Returns a negative errno on failure; otherwise the result of copying the
 * last element (0 if @iov_cnt is 0).
 */
ssize_t vmbus_iov_to_gpadl(VMBusChannel *chan, VMBusGpadl *gpadl, uint32_t off,
                           const struct iovec *iov, size_t iov_cnt)
{
    GpadlIter it;
    ssize_t res = 0;
    size_t n;

    gpadl_iter_init(&it, gpadl, chan->dev->dma_as, DMA_DIRECTION_FROM_DEVICE);
    gpadl_iter_start_io(&it);
    gpadl_iter_seek(&it, off);

    for (n = 0; n < iov_cnt && res >= 0; n++) {
        res = gpadl_iter_io(&it, iov[n].iov_base, iov[n].iov_len);
    }

    gpadl_iter_end_io(&it);
    return res;
}
464
/*
 * Map up to @len bytes of the scatter-gather list of @req, starting @off
 * bytes into it, for DMA in direction @dir.  Every mapped piece is recorded
 * in the next free element of @iov, which has room for @iov_cnt elements.
 *
 * Returns the number of @iov elements filled in.  On failure everything
 * mapped so far is unmapped again and a negative errno is returned:
 * -ENOBUFS if @iov is too small, -EFAULT if a piece can't be mapped.
 */
int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
                  unsigned iov_cnt, size_t len, size_t off)
{
    QEMUSGList *sgl = &req->sgl;
    ScatterGatherEntry *ent = sgl->sg;
    int mapped = 0;
    int err;
    unsigned i = 0;

    /* skip the entries that lie entirely before @off */
    while (i < sgl->nsg && ent[i].len <= off) {
        off -= ent[i].len;
        i++;
    }

    while (len && i < sgl->nsg) {
        dma_addr_t chunk = MIN(ent[i].len - off, len);
        dma_addr_t gpa = ent[i].base + off;

        len -= chunk;
        /* only the first entry used is entered at an offset */
        off = 0;
        i++;

        /* a single entry may take several mappings to cover */
        while (chunk) {
            dma_addr_t maplen = chunk;

            if (mapped == iov_cnt) {
                err = -ENOBUFS;
                goto unwind;
            }

            iov[mapped].iov_base = dma_memory_map(sgl->as, gpa, &maplen, dir,
                                                  MEMTXATTRS_UNSPECIFIED);
            if (!maplen) {
                err = -EFAULT;
                goto unwind;
            }
            iov[mapped].iov_len = maplen;
            gpa += maplen;
            chunk -= maplen;
            mapped++;
        }
    }

    return mapped;

unwind:
    vmbus_unmap_sgl(req, dir, iov, mapped, 0);
    return err;
}
511
/*
 * Unmap the first @iov_cnt elements of @iov, which were mapped from the
 * scatter-gather list of @req in direction @dir.
 * @accessed is the number of bytes actually accessed, counted from the start
 * of @iov; it is distributed over the elements in order.
 */
void vmbus_unmap_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
                     unsigned iov_cnt, size_t accessed)
{
    QEMUSGList *sgl = &req->sgl;
    unsigned n = 0;

    while (n < iov_cnt) {
        struct iovec *cur = &iov[n++];
        size_t touched = MIN(accessed, cur->iov_len);

        dma_memory_unmap(sgl->as, cur->iov_base, cur->iov_len, dir, touched);
        accessed -= touched;
    }
}
524
/*
 * Migration description of a single GPADL.
 * Covers the ids, the page counts, the GFN array (@num_gfns elements) and the
 * state; the list linkage, the bus pointer and the refcount are not part of
 * it.
 */
static const VMStateDescription vmstate_gpadl = {
    .name = "vmbus/gpadl",
    .version_id = 0,
    .minimum_version_id = 0,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT32(id, VMBusGpadl),
        VMSTATE_UINT32(child_relid, VMBusGpadl),
        VMSTATE_UINT32(num_gfns, VMBusGpadl),
        VMSTATE_UINT32(seen_gfns, VMBusGpadl),
        /* must follow num_gfns, which gives the number of elements */
        VMSTATE_VARRAY_UINT32_ALLOC(gfns, VMBusGpadl, num_gfns, 0,
                                    vmstate_info_uint64, uint64_t),
        VMSTATE_UINT8(state, VMBusGpadl),
        VMSTATE_END_OF_LIST()
    }
};
540
/*
 * Bring index @idx back into a ring buffer of @len bytes.
 * @idx is assumed to be less than twice the size of the ring buffer, so at
 * most one wraparound is undone.
 */
static inline uint32_t rb_idx_wrap(uint32_t idx, uint32_t len)
{
    return idx < len ? idx : idx - len;
}
553
/*
 * Circular distance from @idx1 forward to @idx2 in a ring buffer of @len
 * bytes.
 * @allow_catchup - whether @idx1 may catch up @idx2; e.g. read index may catch
 * up write index but not vice versa.  When catching up is not allowed the
 * distance is one less, so that @idx1 always stops short of @idx2.
 */
static inline uint32_t rb_idx_delta(uint32_t idx1, uint32_t idx2, uint32_t len,
                                    bool allow_catchup)
{
    uint32_t gap = allow_catchup ? 0 : 1;

    return rb_idx_wrap(idx2 + len - idx1 - gap, len);
}
564
ringbuf_map_hdr(VMBusRingBufCommon * ringbuf)565 static vmbus_ring_buffer *ringbuf_map_hdr(VMBusRingBufCommon *ringbuf)
566 {
567 vmbus_ring_buffer *rb;
568 dma_addr_t mlen = sizeof(*rb);
569
570 rb = dma_memory_map(ringbuf->as, ringbuf->rb_addr, &mlen,
571 DMA_DIRECTION_FROM_DEVICE, MEMTXATTRS_UNSPECIFIED);
572 if (mlen != sizeof(*rb)) {
573 dma_memory_unmap(ringbuf->as, rb, mlen,
574 DMA_DIRECTION_FROM_DEVICE, 0);
575 return NULL;
576 }
577 return rb;
578 }
579
/*
 * Release header mapping @rb obtained with ringbuf_map_hdr() on @ringbuf.
 * @dirty tells whether the header is to be reported as accessed (in full) or
 * not at all.
 */
static void ringbuf_unmap_hdr(VMBusRingBufCommon *ringbuf,
                              vmbus_ring_buffer *rb, bool dirty)
{
    dma_addr_t accessed = 0;

    assert(rb);

    if (dirty) {
        accessed = sizeof(*rb);
    }
    dma_memory_unmap(ringbuf->as, rb, sizeof(*rb), DMA_DIRECTION_FROM_DEVICE,
                     accessed);
}
588
/*
 * Set up the part of a ring buffer common to both directions.
 * The ring buffer takes pages [@begin, @end) of @gpadl: page @begin holds the
 * header, the pages after it form the data area.
 */
static void ringbuf_init_common(VMBusRingBufCommon *ringbuf, VMBusGpadl *gpadl,
                                AddressSpace *as, DMADirection dir,
                                uint32_t begin, uint32_t end)
{
    uint32_t first_data_page = begin + 1;
    uint32_t data_pages = end - begin - 1;

    ringbuf->as = as;
    ringbuf->rb_addr = gpadl->gfns[begin] << TARGET_PAGE_BITS;
    ringbuf->base = first_data_page << TARGET_PAGE_BITS;
    ringbuf->len = data_pages << TARGET_PAGE_BITS;
    gpadl_iter_init(&ringbuf->iter, gpadl, as, dir);
}
599
/*
 * Set up both ring buffers of @chan within its GPADL: the receive ring buffer
 * takes the pages before @ringbuf_send_offset, the send ring buffer the rest.
 * The starting indices are taken from the ring buffer headers in guest
 * memory, and VMBUS_RING_BUFFER_FEAT_PENDING_SZ is set in the feature bits of
 * the send ring buffer header.
 *
 * Returns 0 on success, -EINVAL if either ring buffer would have fewer than
 * two pages, -EFAULT if a header can't be mapped, -EOVERFLOW if an index read
 * from the guest lies outside its ring buffer.
 */
static int ringbufs_init(VMBusChannel *chan)
{
    VMBusRecvRingBuf *rx = &chan->recv_ringbuf;
    VMBusSendRingBuf *tx = &chan->send_ringbuf;
    uint32_t split = chan->ringbuf_send_offset;
    AddressSpace *as;
    vmbus_ring_buffer *hdr;

    /* each ring buffer needs a header page and at least one data page */
    if (split <= 1 || chan->gpadl->num_gfns <= split + 1) {
        return -EINVAL;
    }

    as = chan->dev->dma_as;
    ringbuf_init_common(&rx->common, chan->gpadl, as,
                        DMA_DIRECTION_TO_DEVICE, 0, split);
    ringbuf_init_common(&tx->common, chan->gpadl, as,
                        DMA_DIRECTION_FROM_DEVICE, split,
                        chan->gpadl->num_gfns);
    tx->wanted = 0;
    tx->reserved = 0;

    hdr = ringbuf_map_hdr(&rx->common);
    if (!hdr) {
        return -EFAULT;
    }
    rx->last_rd_idx = hdr->read_index;
    rx->rd_idx = rx->last_rd_idx;
    ringbuf_unmap_hdr(&rx->common, hdr, false);

    hdr = ringbuf_map_hdr(&tx->common);
    if (!hdr) {
        return -EFAULT;
    }
    tx->last_wr_idx = hdr->write_index;
    tx->wr_idx = tx->last_wr_idx;
    tx->last_seen_rd_idx = hdr->read_index;
    hdr->feature_bits |= VMBUS_RING_BUFFER_FEAT_PENDING_SZ;
    ringbuf_unmap_hdr(&tx->common, hdr, true);

    /* the indices come from the guest and can't be trusted */
    if (rx->rd_idx >= rx->common.len || tx->wr_idx >= tx->common.len) {
        return -EOVERFLOW;
    }

    return 0;
}
642
643 /*
644 * Perform io between the GPADL-backed ringbuffer @ringbuf and @buf, wrapping
645 * around if needed.
646 * @len is assumed not to exceed the size of the ringbuffer, so only single
647 * wraparound is considered.
648 */
ringbuf_io(VMBusRingBufCommon * ringbuf,void * buf,uint32_t len)649 static ssize_t ringbuf_io(VMBusRingBufCommon *ringbuf, void *buf, uint32_t len)
650 {
651 ssize_t ret1 = 0, ret2 = 0;
652 uint32_t remain = ringbuf->len + ringbuf->base - ringbuf->iter.off;
653
654 if (len >= remain) {
655 ret1 = gpadl_iter_io(&ringbuf->iter, buf, remain);
656 if (ret1 < 0) {
657 return ret1;
658 }
659 gpadl_iter_seek(&ringbuf->iter, ringbuf->base);
660 buf += remain;
661 len -= remain;
662 }
663 ret2 = gpadl_iter_io(&ringbuf->iter, buf, len);
664 if (ret2 < 0) {
665 return ret2;
666 }
667 return ret1 + ret2;
668 }
669
670 /*
671 * Position the circular iterator within @ringbuf to offset @new_off, wrapping
672 * around if needed.
673 * @new_off is assumed not to exceed twice the size of the ringbuffer, so only
674 * single wraparound is considered.
675 */
ringbuf_seek(VMBusRingBufCommon * ringbuf,uint32_t new_off)676 static inline void ringbuf_seek(VMBusRingBufCommon *ringbuf, uint32_t new_off)
677 {
678 gpadl_iter_seek(&ringbuf->iter,
679 ringbuf->base + rb_idx_wrap(new_off, ringbuf->len));
680 }
681
/* Current position relative to the start of the data area of @ringbuf. */
static inline uint32_t ringbuf_tell(VMBusRingBufCommon *ringbuf)
{
    uint32_t gpadl_off = ringbuf->iter.off;

    return gpadl_off - ringbuf->base;
}
686
/* Open a series of i/o on @ringbuf; to be paired with ringbuf_end_io(). */
static inline void ringbuf_start_io(VMBusRingBufCommon *ringbuf)
{
    GpadlIter *it = &ringbuf->iter;

    gpadl_iter_start_io(it);
}
691
/* Close the series of i/o opened with ringbuf_start_io() on @ringbuf. */
static inline void ringbuf_end_io(VMBusRingBufCommon *ringbuf)
{
    GpadlIter *it = &ringbuf->iter;

    gpadl_iter_end_io(it);
}
696
vmbus_channel_device(VMBusChannel * chan)697 VMBusDevice *vmbus_channel_device(VMBusChannel *chan)
698 {
699 return chan->dev;
700 }
701
vmbus_device_channel(VMBusDevice * dev,uint32_t chan_idx)702 VMBusChannel *vmbus_device_channel(VMBusDevice *dev, uint32_t chan_idx)
703 {
704 if (chan_idx >= dev->num_channels) {
705 return NULL;
706 }
707 return &dev->channels[chan_idx];
708 }
709
/* Index of @chan within the channel array of its device. */
uint32_t vmbus_channel_idx(VMBusChannel *chan)
{
    VMBusDevice *owner = chan->dev;

    return chan - owner->channels;
}
714
/* Signal the guest->host event notifier of @chan from the host side. */
void vmbus_channel_notify_host(VMBusChannel *chan)
{
    EventNotifier *n = &chan->notifier;

    event_notifier_set(n);
}
719
/* Whether @chan is currently marked open. */
bool vmbus_channel_is_open(VMBusChannel *chan)
{
    bool open = chan->is_open;

    return open;
}
724
725 /*
726 * Notify the guest side about the data to work on in the channel ring buffer.
727 * The notification is done by signaling a dedicated per-channel SynIC event
728 * flag (more recent guests) or setting a bit in the interrupt page and firing
729 * the VMBus SINT (older guests).
730 */
vmbus_channel_notify_guest(VMBusChannel * chan)731 static int vmbus_channel_notify_guest(VMBusChannel *chan)
732 {
733 int res = 0;
734 unsigned long *int_map, mask;
735 unsigned idx;
736 hwaddr addr = chan->vmbus->int_page_gpa;
737 hwaddr len = TARGET_PAGE_SIZE / 2, dirty = 0;
738
739 trace_vmbus_channel_notify_guest(chan->id);
740
741 if (!addr) {
742 return hyperv_set_event_flag(chan->notify_route, chan->id);
743 }
744
745 int_map = cpu_physical_memory_map(addr, &len, 1);
746 if (len != TARGET_PAGE_SIZE / 2) {
747 res = -ENXIO;
748 goto unmap;
749 }
750
751 idx = BIT_WORD(chan->id);
752 mask = BIT_MASK(chan->id);
753 if ((qatomic_fetch_or(&int_map[idx], mask) & mask) != mask) {
754 res = hyperv_sint_route_set_sint(chan->notify_route);
755 dirty = len;
756 }
757
758 unmap:
759 cpu_physical_memory_unmap(int_map, len, 1, dirty);
760 return res;
761 }
762
/* bytes added after every packet when computing its size in the ring buffer */
#define VMBUS_PKT_TRAILER      sizeof(uint64_t)
764
/*
 * Fill in the offset and length fields of packet header @hdr, both in units
 * of 8 bytes, for a packet with @desclen bytes of descriptor and @msglen
 * bytes of message, each rounded up to a multiple of 8 bytes.
 *
 * Returns the total number of bytes the packet takes, trailer included.
 */
static uint32_t vmbus_pkt_hdr_set_offsets(vmbus_packet_hdr *hdr,
                                          uint32_t desclen, uint32_t msglen)
{
    const size_t qword = sizeof(uint64_t);

    /* the message starts after the header and the descriptor */
    hdr->offset_qwords = sizeof(*hdr) / qword + DIV_ROUND_UP(desclen, qword);
    hdr->len_qwords = hdr->offset_qwords + DIV_ROUND_UP(msglen, qword);

    return hdr->len_qwords * qword + VMBUS_PKT_TRAILER;
}
774
775 /*
776 * Simplified ring buffer operation with paired barriers annotations in the
777 * producer and consumer loops:
778 *
779 * producer * consumer
780 * ~~~~~~~~ * ~~~~~~~~
781 * write pending_send_sz * read write_index
782 * smp_mb [A] * smp_mb [C]
783 * read read_index * read packet
784 * smp_mb [B] * read/write out-of-band data
785 * read/write out-of-band data * smp_mb [B]
786 * write packet * write read_index
787 * smp_mb [C] * smp_mb [A]
788 * write write_index * read pending_send_sz
789 * smp_wmb [D] * smp_rmb [D]
790 * write pending_send_sz * read write_index
791 * ... * ...
792 */
793
/*
 * Number of bytes that can be written to send ring buffer @ringbuf, judging
 * by the read index last seen; the write index is never allowed to catch up
 * the read index.
 * A read index outside the ring buffer yields no space at all.
 */
static inline uint32_t ringbuf_send_avail(VMBusSendRingBuf *ringbuf)
{
    uint32_t size = ringbuf->common.len;
    uint32_t rd_idx = ringbuf->last_seen_rd_idx;

    /* don't trust guest data */
    if (rd_idx < size) {
        return rb_idx_delta(ringbuf->wr_idx, rd_idx, size, false);
    }
    return 0;
}
803
/*
 * Commit what has been written to the send ring buffer of @chan since the
 * previous commit: publish the new write index in the ring buffer header,
 * release the corresponding part of the reservation, and notify the guest
 * unless it asked not to be interrupted or hasn't yet consumed the data
 * committed earlier.
 *
 * Returns the number of bytes committed (0 if there was nothing to commit),
 * or -EFAULT if the ring buffer header can't be mapped.
 */
static ssize_t ringbuf_send_update_idx(VMBusChannel *chan)
{
    VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
    vmbus_ring_buffer *rb;
    uint32_t written;

    /* the write index may coincide with its value at the start of send */
    written = rb_idx_delta(ringbuf->last_wr_idx, ringbuf->wr_idx,
                           ringbuf->common.len, true);
    if (!written) {
        return 0;
    }

    rb = ringbuf_map_hdr(&ringbuf->common);
    if (!rb) {
        return -EFAULT;
    }

    ringbuf->reserved -= written;

    /* prevent reorder with the data operation and packet write */
    smp_mb();                   /* barrier pair [C] */
    rb->write_index = ringbuf->wr_idx;

    /*
     * If the producer earlier indicated that it wants to be notified when the
     * consumer frees certain amount of space in the ring buffer, that amount
     * is reduced by the size of the completed write.
     */
    if (ringbuf->wanted) {
        /* otherwise reservation would fail */
        /*
         * NOTE(review): with wanted < written the unsigned subtraction below
         * wraps around - confirm the intended relation between the two.
         */
        assert(ringbuf->wanted < written);
        ringbuf->wanted -= written;
        /* prevent reorder with write_index write */
        smp_wmb();              /* barrier pair [D] */
        rb->pending_send_sz = ringbuf->wanted;
    }

    /* prevent reorder with write_index or pending_send_sz write */
    smp_mb();                   /* barrier pair [A] */
    ringbuf->last_seen_rd_idx = rb->read_index;

    /*
     * The consumer may have missed the reduction of pending_send_sz and skip
     * notification, so re-check the blocking condition, and, if it's no longer
     * true, ensure processing another iteration by simulating consumer's
     * notification.
     */
    if (ringbuf_send_avail(ringbuf) >= ringbuf->wanted) {
        vmbus_channel_notify_host(chan);
    }

    /* skip notification by consumer's request */
    if (rb->interrupt_mask) {
        goto out;
    }

    /*
     * The consumer hasn't caught up with the producer's previous state so it's
     * not blocked.
     * (last_seen_rd_idx comes from the guest but it's safe to use w/o
     * validation here as it only affects notification.)
     */
    if (rb_idx_delta(ringbuf->last_seen_rd_idx, ringbuf->wr_idx,
                     ringbuf->common.len, true) > written) {
        goto out;
    }

    /* the result of the notification is not propagated to the caller */
    vmbus_channel_notify_guest(chan);
out:
    ringbuf_unmap_hdr(&ringbuf->common, rb, true);
    /* the next commit counts from here */
    ringbuf->last_wr_idx = ringbuf->wr_idx;
    return written;
}
877
/*
 * Reserve room in the send ring buffer of @chan for one more packet with
 * @desclen bytes of descriptor and @msglen bytes of message, on top of what
 * is reserved already.
 * If there isn't enough room, the total amount needed is written to
 * pending_send_sz in the ring buffer header and remembered in
 * @send_ringbuf.wanted.
 *
 * Returns 0 on success, -ENOSPC if there isn't enough room, or -EFAULT if
 * the ring buffer header had to be mapped to find out and couldn't be.
 */
int vmbus_channel_reserve(VMBusChannel *chan,
                          uint32_t desclen, uint32_t msglen)
{
    VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
    vmbus_ring_buffer *rb = NULL;
    /* only used to compute the size of the packet */
    vmbus_packet_hdr hdr;
    uint32_t needed = ringbuf->reserved +
        vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);

    /* avoid touching the guest memory if possible */
    if (likely(needed <= ringbuf_send_avail(ringbuf))) {
        goto success;
    }

    rb = ringbuf_map_hdr(&ringbuf->common);
    if (!rb) {
        return -EFAULT;
    }

    /* fetch read index from guest memory and try again */
    ringbuf->last_seen_rd_idx = rb->read_index;

    if (likely(needed <= ringbuf_send_avail(ringbuf))) {
        goto success;
    }

    rb->pending_send_sz = needed;

    /*
     * The consumer may have made progress and freed up some space before
     * seeing updated pending_send_sz, so re-read read_index (preventing
     * reorder with the pending_send_sz write) and try again.
     */
    smp_mb();                   /* barrier pair [A] */
    ringbuf->last_seen_rd_idx = rb->read_index;

    if (needed > ringbuf_send_avail(ringbuf)) {
        goto out;
    }

success:
    ringbuf->reserved = needed;
    /* from here on @needed is what is still lacking, i.e. nothing */
    needed = 0;

    /* clear pending_send_sz if it was set */
    if (ringbuf->wanted) {
        if (!rb) {
            rb = ringbuf_map_hdr(&ringbuf->common);
            if (!rb) {
                /* failure to clear pending_send_sz is non-fatal */
                goto out;
            }
        }

        rb->pending_send_sz = 0;
    }

    /* prevent reorder of the following data operation with read_index read */
    smp_mb();                   /* barrier pair [B] */

out:
    if (rb) {
        /*
         * NOTE(review): the header is reported as written to only when
         * @wanted is not going to change, although pending_send_sz is written
         * in the other cases too - confirm the condition isn't inverted.
         */
        ringbuf_unmap_hdr(&ringbuf->common, rb, ringbuf->wanted == needed);
    }
    ringbuf->wanted = needed;
    return needed ? -ENOSPC : 0;
}
945
/*
 * Write a packet of type @pkt_type to the send ring buffer of @chan and
 * commit it.  The packet consists of a header, @desclen bytes of descriptor
 * @desc (may be absent if @desclen is 0) and @msglen bytes of message @msg.
 * @need_comp tells whether completion is requested from the receiver;
 * @transaction_id is passed along in the header.
 * The space for the packet must have been reserved beforehand (asserted).
 *
 * Returns -EINVAL if the channel isn't open, a negative errno if writing the
 * packet fails; otherwise the result of committing the write index.
 */
ssize_t vmbus_channel_send(VMBusChannel *chan, uint16_t pkt_type,
                           void *desc, uint32_t desclen,
                           void *msg, uint32_t msglen,
                           bool need_comp, uint64_t transaction_id)
{
    VMBusSendRingBuf *tx = &chan->send_ringbuf;
    VMBusRingBufCommon *rb = &tx->common;
    vmbus_packet_hdr pkt;
    uint32_t pktlen;
    ssize_t res;

    if (!vmbus_channel_is_open(chan)) {
        return -EINVAL;
    }

    pktlen = vmbus_pkt_hdr_set_offsets(&pkt, desclen, msglen);
    pkt.type = pkt_type;
    pkt.flags = need_comp ? VMBUS_PACKET_FLAG_REQUEST_COMPLETION : 0;
    pkt.transaction_id = transaction_id;

    assert(pktlen <= tx->reserved);

    ringbuf_start_io(rb);
    ringbuf_seek(rb, tx->wr_idx);

    res = ringbuf_io(rb, &pkt, sizeof(pkt));
    if (res >= 0 && desclen) {
        assert(desc);
        res = ringbuf_io(rb, desc, desclen);
        if (res >= 0) {
            /* the message starts on an 8-byte boundary */
            ringbuf_seek(rb,
                         tx->wr_idx + pkt.offset_qwords * sizeof(uint64_t));
        }
    }
    if (res >= 0) {
        res = ringbuf_io(rb, msg, msglen);
    }
    if (res >= 0) {
        /* step over the padding and the trailer */
        ringbuf_seek(rb, tx->wr_idx + pktlen);
        tx->wr_idx = ringbuf_tell(rb);
    }

    ringbuf_end_io(rb);

    return res < 0 ? res : ringbuf_send_update_idx(chan);
}
996
/*
 * Send a completion packet carrying @msglen bytes of @msg for request @req,
 * on the channel and with the transaction id of the request.
 * The request must have asked for completion (asserted).
 *
 * Returns what vmbus_channel_send() returns.
 */
ssize_t vmbus_channel_send_completion(VMBusChanReq *req,
                                      void *msg, uint32_t msglen)
{
    VMBusChannel *chan;

    assert(req->need_comp);

    chan = req->chan;
    return vmbus_channel_send(chan, VMBUS_PACKET_COMP, NULL, 0,
                              msg, msglen, false, req->transaction_id);
}
1004
/*
 * Build scatter-gather list @sgl for device @dev from the GPA ranges
 * descriptor found at the current position in @ringbuf; @len is the number
 * of bytes the descriptor takes there.
 * The descriptor is a header followed by the ranges, each of which is a
 * range header followed by the frame numbers of its pages.  Fragments that
 * are consecutive in guest physical memory are joined into a single entry.
 *
 * Returns 0 on success, in which case @sgl is to be destroyed by the caller;
 * -EIO if the descriptor doesn't fit in @len or is otherwise malformed; or
 * the negative errno from reading the ring buffer.  @sgl is not left
 * initialized on failure.
 */
static int sgl_from_gpa_ranges(QEMUSGList *sgl, VMBusDevice *dev,
                               VMBusRingBufCommon *ringbuf, uint32_t len)
{
    int ret;
    vmbus_pkt_gpa_direct hdr;
    hwaddr curaddr = 0;
    hwaddr curlen = 0;
    uint64_t ranges_len;
    int num;

    if (len < sizeof(hdr)) {
        return -EIO;
    }
    ret = ringbuf_io(ringbuf, &hdr, sizeof(hdr));
    if (ret < 0) {
        return ret;
    }
    len -= sizeof(hdr);

    /*
     * The range count comes from the guest: make sure all the range headers
     * fit in what is left before using the difference as the estimate of the
     * number of pages.  The product is computed in 64 bits so that it can't
     * overflow.
     */
    ranges_len = (uint64_t)hdr.rangecount * sizeof(vmbus_gpa_range);
    if (ranges_len > len) {
        return -EIO;
    }
    num = (len - ranges_len) / sizeof(uint64_t);
    qemu_sglist_init(sgl, DEVICE(dev), num, ringbuf->as);

    for (; hdr.rangecount; hdr.rangecount--) {
        vmbus_gpa_range range;

        if (len < sizeof(range)) {
            goto eio;
        }
        ret = ringbuf_io(ringbuf, &range, sizeof(range));
        if (ret < 0) {
            goto err;
        }
        len -= sizeof(range);

        /* the offset must lie within the first page of the range */
        if (range.byte_offset & TARGET_PAGE_MASK) {
            goto eio;
        }

        /* only the first page of the range is entered at an offset */
        for (; range.byte_count; range.byte_offset = 0) {
            uint64_t paddr;
            uint32_t plen = MIN(range.byte_count,
                                TARGET_PAGE_SIZE - range.byte_offset);

            if (len < sizeof(uint64_t)) {
                goto eio;
            }
            ret = ringbuf_io(ringbuf, &paddr, sizeof(paddr));
            if (ret < 0) {
                goto err;
            }
            len -= sizeof(uint64_t);
            /* turn the frame number into the address of the fragment */
            paddr <<= TARGET_PAGE_BITS;
            paddr |= range.byte_offset;
            range.byte_count -= plen;

            if (curaddr + curlen == paddr) {
                /* consecutive fragments - join */
                curlen += plen;
            } else {
                if (curlen) {
                    qemu_sglist_add(sgl, curaddr, curlen);
                }

                curaddr = paddr;
                curlen = plen;
            }
        }
    }

    /* flush the fragment being accumulated */
    if (curlen) {
        qemu_sglist_add(sgl, curaddr, curlen);
    }

    return 0;
eio:
    ret = -EIO;
err:
    qemu_sglist_destroy(sgl);
    return ret;
}
1087
vmbus_alloc_req(VMBusChannel * chan,uint32_t size,uint16_t pkt_type,uint32_t msglen,uint64_t transaction_id,bool need_comp)1088 static VMBusChanReq *vmbus_alloc_req(VMBusChannel *chan,
1089 uint32_t size, uint16_t pkt_type,
1090 uint32_t msglen, uint64_t transaction_id,
1091 bool need_comp)
1092 {
1093 VMBusChanReq *req;
1094 uint32_t msgoff = QEMU_ALIGN_UP(size, __alignof__(*req->msg));
1095 uint32_t totlen = msgoff + msglen;
1096
1097 req = g_malloc0(totlen);
1098 req->chan = chan;
1099 req->pkt_type = pkt_type;
1100 req->msg = (void *)req + msgoff;
1101 req->msglen = msglen;
1102 req->transaction_id = transaction_id;
1103 req->need_comp = need_comp;
1104 return req;
1105 }
1106
/*
 * Begin a receive pass on @chan: snapshot the write index from the receive
 * ring buffer header into last_seen_wr_idx.
 *
 * Returns 0 on success, -EFAULT if the header cannot be mapped, or
 * -EOVERFLOW if the write index read from the header is not below the ring
 * length.
 */
int vmbus_channel_recv_start(VMBusChannel *chan)
{
    VMBusRecvRingBuf *rx = &chan->recv_ringbuf;
    vmbus_ring_buffer *hdr = ringbuf_map_hdr(&rx->common);

    if (!hdr) {
        return -EFAULT;
    }
    rx->last_seen_wr_idx = hdr->write_index;
    ringbuf_unmap_hdr(&rx->common, hdr, false);

    /* the index comes from shared memory: validate it before anyone uses it */
    if (rx->last_seen_wr_idx >= rx->common.len) {
        return -EOVERFLOW;
    }

    /* keep the write_index read ordered before the data reads that follow */
    smp_mb();                   /* barrier pair [C] */
    return 0;
}
1127
/*
 * Read the packet at the current read index of @chan's receive ring buffer
 * into a newly allocated request of @size bytes (which must be at least
 * sizeof(VMBusChanReq)), without advancing rd_idx.
 *
 * Returns the request, or NULL when no complete packet is available, the
 * packet is malformed or of an unexpected type, or ring buffer i/o fails.
 * On success the ring buffer position is left just past the packet and its
 * trailer.
 */
void *vmbus_channel_recv_peek(VMBusChannel *chan, uint32_t size)
{
    VMBusRecvRingBuf *rx = &chan->recv_ringbuf;
    vmbus_packet_hdr hdr = {};
    VMBusChanReq *req;
    uint32_t avail, pktlen, totlen, msgoff, msglen;

    assert(size >= sizeof(*req));

    /* safe as last_seen_wr_idx is validated in vmbus_channel_recv_start */
    avail = rb_idx_delta(rx->rd_idx, rx->last_seen_wr_idx, rx->common.len,
                         true);
    if (avail < sizeof(hdr)) {
        return NULL;
    }

    ringbuf_seek(&rx->common, rx->rd_idx);
    if (ringbuf_io(&rx->common, &hdr, sizeof(hdr)) < 0) {
        return NULL;
    }

    /* the whole packet plus trailer must already be in the ring */
    pktlen = hdr.len_qwords * sizeof(uint64_t);
    totlen = pktlen + VMBUS_PKT_TRAILER;
    if (totlen > avail) {
        return NULL;
    }

    /* the message must start after the header and within the packet */
    msgoff = hdr.offset_qwords * sizeof(uint64_t);
    if (msgoff < sizeof(hdr) || msgoff > pktlen) {
        error_report("%s: malformed packet: %u %u", __func__, msgoff, pktlen);
        return NULL;
    }

    msglen = pktlen - msgoff;

    req = vmbus_alloc_req(chan, size, hdr.type, msglen, hdr.transaction_id,
                          hdr.flags & VMBUS_PACKET_FLAG_REQUEST_COMPLETION);

    if (hdr.type == VMBUS_PACKET_DATA_USING_GPA_DIRECT) {
        /* what sits between the header and the message is the descriptor */
        if (sgl_from_gpa_ranges(&req->sgl, chan->dev, &rx->common,
                                msgoff - sizeof(hdr)) < 0) {
            error_report("%s: failed to convert GPA ranges to SGL", __func__);
            goto free_req;
        }
    } else if (hdr.type != VMBUS_PACKET_DATA_INBAND &&
               hdr.type != VMBUS_PACKET_COMP) {
        error_report("%s: unexpected msg type: %x", __func__, hdr.type);
        goto free_req;
    }

    ringbuf_seek(&rx->common, rx->rd_idx + msgoff);
    if (ringbuf_io(&rx->common, req->msg, msglen) < 0) {
        goto free_req;
    }
    ringbuf_seek(&rx->common, rx->rd_idx + totlen);

    return req;

free_req:
    vmbus_free_req(req);
    return NULL;
}
1195
/*
 * Consume the packet last peeked: move the receive read index to the current
 * ring buffer position as reported by ringbuf_tell().
 */
void vmbus_channel_recv_pop(VMBusChannel *chan)
{
    chan->recv_ringbuf.rd_idx = ringbuf_tell(&chan->recv_ringbuf.common);
}
1201
/*
 * Finish a receive pass on @chan: publish the new read index in the ring
 * buffer header and notify the guest if appropriate.
 *
 * Returns 0 if rd_idx has not moved since the previous call, -EFAULT if the
 * ring buffer header cannot be mapped, otherwise the value computed by
 * rb_idx_delta() between last_rd_idx and rd_idx.
 */
ssize_t vmbus_channel_recv_done(VMBusChannel *chan)
{
    VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
    vmbus_ring_buffer *rb;
    uint32_t read;

    read = rb_idx_delta(ringbuf->last_rd_idx, ringbuf->rd_idx,
                        ringbuf->common.len, true);
    if (!read) {
        return 0;
    }

    rb = ringbuf_map_hdr(&ringbuf->common);
    if (!rb) {
        return -EFAULT;
    }

    /* prevent reorder with the data operation and packet read */
    smp_mb();                   /* barrier pair [B] */
    rb->read_index = ringbuf->rd_idx;

    /* prevent reorder of the following pending_send_sz read */
    smp_mb();                   /* barrier pair [A] */

    /* interrupt_mask set in the header: skip the notification */
    if (rb->interrupt_mask) {
        goto out;
    }

    /*
     * With the PENDING_SZ feature bit set, notify only when this pass moved
     * the available space from below pending_send_sz to at least that much.
     */
    if (rb->feature_bits & VMBUS_RING_BUFFER_FEAT_PENDING_SZ) {
        uint32_t wr_idx, wr_avail;
        uint32_t wanted = rb->pending_send_sz;

        if (!wanted) {
            goto out;
        }

        /* prevent reorder with pending_send_sz read */
        smp_rmb();              /* barrier pair [D] */
        wr_idx = rb->write_index;

        wr_avail = rb_idx_delta(wr_idx, ringbuf->rd_idx, ringbuf->common.len,
                                true);

        /* the producer wasn't blocked on the consumer state */
        if (wr_avail >= read + wanted) {
            goto out;
        }
        /* there's not enough space for the producer to make progress */
        if (wr_avail < wanted) {
            goto out;
        }
    }

    vmbus_channel_notify_guest(chan);
out:
    /* unmap the header (last argument true, unlike in recv_start) */
    ringbuf_unmap_hdr(&ringbuf->common, rb, true);
    ringbuf->last_rd_idx = ringbuf->rd_idx;
    return read;
}
1261
vmbus_free_req(void * req)1262 void vmbus_free_req(void *req)
1263 {
1264 VMBusChanReq *r = req;
1265
1266 if (!req) {
1267 return;
1268 }
1269
1270 if (r->sgl.dev) {
1271 qemu_sglist_destroy(&r->sgl);
1272 }
1273 g_free(req);
1274 }
1275
/*
 * Event notifier handler for a channel: if the notifier was signalled, run
 * the channel's notify callback.
 */
static void channel_event_cb(EventNotifier *e)
{
    VMBusChannel *chan = container_of(e, VMBusChannel, notifier);
    VMBusRecvRingBuf *rx;

    if (!event_notifier_test_and_clear(e)) {
        return;
    }

    /*
     * Receives are expected to happen only inside the device worker, so the
     * worker is wrapped in ringbuf_start_io/ringbuf_end_io on the receive
     * ring buffer, letting the cached mapping be reused for the whole run.
     * Sends can't get the same treatment as they may happen outside the
     * device worker.
     */
    rx = &chan->recv_ringbuf;
    ringbuf_start_io(&rx->common);
    chan->notify_cb(chan);
    ringbuf_end_io(&rx->common);
}
1294
/*
 * Find the lowest free slot in the channel id bitmap.  The bitmap is only
 * searched, not modified.
 *
 * Returns the channel id (bitmap position plus VMBUS_FIRST_CHANID), or
 * -ENOMEM when every slot is taken.
 */
static int alloc_chan_id(VMBus *vmbus)
{
    int bit = find_next_zero_bit(vmbus->chanid_bitmap, VMBUS_CHANID_COUNT, 0);

    if (bit != VMBUS_CHANID_COUNT) {
        return bit + VMBUS_FIRST_CHANID;
    }
    return -ENOMEM;
}
1305
/*
 * Mark @chan's id as used in its vmbus' channel id bitmap.
 * Returns 0 on success, -EEXIST if the id was already marked.
 */
static int register_chan_id(VMBusChannel *chan)
{
    if (test_and_set_bit(chan->id - VMBUS_FIRST_CHANID,
                         chan->vmbus->chanid_bitmap)) {
        return -EEXIST;
    }
    return 0;
}
1311
/* Mark @chan's id as free again in its vmbus' channel id bitmap. */
static void unregister_chan_id(VMBusChannel *chan)
{
    VMBus *vmbus = chan->vmbus;

    clear_bit(chan->id - VMBUS_FIRST_CHANID, vmbus->chanid_bitmap);
}
1316
chan_connection_id(VMBusChannel * chan)1317 static uint32_t chan_connection_id(VMBusChannel *chan)
1318 {
1319 return VMBUS_CHAN_CONNECTION_OFFSET + chan->id;
1320 }
1321
init_channel(VMBus * vmbus,VMBusDevice * dev,VMBusDeviceClass * vdc,VMBusChannel * chan,uint16_t idx,Error ** errp)1322 static void init_channel(VMBus *vmbus, VMBusDevice *dev, VMBusDeviceClass *vdc,
1323 VMBusChannel *chan, uint16_t idx, Error **errp)
1324 {
1325 int res;
1326
1327 chan->dev = dev;
1328 chan->notify_cb = vdc->chan_notify_cb;
1329 chan->subchan_idx = idx;
1330 chan->vmbus = vmbus;
1331
1332 res = alloc_chan_id(vmbus);
1333 if (res < 0) {
1334 error_setg(errp, "no spare channel id");
1335 return;
1336 }
1337 chan->id = res;
1338 register_chan_id(chan);
1339
1340 /*
1341 * The guest drivers depend on the device subchannels (idx #1+) to be
1342 * offered after the primary channel (idx #0) of that device. To ensure
1343 * that, record the channels on the channel list in the order they appear
1344 * within the device.
1345 */
1346 QTAILQ_INSERT_TAIL(&vmbus->channel_list, chan, link);
1347 }
1348
/*
 * Undo init_channel(): take @chan off its vmbus' channel list and release
 * its channel id.  The channel must be in VMCHAN_INIT state.
 */
static void deinit_channel(VMBusChannel *chan)
{
    VMBus *vmbus = chan->vmbus;

    assert(chan->state == VMCHAN_INIT);

    QTAILQ_REMOVE(&vmbus->channel_list, chan, link);
    unregister_chan_id(chan);
}
1355
create_channels(VMBus * vmbus,VMBusDevice * dev,Error ** errp)1356 static void create_channels(VMBus *vmbus, VMBusDevice *dev, Error **errp)
1357 {
1358 uint16_t i;
1359 VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(dev);
1360 Error *err = NULL;
1361
1362 dev->num_channels = vdc->num_channels ? vdc->num_channels(dev) : 1;
1363 if (dev->num_channels < 1) {
1364 error_setg(errp, "invalid #channels: %u", dev->num_channels);
1365 return;
1366 }
1367
1368 dev->channels = g_new0(VMBusChannel, dev->num_channels);
1369 for (i = 0; i < dev->num_channels; i++) {
1370 init_channel(vmbus, dev, vdc, &dev->channels[i], i, &err);
1371 if (err) {
1372 goto err_init;
1373 }
1374 }
1375
1376 return;
1377
1378 err_init:
1379 while (i--) {
1380 deinit_channel(&dev->channels[i]);
1381 }
1382 error_propagate(errp, err);
1383 }
1384
/* Deinitialise every channel of @dev, then free the channel array. */
static void free_channels(VMBusDevice *dev)
{
    uint16_t idx = 0;

    while (idx < dev->num_channels) {
        deinit_channel(&dev->channels[idx]);
        idx++;
    }
    g_free(dev->channels);
}
1393
make_sint_route(VMBus * vmbus,uint32_t vp_index)1394 static HvSintRoute *make_sint_route(VMBus *vmbus, uint32_t vp_index)
1395 {
1396 VMBusChannel *chan;
1397
1398 if (vp_index == vmbus->target_vp) {
1399 hyperv_sint_route_ref(vmbus->sint_route);
1400 return vmbus->sint_route;
1401 }
1402
1403 QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1404 if (chan->target_vp == vp_index && vmbus_channel_is_open(chan)) {
1405 hyperv_sint_route_ref(chan->notify_route);
1406 return chan->notify_route;
1407 }
1408 }
1409
1410 return hyperv_sint_route_new(vp_index, VMBUS_SINT, NULL, NULL);
1411 }
1412
/*
 * Try to open @chan using the parameters previously stored in it
 * (ringbuf_gpadl, target_vp): take a reference on the ring buffer GPADL,
 * initialise the ring buffers, set up the event notifier and its handler,
 * obtain a SINT route, and finally call the device class' open_channel hook
 * if there is one.
 *
 * Nothing is returned; success is recorded by setting chan->is_open.  On
 * any failure the steps already taken are undone in reverse order and
 * is_open is left untouched.
 */
static void open_channel(VMBusChannel *chan)
{
    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);

    chan->gpadl = vmbus_get_gpadl(chan, chan->ringbuf_gpadl);
    if (!chan->gpadl) {
        return;
    }

    if (ringbufs_init(chan)) {
        goto put_gpadl;
    }

    if (event_notifier_init(&chan->notifier, 0)) {
        goto put_gpadl;
    }

    event_notifier_set_handler(&chan->notifier, channel_event_cb);

    /* route guest signals for this connection id to the notifier */
    if (hyperv_set_event_flag_handler(chan_connection_id(chan),
                                      &chan->notifier)) {
        goto cleanup_notifier;
    }

    chan->notify_route = make_sint_route(chan->vmbus, chan->target_vp);
    if (!chan->notify_route) {
        goto clear_event_flag_handler;
    }

    /* the device hook is optional; a non-zero return aborts the open */
    if (vdc->open_channel && vdc->open_channel(chan)) {
        goto unref_sint_route;
    }

    chan->is_open = true;
    return;

    /* error unwinding: each label undoes one step and falls through */
unref_sint_route:
    hyperv_sint_route_unref(chan->notify_route);
clear_event_flag_handler:
    hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
cleanup_notifier:
    event_notifier_set_handler(&chan->notifier, NULL);
    event_notifier_cleanup(&chan->notifier);
put_gpadl:
    vmbus_put_gpadl(chan->gpadl);
}
1459
/*
 * Close @chan if it is open: call the device class' close_channel hook (if
 * any), then release what open_channel() acquired -- the SINT route
 * reference, the event flag handler, the event notifier and the GPADL
 * reference -- and clear is_open.  Does nothing for a channel that is not
 * open.
 */
static void close_channel(VMBusChannel *chan)
{
    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);

    if (chan->is_open) {
        if (vdc->close_channel) {
            vdc->close_channel(chan);
        }

        hyperv_sint_route_unref(chan->notify_route);
        hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
        event_notifier_set_handler(&chan->notifier, NULL);
        event_notifier_cleanup(&chan->notifier);
        vmbus_put_gpadl(chan->gpadl);

        chan->is_open = false;
    }
}
1479
/*
 * VMState post_load hook for a channel: register the loaded channel id in
 * the vmbus bitmap.  Returns -EEXIST if that id is already in use, which
 * fails the load.
 */
static int channel_post_load(void *opaque, int version_id)
{
    return register_chan_id(opaque);
}
1486
/*
 * Migration description of a VMBusChannel.  After the fields below have
 * been loaded, channel_post_load() registers the loaded channel id.
 */
static const VMStateDescription vmstate_channel = {
    .name = "vmbus/channel",
    .version_id = 0,
    .minimum_version_id = 0,
    .post_load = channel_post_load,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT32(id, VMBusChannel),
        VMSTATE_UINT16(subchan_idx, VMBusChannel),
        VMSTATE_UINT32(open_id, VMBusChannel),
        VMSTATE_UINT32(target_vp, VMBusChannel),
        VMSTATE_UINT32(ringbuf_gpadl, VMBusChannel),
        VMSTATE_UINT32(ringbuf_send_offset, VMBusChannel),
        VMSTATE_UINT8(offer_state, VMBusChannel),
        VMSTATE_UINT8(state, VMBusChannel),
        VMSTATE_END_OF_LIST()
    }
};
1504
find_channel(VMBus * vmbus,uint32_t id)1505 static VMBusChannel *find_channel(VMBus *vmbus, uint32_t id)
1506 {
1507 VMBusChannel *chan;
1508 QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1509 if (chan->id == id) {
1510 return chan;
1511 }
1512 }
1513 return NULL;
1514 }
1515
/*
 * Copy @msg into the next free slot of the vmbus receive queue, under the
 * queue lock.
 *
 * Returns 0 on success or -ENOBUFS if the queue already holds
 * HV_MSG_QUEUE_LEN messages.
 */
static int enqueue_incoming_message(VMBus *vmbus,
                                    const struct hyperv_post_message_input *msg)
{
    int ret = -ENOBUFS;

    qemu_mutex_lock(&vmbus->rx_queue_lock);

    if (vmbus->rx_queue_size != HV_MSG_QUEUE_LEN) {
        uint8_t prev_size = vmbus->rx_queue_size;
        /* the queue is circular: the tail slot is head + size, wrapped */
        uint8_t slot = (vmbus->rx_queue_head + vmbus->rx_queue_size) %
                       HV_MSG_QUEUE_LEN;

        memcpy(&vmbus->rx_queue[slot], msg, sizeof(*msg));
        vmbus->rx_queue_size++;

        /* only need to resched if the queue was empty before */
        if (!prev_size) {
            vmbus_resched(vmbus);
        }
        ret = 0;
    }

    qemu_mutex_unlock(&vmbus->rx_queue_lock);
    return ret;
}
1542
vmbus_recv_message(const struct hyperv_post_message_input * msg,void * data)1543 static uint16_t vmbus_recv_message(const struct hyperv_post_message_input *msg,
1544 void *data)
1545 {
1546 VMBus *vmbus = data;
1547 struct vmbus_message_header *vmbus_msg;
1548
1549 if (msg->message_type != HV_MESSAGE_VMBUS) {
1550 return HV_STATUS_INVALID_HYPERCALL_INPUT;
1551 }
1552
1553 if (msg->payload_size < sizeof(struct vmbus_message_header)) {
1554 return HV_STATUS_INVALID_HYPERCALL_INPUT;
1555 }
1556
1557 vmbus_msg = (struct vmbus_message_header *)msg->payload;
1558
1559 trace_vmbus_recv_message(vmbus_msg->message_type, msg->payload_size);
1560
1561 if (vmbus_msg->message_type == VMBUS_MSG_INVALID ||
1562 vmbus_msg->message_type >= VMBUS_MSG_COUNT) {
1563 error_report("vmbus: unknown message type %#x",
1564 vmbus_msg->message_type);
1565 return HV_STATUS_INVALID_HYPERCALL_INPUT;
1566 }
1567
1568 if (enqueue_incoming_message(vmbus, msg)) {
1569 return HV_STATUS_INSUFFICIENT_BUFFERS;
1570 }
1571 return HV_STATUS_SUCCESS;
1572 }
1573
vmbus_initialized(VMBus * vmbus)1574 static bool vmbus_initialized(VMBus *vmbus)
1575 {
1576 return vmbus->version > 0 && vmbus->version <= VMBUS_VERSION_CURRENT;
1577 }
1578
/* Cold-reset the whole vmbus by resetting it as a bus. */
static void vmbus_reset_all(VMBus *vmbus)
{
    bus_cold_reset(BUS(vmbus));
}
1583
/*
 * Post a vmbus message of @msglen bytes at @msgdata to the guest through the
 * vmbus SINT route.
 *
 * No other message may be in flight; msg_in_progress is set here.  @msglen
 * must fit the hyperv message payload and cover at least a vmbus message
 * header.  A delivery result other than 0 or -EAGAIN is treated as fatal and
 * resets the vmbus.
 */
static void post_msg(VMBus *vmbus, void *msgdata, uint32_t msglen)
{
    struct vmbus_message_header *hdr = msgdata;
    struct hyperv_message msg = {
        .header.message_type = HV_MESSAGE_VMBUS,
    };
    int rc;

    assert(!vmbus->msg_in_progress);
    assert(msglen <= sizeof(msg.payload));
    assert(msglen >= sizeof(struct vmbus_message_header));

    vmbus->msg_in_progress = true;

    trace_vmbus_post_msg(hdr->message_type, msglen);

    /* the advertised payload size is the message length rounded up */
    memcpy(msg.payload, msgdata, msglen);
    msg.header.payload_size = ROUND_UP(msglen, VMBUS_MESSAGE_SIZE_ALIGN);

    rc = hyperv_post_msg(vmbus->sint_route, &msg);
    if (rc != 0 && rc != -EAGAIN) {
        error_report("message delivery fatal failure: %d; aborting vmbus", rc);
        vmbus_reset_all(vmbus);
    }
}
1611
/*
 * Set up the vmbus SINT route towards target_vp, with vmbus_msg_cb as its
 * callback.  Nothing is done when target_vp is still (uint32_t)-1.
 *
 * Returns 0 on success, -ENOMEM if the route cannot be created.
 */
static int vmbus_init(VMBus *vmbus)
{
    if (vmbus->target_vp == (uint32_t)-1) {
        return 0;
    }

    vmbus->sint_route = hyperv_sint_route_new(vmbus->target_vp, VMBUS_SINT,
                                              vmbus_msg_cb, vmbus);
    if (vmbus->sint_route) {
        return 0;
    }

    error_report("failed to set up SINT route");
    return -ENOMEM;
}
1624
/*
 * Bring @vmbus back to its unconnected state: drop a reference on every
 * GPADL not already torn down, mark all channels as not offered, release
 * the SINT route and reset the connection parameters and state machine.
 */
static void vmbus_deinit(VMBus *vmbus)
{
    VMBusGpadl *gpadl, *next_gpadl;
    VMBusChannel *chan;

    /* _SAFE variant: the loop body may unlink the current element */
    QTAILQ_FOREACH_SAFE(gpadl, &vmbus->gpadl_list, link, next_gpadl) {
        if (gpadl->state != VMGPADL_TORNDOWN) {
            vmbus_put_gpadl(gpadl);
        }
    }

    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
        chan->offer_state = VMOFFER_INIT;
    }

    hyperv_sint_route_unref(vmbus->sint_route);
    vmbus->sint_route = NULL;

    vmbus->target_vp = (uint32_t)-1;
    vmbus->int_page_gpa = 0;
    vmbus->version = 0;

    vmbus->state = VMBUS_LISTEN;
    vmbus->msg_in_progress = false;
}
1649
/*
 * Handle INITIATE_CONTACT: reset the vmbus, record the target vcpu and the
 * requested version, and move to the HANDSHAKE state.  Messages shorter
 * than the message structure are ignored.
 */
static void handle_initiate_contact(VMBus *vmbus,
                                    vmbus_message_initiate_contact *msg,
                                    uint32_t msglen)
{
    if (msglen < sizeof(*msg)) {
        return;
    }

    trace_vmbus_initiate_contact(msg->version_requested >> 16,
                                 msg->version_requested & 0xffff,
                                 msg->target_vcpu, msg->monitor_page1,
                                 msg->monitor_page2, msg->interrupt_page);

    /*
     * INITIATE_CONTACT always resets the vmbus, whatever state it was in.
     * This matters in particular for a vmbus-aware BIOS, which has no way to
     * shut the vmbus down before handing over to the OS loader.
     */
    vmbus_reset_all(vmbus);

    vmbus->target_vp = msg->target_vcpu;
    vmbus->version = msg->version_requested;
    /* linux passes interrupt page even when it doesn't need it */
    if (vmbus->version < VMBUS_VERSION_WIN8) {
        vmbus->int_page_gpa = msg->interrupt_page;
    }
    vmbus->state = VMBUS_HANDSHAKE;

    if (vmbus_init(vmbus)) {
        error_report("failed to init vmbus; aborting");
        vmbus_deinit(vmbus);
    }
}
1684
/*
 * Post the VERSION_RESPONSE message; version_supported reports whether the
 * version recorded on @vmbus passes vmbus_initialized().
 */
static void send_handshake(VMBus *vmbus)
{
    struct vmbus_message_version_response reply = {
        .header.message_type = VMBUS_MSG_VERSION_RESPONSE,
    };

    reply.version_supported = vmbus_initialized(vmbus);
    post_msg(vmbus, &reply, sizeof(reply));
}
1694
/*
 * Handle REQUESTOFFERS: mark the first not-yet-offered channel as being
 * offered and switch the vmbus to the OFFER state.  Ignored unless the vmbus
 * is initialized.  The message contents are not used.
 */
static void handle_request_offers(VMBus *vmbus, void *msgdata, uint32_t msglen)
{
    VMBusChannel *chan;

    if (!vmbus_initialized(vmbus)) {
        return;
    }

    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
        if (chan->offer_state != VMOFFER_INIT) {
            continue;
        }
        chan->offer_state = VMOFFER_SENDING;
        break;
    }

    vmbus->state = VMBUS_OFFER;
}
1712
/*
 * Post the OFFERCHANNEL message for the channel currently marked
 * VMOFFER_SENDING.  When no channel is in that state, post the
 * ALLOFFERS_DELIVERED terminator instead.
 */
static void send_offer(VMBus *vmbus)
{
    VMBusChannel *chan;
    struct vmbus_message_header terminator = {
        .message_type = VMBUS_MSG_ALLOFFERS_DELIVERED,
    };

    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
        VMBusDeviceClass *vdc;
        QemuUUID type_id, inst_id;

        if (chan->offer_state != VMOFFER_SENDING) {
            continue;
        }

        vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
        /* Hyper-V wants LE GUIDs */
        type_id = qemu_uuid_bswap(vdc->classid);
        inst_id = qemu_uuid_bswap(chan->dev->instanceid);

        struct vmbus_message_offer_channel offer = {
            .header.message_type = VMBUS_MSG_OFFERCHANNEL,
            .child_relid = chan->id,
            .connection_id = chan_connection_id(chan),
            .channel_flags = vdc->channel_flags,
            .mmio_size_mb = vdc->mmio_size_mb,
            .sub_channel_index = vmbus_channel_idx(chan),
            .interrupt_flags = VMBUS_OFFER_INTERRUPT_DEDICATED,
        };

        memcpy(offer.type_uuid, &type_id, sizeof(type_id));
        memcpy(offer.instance_uuid, &inst_id, sizeof(inst_id));

        trace_vmbus_send_offer(chan->id, chan->dev);

        post_msg(vmbus, &offer, sizeof(offer));
        return;
    }

    /* no more offers, send terminator message */
    trace_vmbus_terminate_offers();
    post_msg(vmbus, &terminator, sizeof(terminator));
}
1750
complete_offer(VMBus * vmbus)1751 static bool complete_offer(VMBus *vmbus)
1752 {
1753 VMBusChannel *chan;
1754
1755 QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1756 if (chan->offer_state == VMOFFER_SENDING) {
1757 chan->offer_state = VMOFFER_SENT;
1758 goto next_offer;
1759 }
1760 }
1761 /*
1762 * no transitioning channels found so this is completing the terminator
1763 * message, and vmbus can move to the next state
1764 */
1765 return true;
1766
1767 next_offer:
1768 /* try to mark another channel for offering */
1769 QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1770 if (chan->offer_state == VMOFFER_INIT) {
1771 chan->offer_state = VMOFFER_SENDING;
1772 break;
1773 }
1774 }
1775 /*
1776 * if an offer has been sent there are more offers or the terminator yet to
1777 * send, so no state transition for vmbus
1778 */
1779 return false;
1780 }
1781
1782
/*
 * Handle GPADL_HEADER: create a new GPADL and fill it with as many GFNs as
 * this message carries.  If that completes the GPADL, switch the vmbus to
 * the CREATE_GPADL state; otherwise the rest is expected in GPADL_BODY
 * messages.
 *
 * The message is silently ignored when it is too short, the vmbus is not
 * initialized, the declared lengths are inconsistent, the GPADL is not a
 * single page-aligned range, or a GPADL with the same id already exists.
 */
static void handle_gpadl_header(VMBus *vmbus, vmbus_message_gpadl_header *msg,
                                uint32_t msglen)
{
    VMBusGpadl *gpadl;
    uint32_t num_gfns, i;

    /* must include at least one gpa range */
    if (msglen < sizeof(*msg) + sizeof(msg->range[0]) ||
        !vmbus_initialized(vmbus)) {
        return;
    }

    /*
     * Both fields are guest-controlled.  The range headers must fit in the
     * declared buffer length; otherwise the unsigned subtraction below wraps
     * around and yields a bogus, huge GFN count that would then be handed to
     * create_gpadl().
     */
    if (msg->range_buflen < msg->rangecount * sizeof(msg->range[0])) {
        return;
    }

    num_gfns = (msg->range_buflen - msg->rangecount * sizeof(msg->range[0])) /
               sizeof(msg->range[0].pfn_array[0]);

    trace_vmbus_gpadl_header(msg->gpadl_id, num_gfns);

    /*
     * In theory the GPADL_HEADER message can define a GPADL with multiple GPA
     * ranges each with arbitrary size and alignment.  However in practice only
     * single-range page-aligned GPADLs have been observed so just ignore
     * anything else and simplify things greatly.
     */
    if (msg->rangecount != 1 || msg->range[0].byte_offset ||
        (msg->range[0].byte_count != (num_gfns << TARGET_PAGE_BITS))) {
        return;
    }

    /* ignore requests to create already existing GPADLs */
    if (find_gpadl(vmbus, msg->gpadl_id)) {
        return;
    }

    gpadl = create_gpadl(vmbus, msg->gpadl_id, msg->child_relid, num_gfns);

    /* take only the GFNs that lie entirely within the received message */
    for (i = 0; i < num_gfns &&
         (void *)&msg->range[0].pfn_array[i + 1] <= (void *)msg + msglen;
         i++) {
        gpadl->gfns[gpadl->seen_gfns++] = msg->range[0].pfn_array[i];
    }

    if (gpadl_full(gpadl)) {
        vmbus->state = VMBUS_CREATE_GPADL;
    }
}
1828
/*
 * Handle GPADL_BODY: append the GFNs carried by this message to an existing,
 * still incomplete GPADL.  If that completes the GPADL, switch the vmbus to
 * the CREATE_GPADL state.
 *
 * The message is silently ignored when it is too short, the vmbus is not
 * initialized, the GPADL is unknown, or the GPADL already has all its GFNs.
 */
static void handle_gpadl_body(VMBus *vmbus, vmbus_message_gpadl_body *msg,
                              uint32_t msglen)
{
    VMBusGpadl *gpadl;
    uint32_t num_gfns_left, i;

    if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
        return;
    }

    trace_vmbus_gpadl_body(msg->gpadl_id);

    gpadl = find_gpadl(vmbus, msg->gpadl_id);
    if (!gpadl) {
        return;
    }

    /*
     * The guest decides when to send this message, so a body for an already
     * complete GPADL must not be asserted on.  Ignore it: there is nothing to
     * add, and re-entering VMBUS_CREATE_GPADL here could leave
     * send_create_gpadl() without any GPADL in VMGPADL_INIT state to report.
     */
    num_gfns_left = gpadl->num_gfns - gpadl->seen_gfns;
    if (!num_gfns_left) {
        return;
    }

    /* take only the GFNs that lie entirely within the received message */
    for (i = 0; i < num_gfns_left &&
         (void *)&msg->pfn_array[i + 1] <= (void *)msg + msglen; i++) {
        gpadl->gfns[gpadl->seen_gfns++] = msg->pfn_array[i];
    }

    if (gpadl_full(gpadl)) {
        vmbus->state = VMBUS_CREATE_GPADL;
    }
}
1858
/*
 * Post GPADL_CREATED for the first GPADL that is complete but still in
 * VMGPADL_INIT state.  Must only run when such a GPADL exists.
 */
static void send_create_gpadl(VMBus *vmbus)
{
    VMBusGpadl *gpadl;

    QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
        if (!(gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT)) {
            continue;
        }

        struct vmbus_message_gpadl_created reply = {
            .header.message_type = VMBUS_MSG_GPADL_CREATED,
            .gpadl_id = gpadl->id,
            .child_relid = gpadl->child_relid,
        };

        trace_vmbus_gpadl_created(gpadl->id);
        post_msg(vmbus, &reply, sizeof(reply));
        return;
    }

    g_assert_not_reached();
}
1879
complete_create_gpadl(VMBus * vmbus)1880 static bool complete_create_gpadl(VMBus *vmbus)
1881 {
1882 VMBusGpadl *gpadl;
1883
1884 QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
1885 if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
1886 gpadl->state = VMGPADL_ALIVE;
1887
1888 return true;
1889 }
1890 }
1891
1892 g_assert_not_reached();
1893 }
1894
/*
 * Handle GPADL_TEARDOWN: mark the named GPADL as being torn down and switch
 * the vmbus to the TEARDOWN_GPADL state.  Ignored when the message is too
 * short, the vmbus is not initialized, the GPADL is unknown, or it is
 * already torn down.
 */
static void handle_gpadl_teardown(VMBus *vmbus,
                                  vmbus_message_gpadl_teardown *msg,
                                  uint32_t msglen)
{
    VMBusGpadl *gpadl;

    if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
        return;
    }

    trace_vmbus_gpadl_teardown(msg->gpadl_id);

    gpadl = find_gpadl(vmbus, msg->gpadl_id);
    if (!gpadl) {
        return;
    }
    if (gpadl->state == VMGPADL_TORNDOWN) {
        return;
    }

    gpadl->state = VMGPADL_TEARINGDOWN;
    vmbus->state = VMBUS_TEARDOWN_GPADL;
}
1915
/*
 * Post GPADL_TORNDOWN for the first GPADL in VMGPADL_TEARINGDOWN state.
 * Must only run when such a GPADL exists.
 */
static void send_teardown_gpadl(VMBus *vmbus)
{
    VMBusGpadl *gpadl;

    QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
        if (gpadl->state != VMGPADL_TEARINGDOWN) {
            continue;
        }

        struct vmbus_message_gpadl_torndown reply = {
            .header.message_type = VMBUS_MSG_GPADL_TORNDOWN,
            .gpadl_id = gpadl->id,
        };

        trace_vmbus_gpadl_torndown(gpadl->id);
        post_msg(vmbus, &reply, sizeof(reply));
        return;
    }

    g_assert_not_reached();
}
1935
complete_teardown_gpadl(VMBus * vmbus)1936 static bool complete_teardown_gpadl(VMBus *vmbus)
1937 {
1938 VMBusGpadl *gpadl;
1939
1940 QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
1941 if (gpadl->state == VMGPADL_TEARINGDOWN) {
1942 gpadl->state = VMGPADL_TORNDOWN;
1943 vmbus_put_gpadl(gpadl);
1944 return true;
1945 }
1946 }
1947
1948 g_assert_not_reached();
1949 }
1950
/*
 * Handle OPENCHANNEL: store the open parameters in the channel, attempt to
 * open it, and switch the vmbus to the OPEN_CHANNEL state so that the result
 * is reported.  Ignored when the message is too short, the vmbus is not
 * initialized, the channel is unknown, or it is not in VMCHAN_INIT state.
 */
static void handle_open_channel(VMBus *vmbus, vmbus_message_open_channel *msg,
                                uint32_t msglen)
{
    VMBusChannel *chan;

    if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
        return;
    }

    trace_vmbus_open_channel(msg->child_relid, msg->ring_buffer_gpadl_id,
                             msg->target_vp);

    chan = find_channel(vmbus, msg->child_relid);
    if (!chan) {
        return;
    }
    if (chan->state != VMCHAN_INIT) {
        return;
    }

    chan->open_id = msg->open_id;
    chan->target_vp = msg->target_vp;
    chan->ringbuf_gpadl = msg->ring_buffer_gpadl_id;
    chan->ringbuf_send_offset = msg->ring_buffer_offset;

    open_channel(chan);

    /* the channel goes to OPENING whether or not open_channel() worked */
    chan->state = VMCHAN_OPENING;
    vmbus->state = VMBUS_OPEN_CHANNEL;
}
1977
/*
 * Post OPENCHANNEL_RESULT for the first channel in VMCHAN_OPENING state;
 * status is 0 if the channel is open and 1 otherwise.  Must only run when
 * such a channel exists.
 */
static void send_open_channel(VMBus *vmbus)
{
    VMBusChannel *chan;

    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
        if (chan->state != VMCHAN_OPENING) {
            continue;
        }

        struct vmbus_message_open_result reply = {
            .header.message_type = VMBUS_MSG_OPENCHANNEL_RESULT,
            .child_relid = chan->id,
            .open_id = chan->open_id,
            .status = !vmbus_channel_is_open(chan),
        };

        trace_vmbus_channel_open(chan->id, reply.status);
        post_msg(vmbus, &reply, sizeof(reply));
        return;
    }

    g_assert_not_reached();
}
1999
complete_open_channel(VMBus * vmbus)2000 static bool complete_open_channel(VMBus *vmbus)
2001 {
2002 VMBusChannel *chan;
2003
2004 QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2005 if (chan->state == VMCHAN_OPENING) {
2006 if (vmbus_channel_is_open(chan)) {
2007 chan->state = VMCHAN_OPEN;
2008 /*
2009 * simulate guest notification of ringbuffer space made
2010 * available, for the channel protocols where the host
2011 * initiates the communication
2012 */
2013 vmbus_channel_notify_host(chan);
2014 } else {
2015 chan->state = VMCHAN_INIT;
2016 }
2017 return true;
2018 }
2019 }
2020
2021 g_assert_not_reached();
2022 }
2023
/* Cold-reset @vdev, but only if none of its channels is open any more. */
static void vdev_reset_on_close(VMBusDevice *vdev)
{
    uint16_t idx;

    for (idx = 0; idx < vdev->num_channels; idx++) {
        VMBusChannel *chan = &vdev->channels[idx];

        if (vmbus_channel_is_open(chan)) {
            /* still in use */
            return;
        }
    }

    /* all channels closed -- reset device */
    device_cold_reset(DEVICE(vdev));
}
2037
/*
 * Handle CLOSECHANNEL: close the named channel, put it back into
 * VMCHAN_INIT state, and reset its device if that was the last open channel.
 * Ignored when the message is too short, the vmbus is not initialized, or
 * the channel is unknown.
 */
static void handle_close_channel(VMBus *vmbus, vmbus_message_close_channel *msg,
                                 uint32_t msglen)
{
    VMBusChannel *chan;

    if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
        return;
    }

    trace_vmbus_close_channel(msg->child_relid);

    chan = find_channel(vmbus, msg->child_relid);
    if (chan) {
        close_channel(chan);
        chan->state = VMCHAN_INIT;

        vdev_reset_on_close(chan->dev);
    }
}
2059
/*
 * Handle UNLOAD: switch the vmbus to the UNLOAD state.  The message contents
 * are not used.
 */
static void handle_unload(VMBus *vmbus, void *msg, uint32_t msglen)
{
    vmbus->state = VMBUS_UNLOAD;
}
2064
/*
 * Discard everything still waiting in the receive queue (under the queue
 * lock), then post UNLOAD_RESPONSE.
 */
static void send_unload(VMBus *vmbus)
{
    vmbus_message_header reply = {
        .message_type = VMBUS_MSG_UNLOAD_RESPONSE,
    };

    qemu_mutex_lock(&vmbus->rx_queue_lock);
    vmbus->rx_queue_size = 0;
    qemu_mutex_unlock(&vmbus->rx_queue_lock);

    post_msg(vmbus, &reply, sizeof(reply));
}
2078
complete_unload(VMBus * vmbus)2079 static bool complete_unload(VMBus *vmbus)
2080 {
2081 vmbus_reset_all(vmbus);
2082 return true;
2083 }
2084
/*
 * Runner for the LISTEN state: take the message at the head of the receive
 * queue, dispatch it to its handler, remove it from the queue and
 * reschedule the vmbus.  Does nothing if the queue is empty.  Everything
 * happens with the queue lock held.
 */
static void process_message(VMBus *vmbus)
{
    struct hyperv_post_message_input *hv_msg;
    struct vmbus_message_header *hdr;
    void *payload;
    uint32_t len;

    qemu_mutex_lock(&vmbus->rx_queue_lock);

    if (vmbus->rx_queue_size) {
        hv_msg = &vmbus->rx_queue[vmbus->rx_queue_head];
        len = hv_msg->payload_size;

        /* a message too short for a vmbus header is dropped undispatched */
        if (len >= sizeof(*hdr)) {
            payload = hv_msg->payload;
            hdr = payload;

            trace_vmbus_process_incoming_message(hdr->message_type);

            switch (hdr->message_type) {
            case VMBUS_MSG_INITIATE_CONTACT:
                handle_initiate_contact(vmbus, payload, len);
                break;
            case VMBUS_MSG_REQUESTOFFERS:
                handle_request_offers(vmbus, payload, len);
                break;
            case VMBUS_MSG_GPADL_HEADER:
                handle_gpadl_header(vmbus, payload, len);
                break;
            case VMBUS_MSG_GPADL_BODY:
                handle_gpadl_body(vmbus, payload, len);
                break;
            case VMBUS_MSG_GPADL_TEARDOWN:
                handle_gpadl_teardown(vmbus, payload, len);
                break;
            case VMBUS_MSG_OPENCHANNEL:
                handle_open_channel(vmbus, payload, len);
                break;
            case VMBUS_MSG_CLOSECHANNEL:
                handle_close_channel(vmbus, payload, len);
                break;
            case VMBUS_MSG_UNLOAD:
                handle_unload(vmbus, payload, len);
                break;
            default:
                error_report("unknown message type %#x", hdr->message_type);
                break;
            }
        }

        /* pop the head of the circular queue */
        vmbus->rx_queue_size--;
        vmbus->rx_queue_head++;
        vmbus->rx_queue_head %= HV_MSG_QUEUE_LEN;

        vmbus_resched(vmbus);
    }

    qemu_mutex_unlock(&vmbus->rx_queue_lock);
}
2147
2148 static const struct {
2149 void (*run)(VMBus *vmbus);
2150 bool (*complete)(VMBus *vmbus);
2151 } state_runner[] = {
2152 [VMBUS_LISTEN] = {process_message, NULL},
2153 [VMBUS_HANDSHAKE] = {send_handshake, NULL},
2154 [VMBUS_OFFER] = {send_offer, complete_offer},
2155 [VMBUS_CREATE_GPADL] = {send_create_gpadl, complete_create_gpadl},
2156 [VMBUS_TEARDOWN_GPADL] = {send_teardown_gpadl, complete_teardown_gpadl},
2157 [VMBUS_OPEN_CHANNEL] = {send_open_channel, complete_open_channel},
2158 [VMBUS_UNLOAD] = {send_unload, complete_unload},
2159 };
2160
/*
 * Invoke the run handler of the current vmbus state, unless a message
 * delivery is still in flight (in which case nothing is done).
 */
static void vmbus_do_run(VMBus *vmbus)
{
    void (*run)(VMBus *vmbus);

    if (vmbus->msg_in_progress) {
        return;
    }

    assert(vmbus->state < VMBUS_STATE_MAX);
    run = state_runner[vmbus->state].run;
    assert(run);
    run(vmbus);
}
2171
vmbus_run(void * opaque)2172 static void vmbus_run(void *opaque)
2173 {
2174 VMBus *vmbus = opaque;
2175
2176 /* make sure no recursion happens (e.g. due to recursive aio_poll()) */
2177 if (vmbus->in_progress) {
2178 return;
2179 }
2180
2181 vmbus->in_progress = true;
2182 /*
2183 * FIXME: if vmbus_resched() is called from within vmbus_do_run(), it
2184 * should go *after* the code that can result in aio_poll; otherwise
2185 * reschedules can be missed. No idea how to enforce that.
2186 */
2187 vmbus_do_run(vmbus);
2188 vmbus->in_progress = false;
2189 }
2190
vmbus_msg_cb(void * data,int status)2191 static void vmbus_msg_cb(void *data, int status)
2192 {
2193 VMBus *vmbus = data;
2194 bool (*complete)(VMBus *vmbus);
2195
2196 assert(vmbus->msg_in_progress);
2197
2198 trace_vmbus_msg_cb(status);
2199
2200 if (status == -EAGAIN) {
2201 goto out;
2202 }
2203 if (status) {
2204 error_report("message delivery fatal failure: %d; aborting vmbus",
2205 status);
2206 vmbus_reset_all(vmbus);
2207 return;
2208 }
2209
2210 assert(vmbus->state < VMBUS_STATE_MAX);
2211 complete = state_runner[vmbus->state].complete;
2212 if (!complete || complete(vmbus)) {
2213 vmbus->state = VMBUS_LISTEN;
2214 }
2215 out:
2216 vmbus->msg_in_progress = false;
2217 vmbus_resched(vmbus);
2218 }
2219
/* Schedule a one-shot bottom half that calls vmbus_run() on @vmbus. */
static void vmbus_resched(VMBus *vmbus)
{
    AioContext *ctx = qemu_get_aio_context();

    aio_bh_schedule_oneshot(ctx, vmbus_run, vmbus);
}
2224
/*
 * Handler of the vmbus event notifier.
 *
 * Maps the second half of the interrupt page (@int_page_gpa) and treats it
 * as a bitmap indexed by channel id: every channel whose bit is set gets the
 * bit atomically cleared and, if the channel is open, is notified via
 * vmbus_channel_notify_host().
 *
 * Nothing is done if the notifier was not actually signalled, if no
 * interrupt page is configured, or if the page cannot be mapped in full.
 */
static void vmbus_signal_event(EventNotifier *e)
{
    VMBusChannel *chan;
    VMBus *vmbus = container_of(e, VMBus, notifier);
    unsigned long *int_map;
    const hwaddr map_len = TARGET_PAGE_SIZE / 2;
    hwaddr addr, len;
    bool is_dirty = false;

    if (!event_notifier_test_and_clear(e)) {
        return;
    }

    trace_vmbus_signal_event();

    if (!vmbus->int_page_gpa) {
        return;
    }

    addr = vmbus->int_page_gpa + TARGET_PAGE_SIZE / 2;
    len = map_len;
    int_map = cpu_physical_memory_map(addr, &len, 1);
    if (!int_map) {
        /* mapping failed: there is nothing to scan and nothing to unmap */
        return;
    }
    if (len != map_len) {
        goto unmap;
    }

    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
        if (bitmap_test_and_clear_atomic(int_map, chan->id, 1)) {
            /* the bit was cleared, so guest memory changed either way */
            is_dirty = true;
            if (!vmbus_channel_is_open(chan)) {
                continue;
            }
            vmbus_channel_notify_host(chan);
        }
    }

unmap:
    /* last argument is the number of bytes written, not a flag */
    cpu_physical_memory_unmap(int_map, len, 1, is_dirty ? len : 0);
}
2263
vmbus_dev_realize(DeviceState * dev,Error ** errp)2264 static void vmbus_dev_realize(DeviceState *dev, Error **errp)
2265 {
2266 VMBusDevice *vdev = VMBUS_DEVICE(dev);
2267 VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2268 VMBus *vmbus = VMBUS(qdev_get_parent_bus(dev));
2269 BusChild *child;
2270 Error *err = NULL;
2271 char idstr[UUID_STR_LEN];
2272
2273 assert(!qemu_uuid_is_null(&vdev->instanceid));
2274
2275 if (!qemu_uuid_is_null(&vdc->instanceid)) {
2276 /* Class wants to only have a single instance with a fixed UUID */
2277 if (!qemu_uuid_is_equal(&vdev->instanceid, &vdc->instanceid)) {
2278 error_setg(&err, "instance id can't be changed");
2279 goto error_out;
2280 }
2281 }
2282
2283 /* Check for instance id collision for this class id */
2284 QTAILQ_FOREACH(child, &BUS(vmbus)->children, sibling) {
2285 VMBusDevice *child_dev = VMBUS_DEVICE(child->child);
2286
2287 if (child_dev == vdev) {
2288 continue;
2289 }
2290
2291 if (qemu_uuid_is_equal(&child_dev->instanceid, &vdev->instanceid)) {
2292 qemu_uuid_unparse(&vdev->instanceid, idstr);
2293 error_setg(&err, "duplicate vmbus device instance id %s", idstr);
2294 goto error_out;
2295 }
2296 }
2297
2298 vdev->dma_as = &address_space_memory;
2299
2300 create_channels(vmbus, vdev, &err);
2301 if (err) {
2302 goto error_out;
2303 }
2304
2305 if (vdc->vmdev_realize) {
2306 vdc->vmdev_realize(vdev, &err);
2307 if (err) {
2308 goto err_vdc_realize;
2309 }
2310 }
2311 return;
2312
2313 err_vdc_realize:
2314 free_channels(vdev);
2315 error_out:
2316 error_propagate(errp, err);
2317 }
2318
/*
 * Device reset: close every channel of the device and put it back into the
 * VMCHAN_INIT state, then call the class-specific reset hook, if any.
 */
static void vmbus_dev_reset(DeviceState *dev)
{
    VMBusDevice *vdev = VMBUS_DEVICE(dev);
    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);

    if (vdev->channels) {
        VMBusChannel *chan = vdev->channels;
        VMBusChannel *end = chan + vdev->num_channels;

        for (; chan != end; chan++) {
            close_channel(chan);
            chan->state = VMCHAN_INIT;
        }
    }

    if (vdc->vmdev_reset) {
        vdc->vmdev_reset(vdev);
    }
}
2337
/*
 * Unrealize a vmbus device: give the class a chance to tear down its own
 * state first, then free the device channels.
 */
static void vmbus_dev_unrealize(DeviceState *dev)
{
    VMBusDevice *vdev = VMBUS_DEVICE(dev);
    void (*class_unrealize)(VMBusDevice *vdev) =
        VMBUS_DEVICE_GET_CLASS(vdev)->vmdev_unrealize;

    if (class_unrealize) {
        class_unrealize(vdev);
    }

    free_channels(vdev);
}
2348
2349 static Property vmbus_dev_props[] = {
2350 DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid),
2351 DEFINE_PROP_END_OF_LIST()
2352 };
2353
2354
vmbus_dev_class_init(ObjectClass * klass,void * data)2355 static void vmbus_dev_class_init(ObjectClass *klass, void *data)
2356 {
2357 DeviceClass *kdev = DEVICE_CLASS(klass);
2358 device_class_set_props(kdev, vmbus_dev_props);
2359 kdev->bus_type = TYPE_VMBUS;
2360 kdev->realize = vmbus_dev_realize;
2361 kdev->unrealize = vmbus_dev_unrealize;
2362 device_class_set_legacy_reset(kdev, vmbus_dev_reset);
2363 }
2364
/*
 * Instance initializer: if the device class defines a fixed (non-null)
 * instance id, preset the new instance with it.
 */
static void vmbus_dev_instance_init(Object *obj)
{
    VMBusDevice *vdev = VMBUS_DEVICE(obj);
    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);

    if (qemu_uuid_is_null(&vdc->instanceid)) {
        return;
    }

    /* Class wants to only have a single instance with a fixed UUID */
    vdev->instanceid = vdc->instanceid;
}
2375
2376 const VMStateDescription vmstate_vmbus_dev = {
2377 .name = TYPE_VMBUS_DEVICE,
2378 .version_id = 0,
2379 .minimum_version_id = 0,
2380 .fields = (const VMStateField[]) {
2381 VMSTATE_UINT8_ARRAY(instanceid.data, VMBusDevice, 16),
2382 VMSTATE_UINT16(num_channels, VMBusDevice),
2383 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(channels, VMBusDevice,
2384 num_channels, vmstate_channel,
2385 VMBusChannel),
2386 VMSTATE_END_OF_LIST()
2387 }
2388 };
2389
2390 /* vmbus generic device base */
2391 static const TypeInfo vmbus_dev_type_info = {
2392 .name = TYPE_VMBUS_DEVICE,
2393 .parent = TYPE_DEVICE,
2394 .abstract = true,
2395 .instance_size = sizeof(VMBusDevice),
2396 .class_size = sizeof(VMBusDeviceClass),
2397 .class_init = vmbus_dev_class_init,
2398 .instance_init = vmbus_dev_instance_init,
2399 };
2400
vmbus_realize(BusState * bus,Error ** errp)2401 static void vmbus_realize(BusState *bus, Error **errp)
2402 {
2403 int ret = 0;
2404 VMBus *vmbus = VMBUS(bus);
2405
2406 qemu_mutex_init(&vmbus->rx_queue_lock);
2407
2408 QTAILQ_INIT(&vmbus->gpadl_list);
2409 QTAILQ_INIT(&vmbus->channel_list);
2410
2411 ret = hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID,
2412 vmbus_recv_message, vmbus);
2413 if (ret != 0) {
2414 error_setg(errp, "hyperv set message handler failed: %d", ret);
2415 goto error_out;
2416 }
2417
2418 ret = event_notifier_init(&vmbus->notifier, 0);
2419 if (ret != 0) {
2420 error_setg(errp, "event notifier failed to init with %d", ret);
2421 goto remove_msg_handler;
2422 }
2423
2424 event_notifier_set_handler(&vmbus->notifier, vmbus_signal_event);
2425 ret = hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID,
2426 &vmbus->notifier);
2427 if (ret != 0) {
2428 error_setg(errp, "hyperv set event handler failed with %d", ret);
2429 goto clear_event_notifier;
2430 }
2431
2432 return;
2433
2434 clear_event_notifier:
2435 event_notifier_cleanup(&vmbus->notifier);
2436 remove_msg_handler:
2437 hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
2438 error_out:
2439 qemu_mutex_destroy(&vmbus->rx_queue_lock);
2440 }
2441
/*
 * Unrealize the vmbus: unregister the Hyper-V message and event flag
 * handlers, detach and clean up the event notifier and destroy the rx queue
 * lock.
 */
static void vmbus_unrealize(BusState *bus)
{
    VMBus *vmbus = VMBUS(bus);

    hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
    hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID, NULL);

    /*
     * vmbus_realize() installed vmbus_signal_event() on the notifier;
     * remove it before the notifier itself is cleaned up.
     */
    event_notifier_set_handler(&vmbus->notifier, NULL);
    event_notifier_cleanup(&vmbus->notifier);

    qemu_mutex_destroy(&vmbus->rx_queue_lock);
}
2452
/* Reset "hold" phase of the bus: deinitialize the vmbus. @type is unused. */
static void vmbus_reset_hold(Object *obj, ResetType type)
{
    VMBus *vmbus = VMBUS(obj);

    vmbus_deinit(vmbus);
}
2457
/*
 * BusClass::get_dev_path implementation: a device on the vmbus reports the
 * device path of the device owning the bus (the parent of @dev's bus).
 */
static char *vmbus_get_dev_path(DeviceState *dev)
{
    return qdev_get_dev_path(qdev_get_parent_bus(dev)->parent);
}
2463
/*
 * BusClass::get_fw_dev_path implementation: returns a newly allocated
 * string of the form "<fw name>@<instance id>", where the instance id is
 * the textual form of the device UUID.
 */
static char *vmbus_get_fw_dev_path(DeviceState *dev)
{
    char instance_str[UUID_STR_LEN];
    VMBusDevice *vdev = VMBUS_DEVICE(dev);

    qemu_uuid_unparse(&vdev->instanceid, instance_str);

    return g_strdup_printf("%s@%s", qdev_fw_name(dev), instance_str);
}
2472
/*
 * Class initializer of the vmbus bus type: installs the realize /
 * unrealize callbacks, the device path helpers and the reset hold phase.
 */
static void vmbus_class_init(ObjectClass *klass, void *data)
{
    BusClass *bc = BUS_CLASS(klass);
    ResettableClass *rc = RESETTABLE_CLASS(klass);

    bc->realize = vmbus_realize;
    bc->unrealize = vmbus_unrealize;
    bc->get_dev_path = vmbus_get_dev_path;
    bc->get_fw_dev_path = vmbus_get_fw_dev_path;

    rc->phases.hold = vmbus_reset_hold;
}
2484
vmbus_pre_load(void * opaque)2485 static int vmbus_pre_load(void *opaque)
2486 {
2487 VMBusChannel *chan;
2488 VMBus *vmbus = VMBUS(opaque);
2489
2490 /*
2491 * channel IDs allocated by the source will come in the migration stream
2492 * for each channel, so clean up the ones allocated at realize
2493 */
2494 QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2495 unregister_chan_id(chan);
2496 }
2497
2498 return 0;
2499 }
vmbus_post_load(void * opaque,int version_id)2500 static int vmbus_post_load(void *opaque, int version_id)
2501 {
2502 int ret;
2503 VMBus *vmbus = VMBUS(opaque);
2504 VMBusGpadl *gpadl;
2505 VMBusChannel *chan;
2506
2507 ret = vmbus_init(vmbus);
2508 if (ret) {
2509 return ret;
2510 }
2511
2512 QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
2513 gpadl->vmbus = vmbus;
2514 gpadl->refcount = 1;
2515 }
2516
2517 /*
2518 * reopening channels depends on initialized vmbus so it's done here
2519 * instead of channel_post_load()
2520 */
2521 QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2522
2523 if (chan->state == VMCHAN_OPENING || chan->state == VMCHAN_OPEN) {
2524 open_channel(chan);
2525 }
2526
2527 if (chan->state != VMCHAN_OPEN) {
2528 continue;
2529 }
2530
2531 if (!vmbus_channel_is_open(chan)) {
2532 /* reopen failed, abort loading */
2533 return -1;
2534 }
2535
2536 /* resume processing on the guest side if it missed the notification */
2537 hyperv_sint_route_set_sint(chan->notify_route);
2538 /* ditto on the host side */
2539 vmbus_channel_notify_host(chan);
2540 }
2541
2542 vmbus_resched(vmbus);
2543 return 0;
2544 }
2545
2546 static const VMStateDescription vmstate_post_message_input = {
2547 .name = "vmbus/hyperv_post_message_input",
2548 .version_id = 0,
2549 .minimum_version_id = 0,
2550 .fields = (const VMStateField[]) {
2551 /*
2552 * skip connection_id and message_type as they are validated before
2553 * queueing and ignored on dequeueing
2554 */
2555 VMSTATE_UINT32(payload_size, struct hyperv_post_message_input),
2556 VMSTATE_UINT8_ARRAY(payload, struct hyperv_post_message_input,
2557 HV_MESSAGE_PAYLOAD_SIZE),
2558 VMSTATE_END_OF_LIST()
2559 }
2560 };
2561
vmbus_rx_queue_needed(void * opaque)2562 static bool vmbus_rx_queue_needed(void *opaque)
2563 {
2564 VMBus *vmbus = VMBUS(opaque);
2565 return vmbus->rx_queue_size;
2566 }
2567
2568 static const VMStateDescription vmstate_rx_queue = {
2569 .name = "vmbus/rx_queue",
2570 .version_id = 0,
2571 .minimum_version_id = 0,
2572 .needed = vmbus_rx_queue_needed,
2573 .fields = (const VMStateField[]) {
2574 VMSTATE_UINT8(rx_queue_head, VMBus),
2575 VMSTATE_UINT8(rx_queue_size, VMBus),
2576 VMSTATE_STRUCT_ARRAY(rx_queue, VMBus,
2577 HV_MSG_QUEUE_LEN, 0,
2578 vmstate_post_message_input,
2579 struct hyperv_post_message_input),
2580 VMSTATE_END_OF_LIST()
2581 }
2582 };
2583
2584 static const VMStateDescription vmstate_vmbus = {
2585 .name = TYPE_VMBUS,
2586 .version_id = 0,
2587 .minimum_version_id = 0,
2588 .pre_load = vmbus_pre_load,
2589 .post_load = vmbus_post_load,
2590 .fields = (const VMStateField[]) {
2591 VMSTATE_UINT8(state, VMBus),
2592 VMSTATE_UINT32(version, VMBus),
2593 VMSTATE_UINT32(target_vp, VMBus),
2594 VMSTATE_UINT64(int_page_gpa, VMBus),
2595 VMSTATE_QTAILQ_V(gpadl_list, VMBus, 0,
2596 vmstate_gpadl, VMBusGpadl, link),
2597 VMSTATE_END_OF_LIST()
2598 },
2599 .subsections = (const VMStateDescription * const []) {
2600 &vmstate_rx_queue,
2601 NULL
2602 }
2603 };
2604
2605 static const TypeInfo vmbus_type_info = {
2606 .name = TYPE_VMBUS,
2607 .parent = TYPE_BUS,
2608 .instance_size = sizeof(VMBus),
2609 .class_init = vmbus_class_init,
2610 };
2611
vmbus_bridge_realize(DeviceState * dev,Error ** errp)2612 static void vmbus_bridge_realize(DeviceState *dev, Error **errp)
2613 {
2614 VMBusBridge *bridge = VMBUS_BRIDGE(dev);
2615
2616 /*
2617 * here there's at least one vmbus bridge that is being realized, so
2618 * vmbus_bridge_find can only return NULL if it's not unique
2619 */
2620 if (!vmbus_bridge_find()) {
2621 error_setg(errp, "there can be at most one %s in the system",
2622 TYPE_VMBUS_BRIDGE);
2623 return;
2624 }
2625
2626 if (!hyperv_is_synic_enabled()) {
2627 error_report("VMBus requires usable Hyper-V SynIC and VP_INDEX");
2628 return;
2629 }
2630
2631 if (!hyperv_are_vmbus_recommended_features_enabled()) {
2632 warn_report("VMBus enabled without the recommended set of Hyper-V features: "
2633 "hv-stimer, hv-vapic and hv-runtime. "
2634 "Some Windows versions might not boot or enable the VMBus device");
2635 }
2636
2637 bridge->bus = VMBUS(qbus_new(TYPE_VMBUS, dev, "vmbus"));
2638 }
2639
/*
 * Explicit OpenFirmware unit address of the bridge: a newly allocated
 * constant "0", since there can be only one VMBus.  @dev is not inspected.
 */
static char *vmbus_bridge_ofw_unit_address(const SysBusDevice *dev)
{
    static const char unit_address[] = "0";

    return g_strdup(unit_address);
}
2645
2646 static const VMStateDescription vmstate_vmbus_bridge = {
2647 .name = TYPE_VMBUS_BRIDGE,
2648 .version_id = 0,
2649 .minimum_version_id = 0,
2650 .fields = (const VMStateField[]) {
2651 VMSTATE_STRUCT_POINTER(bus, VMBusBridge, vmstate_vmbus, VMBus),
2652 VMSTATE_END_OF_LIST()
2653 },
2654 };
2655
2656 static Property vmbus_bridge_props[] = {
2657 DEFINE_PROP_UINT8("irq", VMBusBridge, irq, 7),
2658 DEFINE_PROP_END_OF_LIST()
2659 };
2660
vmbus_bridge_class_init(ObjectClass * klass,void * data)2661 static void vmbus_bridge_class_init(ObjectClass *klass, void *data)
2662 {
2663 DeviceClass *k = DEVICE_CLASS(klass);
2664 SysBusDeviceClass *sk = SYS_BUS_DEVICE_CLASS(klass);
2665
2666 k->realize = vmbus_bridge_realize;
2667 k->fw_name = "vmbus";
2668 sk->explicit_ofw_unit_address = vmbus_bridge_ofw_unit_address;
2669 set_bit(DEVICE_CATEGORY_BRIDGE, k->categories);
2670 k->vmsd = &vmstate_vmbus_bridge;
2671 device_class_set_props(k, vmbus_bridge_props);
2672 /* override SysBusDevice's default */
2673 k->user_creatable = true;
2674 }
2675
2676 static const TypeInfo vmbus_bridge_type_info = {
2677 .name = TYPE_VMBUS_BRIDGE,
2678 .parent = TYPE_SYS_BUS_DEVICE,
2679 .instance_size = sizeof(VMBusBridge),
2680 .class_init = vmbus_bridge_class_init,
2681 };
2682
vmbus_register_types(void)2683 static void vmbus_register_types(void)
2684 {
2685 type_register_static(&vmbus_bridge_type_info);
2686 type_register_static(&vmbus_dev_type_info);
2687 type_register_static(&vmbus_type_info);
2688 }
2689
2690 type_init(vmbus_register_types)
2691