xref: /openbmc/qemu/hw/virtio/virtio.c (revision 69779192acfeb9480183fd076be7480de56b1009)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "qapi/qmp/qdict.h"
17 #include "qapi/qapi-commands-virtio.h"
18 #include "qapi/qapi-commands-qom.h"
19 #include "qapi/qapi-visit-virtio.h"
20 #include "qapi/qmp/qjson.h"
21 #include "cpu.h"
22 #include "trace.h"
23 #include "qemu/error-report.h"
24 #include "qemu/log.h"
25 #include "qemu/main-loop.h"
26 #include "qemu/module.h"
27 #include "qom/object_interfaces.h"
28 #include "hw/core/cpu.h"
29 #include "hw/virtio/virtio.h"
30 #include "migration/qemu-file-types.h"
31 #include "qemu/atomic.h"
32 #include "hw/virtio/virtio-bus.h"
33 #include "hw/qdev-properties.h"
34 #include "hw/virtio/virtio-access.h"
35 #include "sysemu/dma.h"
36 #include "sysemu/runstate.h"
37 #include "standard-headers/linux/virtio_ids.h"
38 #include "standard-headers/linux/vhost_types.h"
39 #include "standard-headers/linux/virtio_blk.h"
40 #include "standard-headers/linux/virtio_console.h"
41 #include "standard-headers/linux/virtio_gpu.h"
42 #include "standard-headers/linux/virtio_net.h"
43 #include "standard-headers/linux/virtio_scsi.h"
44 #include "standard-headers/linux/virtio_i2c.h"
45 #include "standard-headers/linux/virtio_balloon.h"
46 #include "standard-headers/linux/virtio_iommu.h"
47 #include "standard-headers/linux/virtio_mem.h"
48 #include "standard-headers/linux/virtio_vsock.h"
49 #include CONFIG_DEVICES
50 
51 /* QAPI list of realized VirtIODevices */
52 static QTAILQ_HEAD(, VirtIODevice) virtio_list;
53 
54 /*
55  * Maximum size of virtio device config space
56  */
57 #define VHOST_USER_MAX_CONFIG_SIZE 256
58 
59 #define FEATURE_ENTRY(name, desc) (qmp_virtio_feature_map_t) \
60     { .virtio_bit = name, .feature_desc = desc }
61 
62 enum VhostUserProtocolFeature {
63     VHOST_USER_PROTOCOL_F_MQ = 0,
64     VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
65     VHOST_USER_PROTOCOL_F_RARP = 2,
66     VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
67     VHOST_USER_PROTOCOL_F_NET_MTU = 4,
68     VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
69     VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
70     VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
71     VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
72     VHOST_USER_PROTOCOL_F_CONFIG = 9,
73     VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
74     VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
75     VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
76     VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
77     VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14,
78     VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
79     VHOST_USER_PROTOCOL_F_MAX
80 };
81 
82 /* Virtio transport features mapping */
83 static const qmp_virtio_feature_map_t virtio_transport_map[] = {
84     /* Virtio device transport features */
85 #ifndef VIRTIO_CONFIG_NO_LEGACY
86     FEATURE_ENTRY(VIRTIO_F_NOTIFY_ON_EMPTY, \
87             "VIRTIO_F_NOTIFY_ON_EMPTY: Notify when device runs out of avail. "
88             "descs. on VQ"),
89     FEATURE_ENTRY(VIRTIO_F_ANY_LAYOUT, \
90             "VIRTIO_F_ANY_LAYOUT: Device accepts arbitrary desc. layouts"),
91 #endif /* !VIRTIO_CONFIG_NO_LEGACY */
92     FEATURE_ENTRY(VIRTIO_F_VERSION_1, \
93             "VIRTIO_F_VERSION_1: Device compliant for v1 spec (legacy)"),
94     FEATURE_ENTRY(VIRTIO_F_IOMMU_PLATFORM, \
95             "VIRTIO_F_IOMMU_PLATFORM: Device can be used on IOMMU platform"),
96     FEATURE_ENTRY(VIRTIO_F_RING_PACKED, \
97             "VIRTIO_F_RING_PACKED: Device supports packed VQ layout"),
98     FEATURE_ENTRY(VIRTIO_F_IN_ORDER, \
99             "VIRTIO_F_IN_ORDER: Device uses buffers in same order as made "
100             "available by driver"),
101     FEATURE_ENTRY(VIRTIO_F_ORDER_PLATFORM, \
102             "VIRTIO_F_ORDER_PLATFORM: Memory accesses ordered by platform"),
103     FEATURE_ENTRY(VIRTIO_F_SR_IOV, \
104             "VIRTIO_F_SR_IOV: Device supports single root I/O virtualization"),
105     /* Virtio ring transport features */
106     FEATURE_ENTRY(VIRTIO_RING_F_INDIRECT_DESC, \
107             "VIRTIO_RING_F_INDIRECT_DESC: Indirect descriptors supported"),
108     FEATURE_ENTRY(VIRTIO_RING_F_EVENT_IDX, \
109             "VIRTIO_RING_F_EVENT_IDX: Used & avail. event fields enabled"),
110     { -1, "" }
111 };
112 
113 /* Vhost-user protocol features mapping */
114 static const qmp_virtio_feature_map_t vhost_user_protocol_map[] = {
115     FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_MQ, \
116             "VHOST_USER_PROTOCOL_F_MQ: Multiqueue protocol supported"),
117     FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_LOG_SHMFD, \
118             "VHOST_USER_PROTOCOL_F_LOG_SHMFD: Shared log memory fd supported"),
119     FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_RARP, \
120             "VHOST_USER_PROTOCOL_F_RARP: Vhost-user back-end RARP broadcasting "
121             "supported"),
122     FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_REPLY_ACK, \
123             "VHOST_USER_PROTOCOL_F_REPLY_ACK: Requested operation status ack. "
124             "supported"),
125     FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_NET_MTU, \
126             "VHOST_USER_PROTOCOL_F_NET_MTU: Expose host MTU to guest supported"),
127     FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_SLAVE_REQ, \
128             "VHOST_USER_PROTOCOL_F_SLAVE_REQ: Socket fd for back-end initiated "
129             "requests supported"),
130     FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CROSS_ENDIAN, \
131             "VHOST_USER_PROTOCOL_F_CROSS_ENDIAN: Endianness of VQs for legacy "
132             "devices supported"),
133     FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CRYPTO_SESSION, \
134             "VHOST_USER_PROTOCOL_F_CRYPTO_SESSION: Session creation for crypto "
135             "operations supported"),
136     FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_PAGEFAULT, \
137             "VHOST_USER_PROTOCOL_F_PAGEFAULT: Request servicing on userfaultfd "
138             "for accessed pages supported"),
139     FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CONFIG, \
140             "VHOST_USER_PROTOCOL_F_CONFIG: Vhost-user messaging for virtio "
141             "device configuration space supported"),
142     FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD, \
143             "VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD: Slave fd communication "
144             "channel supported"),
145     FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_HOST_NOTIFIER, \
146             "VHOST_USER_PROTOCOL_F_HOST_NOTIFIER: Host notifiers for specified "
147             "VQs supported"),
148     FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD, \
149             "VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD: Shared inflight I/O buffers "
150             "supported"),
151     FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_RESET_DEVICE, \
152             "VHOST_USER_PROTOCOL_F_RESET_DEVICE: Disabling all rings and "
153             "resetting internal device state supported"),
154     FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS, \
155             "VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS: In-band messaging "
156             "supported"),
157     FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS, \
158             "VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS: Configuration for "
159             "memory slots supported"),
160     { -1, "" }
161 };
162 
163 /* virtio device configuration statuses */
164 static const qmp_virtio_feature_map_t virtio_config_status_map[] = {
165     FEATURE_ENTRY(VIRTIO_CONFIG_S_DRIVER_OK, \
166             "VIRTIO_CONFIG_S_DRIVER_OK: Driver setup and ready"),
167     FEATURE_ENTRY(VIRTIO_CONFIG_S_FEATURES_OK, \
168             "VIRTIO_CONFIG_S_FEATURES_OK: Feature negotiation complete"),
169     FEATURE_ENTRY(VIRTIO_CONFIG_S_DRIVER, \
170             "VIRTIO_CONFIG_S_DRIVER: Guest OS compatible with device"),
171     FEATURE_ENTRY(VIRTIO_CONFIG_S_NEEDS_RESET, \
172             "VIRTIO_CONFIG_S_NEEDS_RESET: Irrecoverable error, device needs "
173             "reset"),
174     FEATURE_ENTRY(VIRTIO_CONFIG_S_FAILED, \
175             "VIRTIO_CONFIG_S_FAILED: Error in guest, device failed"),
176     FEATURE_ENTRY(VIRTIO_CONFIG_S_ACKNOWLEDGE, \
177             "VIRTIO_CONFIG_S_ACKNOWLEDGE: Valid virtio device found"),
178     { -1, "" }
179 };
180 
181 /* virtio-blk features mapping */
182 #ifdef CONFIG_VIRTIO_BLK
183 static const qmp_virtio_feature_map_t virtio_blk_feature_map[] = {
184     FEATURE_ENTRY(VIRTIO_BLK_F_SIZE_MAX, \
185             "VIRTIO_BLK_F_SIZE_MAX: Max segment size is size_max"),
186     FEATURE_ENTRY(VIRTIO_BLK_F_SEG_MAX, \
187             "VIRTIO_BLK_F_SEG_MAX: Max segments in a request is seg_max"),
188     FEATURE_ENTRY(VIRTIO_BLK_F_GEOMETRY, \
189             "VIRTIO_BLK_F_GEOMETRY: Legacy geometry available"),
190     FEATURE_ENTRY(VIRTIO_BLK_F_RO, \
191             "VIRTIO_BLK_F_RO: Device is read-only"),
192     FEATURE_ENTRY(VIRTIO_BLK_F_BLK_SIZE, \
193             "VIRTIO_BLK_F_BLK_SIZE: Block size of disk available"),
194     FEATURE_ENTRY(VIRTIO_BLK_F_TOPOLOGY, \
195             "VIRTIO_BLK_F_TOPOLOGY: Topology information available"),
196     FEATURE_ENTRY(VIRTIO_BLK_F_MQ, \
197             "VIRTIO_BLK_F_MQ: Multiqueue supported"),
198     FEATURE_ENTRY(VIRTIO_BLK_F_DISCARD, \
199             "VIRTIO_BLK_F_DISCARD: Discard command supported"),
200     FEATURE_ENTRY(VIRTIO_BLK_F_WRITE_ZEROES, \
201             "VIRTIO_BLK_F_WRITE_ZEROES: Write zeroes command supported"),
202 #ifndef VIRTIO_BLK_NO_LEGACY
203     FEATURE_ENTRY(VIRTIO_BLK_F_BARRIER, \
204             "VIRTIO_BLK_F_BARRIER: Request barriers supported"),
205     FEATURE_ENTRY(VIRTIO_BLK_F_SCSI, \
206             "VIRTIO_BLK_F_SCSI: SCSI packet commands supported"),
207     FEATURE_ENTRY(VIRTIO_BLK_F_FLUSH, \
208             "VIRTIO_BLK_F_FLUSH: Flush command supported"),
209     FEATURE_ENTRY(VIRTIO_BLK_F_CONFIG_WCE, \
210             "VIRTIO_BLK_F_CONFIG_WCE: Cache writeback and writethrough modes "
211             "supported"),
212 #endif /* !VIRTIO_BLK_NO_LEGACY */
213     FEATURE_ENTRY(VHOST_F_LOG_ALL, \
214             "VHOST_F_LOG_ALL: Logging write descriptors supported"),
215     FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
216             "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
217             "negotiation supported"),
218     { -1, "" }
219 };
220 #endif
221 
222 /* virtio-serial features mapping */
223 #ifdef CONFIG_VIRTIO_SERIAL
224 static const qmp_virtio_feature_map_t virtio_serial_feature_map[] = {
225     FEATURE_ENTRY(VIRTIO_CONSOLE_F_SIZE, \
226             "VIRTIO_CONSOLE_F_SIZE: Host providing console size"),
227     FEATURE_ENTRY(VIRTIO_CONSOLE_F_MULTIPORT, \
228             "VIRTIO_CONSOLE_F_MULTIPORT: Multiple ports for device supported"),
229     FEATURE_ENTRY(VIRTIO_CONSOLE_F_EMERG_WRITE, \
230             "VIRTIO_CONSOLE_F_EMERG_WRITE: Emergency write supported"),
231     { -1, "" }
232 };
233 #endif
234 
235 /* virtio-gpu features mapping */
236 #ifdef CONFIG_VIRTIO_GPU
237 static const qmp_virtio_feature_map_t virtio_gpu_feature_map[] = {
238     FEATURE_ENTRY(VIRTIO_GPU_F_VIRGL, \
239             "VIRTIO_GPU_F_VIRGL: Virgl 3D mode supported"),
240     FEATURE_ENTRY(VIRTIO_GPU_F_EDID, \
241             "VIRTIO_GPU_F_EDID: EDID metadata supported"),
242     FEATURE_ENTRY(VIRTIO_GPU_F_RESOURCE_UUID, \
243             "VIRTIO_GPU_F_RESOURCE_UUID: Resource UUID assigning supported"),
244     FEATURE_ENTRY(VIRTIO_GPU_F_RESOURCE_BLOB, \
245             "VIRTIO_GPU_F_RESOURCE_BLOB: Size-based blob resources supported"),
246     FEATURE_ENTRY(VIRTIO_GPU_F_CONTEXT_INIT, \
247             "VIRTIO_GPU_F_CONTEXT_INIT: Context types and synchronization "
248             "timelines supported"),
249     FEATURE_ENTRY(VHOST_F_LOG_ALL, \
250             "VHOST_F_LOG_ALL: Logging write descriptors supported"),
251     FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
252             "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
253             "negotiation supported"),
254     { -1, "" }
255 };
256 #endif
257 
258 /* virtio-input features mapping */
259 #ifdef CONFIG_VIRTIO_INPUT
260 static const qmp_virtio_feature_map_t virtio_input_feature_map[] = {
261     FEATURE_ENTRY(VHOST_F_LOG_ALL, \
262             "VHOST_F_LOG_ALL: Logging write descriptors supported"),
263     FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
264             "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
265             "negotiation supported"),
266     { -1, "" }
267 };
268 #endif
269 
270 /* virtio-net features mapping */
271 #ifdef CONFIG_VIRTIO_NET
272 static const qmp_virtio_feature_map_t virtio_net_feature_map[] = {
273     FEATURE_ENTRY(VIRTIO_NET_F_CSUM, \
274             "VIRTIO_NET_F_CSUM: Device handling packets with partial checksum "
275             "supported"),
276     FEATURE_ENTRY(VIRTIO_NET_F_GUEST_CSUM, \
277             "VIRTIO_NET_F_GUEST_CSUM: Driver handling packets with partial "
278             "checksum supported"),
279     FEATURE_ENTRY(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
280             "VIRTIO_NET_F_CTRL_GUEST_OFFLOADS: Control channel offloading "
281             "reconfig. supported"),
282     FEATURE_ENTRY(VIRTIO_NET_F_MTU, \
283             "VIRTIO_NET_F_MTU: Device max MTU reporting supported"),
284     FEATURE_ENTRY(VIRTIO_NET_F_MAC, \
285             "VIRTIO_NET_F_MAC: Device has given MAC address"),
286     FEATURE_ENTRY(VIRTIO_NET_F_GUEST_TSO4, \
287             "VIRTIO_NET_F_GUEST_TSO4: Driver can receive TSOv4"),
288     FEATURE_ENTRY(VIRTIO_NET_F_GUEST_TSO6, \
289             "VIRTIO_NET_F_GUEST_TSO6: Driver can receive TSOv6"),
290     FEATURE_ENTRY(VIRTIO_NET_F_GUEST_ECN, \
291             "VIRTIO_NET_F_GUEST_ECN: Driver can receive TSO with ECN"),
292     FEATURE_ENTRY(VIRTIO_NET_F_GUEST_UFO, \
293             "VIRTIO_NET_F_GUEST_UFO: Driver can receive UFO"),
294     FEATURE_ENTRY(VIRTIO_NET_F_HOST_TSO4, \
295             "VIRTIO_NET_F_HOST_TSO4: Device can receive TSOv4"),
296     FEATURE_ENTRY(VIRTIO_NET_F_HOST_TSO6, \
297             "VIRTIO_NET_F_HOST_TSO6: Device can receive TSOv6"),
298     FEATURE_ENTRY(VIRTIO_NET_F_HOST_ECN, \
299             "VIRTIO_NET_F_HOST_ECN: Device can receive TSO with ECN"),
300     FEATURE_ENTRY(VIRTIO_NET_F_HOST_UFO, \
301             "VIRTIO_NET_F_HOST_UFO: Device can receive UFO"),
302     FEATURE_ENTRY(VIRTIO_NET_F_MRG_RXBUF, \
303             "VIRTIO_NET_F_MRG_RXBUF: Driver can merge receive buffers"),
304     FEATURE_ENTRY(VIRTIO_NET_F_STATUS, \
305             "VIRTIO_NET_F_STATUS: Configuration status field available"),
306     FEATURE_ENTRY(VIRTIO_NET_F_CTRL_VQ, \
307             "VIRTIO_NET_F_CTRL_VQ: Control channel available"),
308     FEATURE_ENTRY(VIRTIO_NET_F_CTRL_RX, \
309             "VIRTIO_NET_F_CTRL_RX: Control channel RX mode supported"),
310     FEATURE_ENTRY(VIRTIO_NET_F_CTRL_VLAN, \
311             "VIRTIO_NET_F_CTRL_VLAN: Control channel VLAN filtering supported"),
312     FEATURE_ENTRY(VIRTIO_NET_F_CTRL_RX_EXTRA, \
313             "VIRTIO_NET_F_CTRL_RX_EXTRA: Extra RX mode control supported"),
314     FEATURE_ENTRY(VIRTIO_NET_F_GUEST_ANNOUNCE, \
315             "VIRTIO_NET_F_GUEST_ANNOUNCE: Driver sending gratuitous packets "
316             "supported"),
317     FEATURE_ENTRY(VIRTIO_NET_F_MQ, \
318             "VIRTIO_NET_F_MQ: Multiqueue with automatic receive steering "
319             "supported"),
320     FEATURE_ENTRY(VIRTIO_NET_F_CTRL_MAC_ADDR, \
321             "VIRTIO_NET_F_CTRL_MAC_ADDR: MAC address set through control "
322             "channel"),
323     FEATURE_ENTRY(VIRTIO_NET_F_HASH_REPORT, \
324             "VIRTIO_NET_F_HASH_REPORT: Hash reporting supported"),
325     FEATURE_ENTRY(VIRTIO_NET_F_RSS, \
326             "VIRTIO_NET_F_RSS: RSS RX steering supported"),
327     FEATURE_ENTRY(VIRTIO_NET_F_RSC_EXT, \
328             "VIRTIO_NET_F_RSC_EXT: Extended coalescing info supported"),
329     FEATURE_ENTRY(VIRTIO_NET_F_STANDBY, \
330             "VIRTIO_NET_F_STANDBY: Device acting as standby for primary "
331             "device with same MAC addr. supported"),
332     FEATURE_ENTRY(VIRTIO_NET_F_SPEED_DUPLEX, \
333             "VIRTIO_NET_F_SPEED_DUPLEX: Device sets link speed and duplex"),
334 #ifndef VIRTIO_NET_NO_LEGACY
335     FEATURE_ENTRY(VIRTIO_NET_F_GSO, \
336             "VIRTIO_NET_F_GSO: Handling GSO-type packets supported"),
337 #endif /* !VIRTIO_NET_NO_LEGACY */
338     FEATURE_ENTRY(VHOST_NET_F_VIRTIO_NET_HDR, \
339             "VHOST_NET_F_VIRTIO_NET_HDR: Virtio-net headers for RX and TX "
340             "packets supported"),
341     FEATURE_ENTRY(VHOST_F_LOG_ALL, \
342             "VHOST_F_LOG_ALL: Logging write descriptors supported"),
343     FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
344             "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
345             "negotiation supported"),
346     { -1, "" }
347 };
348 #endif
349 
350 /* virtio-scsi features mapping */
351 #ifdef CONFIG_VIRTIO_SCSI
352 static const qmp_virtio_feature_map_t virtio_scsi_feature_map[] = {
353     FEATURE_ENTRY(VIRTIO_SCSI_F_INOUT, \
354             "VIRTIO_SCSI_F_INOUT: Requests including read and writable data "
355             "buffers supported"),
356     FEATURE_ENTRY(VIRTIO_SCSI_F_HOTPLUG, \
357             "VIRTIO_SCSI_F_HOTPLUG: Reporting and handling hot-plug events "
358             "supported"),
359     FEATURE_ENTRY(VIRTIO_SCSI_F_CHANGE, \
360             "VIRTIO_SCSI_F_CHANGE: Reporting and handling LUN changes "
361             "supported"),
362     FEATURE_ENTRY(VIRTIO_SCSI_F_T10_PI, \
363             "VIRTIO_SCSI_F_T10_PI: T10 info included in request header"),
364     FEATURE_ENTRY(VHOST_F_LOG_ALL, \
365             "VHOST_F_LOG_ALL: Logging write descriptors supported"),
366     FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
367             "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
368             "negotiation supported"),
369     { -1, "" }
370 };
371 #endif
372 
373 /* virtio/vhost-user-fs features mapping */
374 #ifdef CONFIG_VHOST_USER_FS
375 static const qmp_virtio_feature_map_t virtio_fs_feature_map[] = {
376     FEATURE_ENTRY(VHOST_F_LOG_ALL, \
377             "VHOST_F_LOG_ALL: Logging write descriptors supported"),
378     FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
379             "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
380             "negotiation supported"),
381     { -1, "" }
382 };
383 #endif
384 
385 /* virtio/vhost-user-i2c features mapping */
386 #ifdef CONFIG_VIRTIO_I2C_ADAPTER
387 static const qmp_virtio_feature_map_t virtio_i2c_feature_map[] = {
388     FEATURE_ENTRY(VIRTIO_I2C_F_ZERO_LENGTH_REQUEST, \
389             "VIRTIO_I2C_F_ZERO_LENGTH_REQUEST: Zero length requests supported"),
390     FEATURE_ENTRY(VHOST_F_LOG_ALL, \
391             "VHOST_F_LOG_ALL: Logging write descriptors supported"),
392     FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
393             "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
394             "negotiation supported"),
395     { -1, "" }
396 };
397 #endif
398 
399 /* virtio/vhost-vsock features mapping */
400 #ifdef CONFIG_VHOST_VSOCK
401 static const qmp_virtio_feature_map_t virtio_vsock_feature_map[] = {
402     FEATURE_ENTRY(VIRTIO_VSOCK_F_SEQPACKET, \
403             "VIRTIO_VSOCK_F_SEQPACKET: SOCK_SEQPACKET supported"),
404     FEATURE_ENTRY(VHOST_F_LOG_ALL, \
405             "VHOST_F_LOG_ALL: Logging write descriptors supported"),
406     FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
407             "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
408             "negotiation supported"),
409     { -1, "" }
410 };
411 #endif
412 
413 /* virtio-balloon features mapping */
414 #ifdef CONFIG_VIRTIO_BALLOON
415 static const qmp_virtio_feature_map_t virtio_balloon_feature_map[] = {
416     FEATURE_ENTRY(VIRTIO_BALLOON_F_MUST_TELL_HOST, \
417             "VIRTIO_BALLOON_F_MUST_TELL_HOST: Tell host before reclaiming "
418             "pages"),
419     FEATURE_ENTRY(VIRTIO_BALLOON_F_STATS_VQ, \
420             "VIRTIO_BALLOON_F_STATS_VQ: Guest memory stats VQ available"),
421     FEATURE_ENTRY(VIRTIO_BALLOON_F_DEFLATE_ON_OOM, \
422             "VIRTIO_BALLOON_F_DEFLATE_ON_OOM: Deflate balloon when guest OOM"),
423     FEATURE_ENTRY(VIRTIO_BALLOON_F_FREE_PAGE_HINT, \
424             "VIRTIO_BALLOON_F_FREE_PAGE_HINT: VQ reporting free pages enabled"),
425     FEATURE_ENTRY(VIRTIO_BALLOON_F_PAGE_POISON, \
426             "VIRTIO_BALLOON_F_PAGE_POISON: Guest page poisoning enabled"),
427     FEATURE_ENTRY(VIRTIO_BALLOON_F_REPORTING, \
428             "VIRTIO_BALLOON_F_REPORTING: Page reporting VQ enabled"),
429     { -1, "" }
430 };
431 #endif
432 
433 /* virtio-crypto features mapping */
434 #ifdef CONFIG_VIRTIO_CRYPTO
435 static const qmp_virtio_feature_map_t virtio_crypto_feature_map[] = {
436     FEATURE_ENTRY(VHOST_F_LOG_ALL, \
437             "VHOST_F_LOG_ALL: Logging write descriptors supported"),
438     { -1, "" }
439 };
440 #endif
441 
442 /* virtio-iommu features mapping */
443 #ifdef CONFIG_VIRTIO_IOMMU
444 static const qmp_virtio_feature_map_t virtio_iommu_feature_map[] = {
445     FEATURE_ENTRY(VIRTIO_IOMMU_F_INPUT_RANGE, \
446             "VIRTIO_IOMMU_F_INPUT_RANGE: Range of available virtual addrs. "
447             "available"),
448     FEATURE_ENTRY(VIRTIO_IOMMU_F_DOMAIN_RANGE, \
449             "VIRTIO_IOMMU_F_DOMAIN_RANGE: Number of supported domains "
450             "available"),
451     FEATURE_ENTRY(VIRTIO_IOMMU_F_MAP_UNMAP, \
452             "VIRTIO_IOMMU_F_MAP_UNMAP: Map and unmap requests available"),
453     FEATURE_ENTRY(VIRTIO_IOMMU_F_BYPASS, \
454             "VIRTIO_IOMMU_F_BYPASS: Endpoints not attached to domains are in "
455             "bypass mode"),
456     FEATURE_ENTRY(VIRTIO_IOMMU_F_PROBE, \
457             "VIRTIO_IOMMU_F_PROBE: Probe requests available"),
458     FEATURE_ENTRY(VIRTIO_IOMMU_F_MMIO, \
459             "VIRTIO_IOMMU_F_MMIO: VIRTIO_IOMMU_MAP_F_MMIO flag available"),
460     FEATURE_ENTRY(VIRTIO_IOMMU_F_BYPASS_CONFIG, \
461             "VIRTIO_IOMMU_F_BYPASS_CONFIG: Bypass field of IOMMU config "
462             "available"),
463     { -1, "" }
464 };
465 #endif
466 
467 /* virtio-mem features mapping */
468 #ifdef CONFIG_VIRTIO_MEM
469 static const qmp_virtio_feature_map_t virtio_mem_feature_map[] = {
470 #ifndef CONFIG_ACPI
471     FEATURE_ENTRY(VIRTIO_MEM_F_ACPI_PXM, \
472             "VIRTIO_MEM_F_ACPI_PXM: node_id is an ACPI PXM and is valid"),
473 #endif /* !CONFIG_ACPI */
474     FEATURE_ENTRY(VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, \
475             "VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE: Unplugged memory cannot be "
476             "accessed"),
477     { -1, "" }
478 };
479 #endif
480 
481 /* virtio-rng features mapping */
482 #ifdef CONFIG_VIRTIO_RNG
483 static const qmp_virtio_feature_map_t virtio_rng_feature_map[] = {
484     FEATURE_ENTRY(VHOST_F_LOG_ALL, \
485             "VHOST_F_LOG_ALL: Logging write descriptors supported"),
486     FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
487             "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
488             "negotiation supported"),
489     { -1, "" }
490 };
491 #endif
492 
493 /*
494  * The alignment to use between consumer and producer parts of vring.
495  * x86 pagesize again. This is the default, used by transports like PCI
496  * which don't provide a means for the guest to tell the host the alignment.
497  */
498 #define VIRTIO_PCI_VRING_ALIGN         4096
499 
500 typedef struct VRingDesc
501 {
502     uint64_t addr;
503     uint32_t len;
504     uint16_t flags;
505     uint16_t next;
506 } VRingDesc;
507 
508 typedef struct VRingPackedDesc {
509     uint64_t addr;
510     uint32_t len;
511     uint16_t id;
512     uint16_t flags;
513 } VRingPackedDesc;
514 
515 typedef struct VRingAvail
516 {
517     uint16_t flags;
518     uint16_t idx;
519     uint16_t ring[];
520 } VRingAvail;
521 
522 typedef struct VRingUsedElem
523 {
524     uint32_t id;
525     uint32_t len;
526 } VRingUsedElem;
527 
528 typedef struct VRingUsed
529 {
530     uint16_t flags;
531     uint16_t idx;
532     VRingUsedElem ring[];
533 } VRingUsed;
534 
535 typedef struct VRingMemoryRegionCaches {
536     struct rcu_head rcu;
537     MemoryRegionCache desc;
538     MemoryRegionCache avail;
539     MemoryRegionCache used;
540 } VRingMemoryRegionCaches;
541 
542 typedef struct VRing
543 {
544     unsigned int num;
545     unsigned int num_default;
546     unsigned int align;
547     hwaddr desc;
548     hwaddr avail;
549     hwaddr used;
550     VRingMemoryRegionCaches *caches;
551 } VRing;
552 
553 typedef struct VRingPackedDescEvent {
554     uint16_t off_wrap;
555     uint16_t flags;
556 } VRingPackedDescEvent;
557 
558 struct VirtQueue
559 {
560     VRing vring;
561     VirtQueueElement *used_elems;
562 
563     /* Next head to pop */
564     uint16_t last_avail_idx;
565     bool last_avail_wrap_counter;
566 
567     /* Last avail_idx read from VQ. */
568     uint16_t shadow_avail_idx;
569     bool shadow_avail_wrap_counter;
570 
571     uint16_t used_idx;
572     bool used_wrap_counter;
573 
574     /* Last used index value we have signalled on */
575     uint16_t signalled_used;
576 
577     /* Whether signalled_used is valid */
578     bool signalled_used_valid;
579 
580     /* Notification enabled? */
581     bool notification;
582 
583     uint16_t queue_index;
584 
585     unsigned int inuse;
586 
587     uint16_t vector;
588     VirtIOHandleOutput handle_output;
589     VirtIODevice *vdev;
590     EventNotifier guest_notifier;
591     EventNotifier host_notifier;
592     bool host_notifier_enabled;
593     QLIST_ENTRY(VirtQueue) node;
594 };
595 
596 const char *virtio_device_names[] = {
597     [VIRTIO_ID_NET] = "virtio-net",
598     [VIRTIO_ID_BLOCK] = "virtio-blk",
599     [VIRTIO_ID_CONSOLE] = "virtio-serial",
600     [VIRTIO_ID_RNG] = "virtio-rng",
601     [VIRTIO_ID_BALLOON] = "virtio-balloon",
602     [VIRTIO_ID_IOMEM] = "virtio-iomem",
603     [VIRTIO_ID_RPMSG] = "virtio-rpmsg",
604     [VIRTIO_ID_SCSI] = "virtio-scsi",
605     [VIRTIO_ID_9P] = "virtio-9p",
606     [VIRTIO_ID_MAC80211_WLAN] = "virtio-mac-wlan",
607     [VIRTIO_ID_RPROC_SERIAL] = "virtio-rproc-serial",
608     [VIRTIO_ID_CAIF] = "virtio-caif",
609     [VIRTIO_ID_MEMORY_BALLOON] = "virtio-mem-balloon",
610     [VIRTIO_ID_GPU] = "virtio-gpu",
611     [VIRTIO_ID_CLOCK] = "virtio-clk",
612     [VIRTIO_ID_INPUT] = "virtio-input",
613     [VIRTIO_ID_VSOCK] = "vhost-vsock",
614     [VIRTIO_ID_CRYPTO] = "virtio-crypto",
615     [VIRTIO_ID_SIGNAL_DIST] = "virtio-signal",
616     [VIRTIO_ID_PSTORE] = "virtio-pstore",
617     [VIRTIO_ID_IOMMU] = "virtio-iommu",
618     [VIRTIO_ID_MEM] = "virtio-mem",
619     [VIRTIO_ID_SOUND] = "virtio-sound",
620     [VIRTIO_ID_FS] = "virtio-user-fs",
621     [VIRTIO_ID_PMEM] = "virtio-pmem",
622     [VIRTIO_ID_RPMB] = "virtio-rpmb",
623     [VIRTIO_ID_MAC80211_HWSIM] = "virtio-mac-hwsim",
624     [VIRTIO_ID_VIDEO_ENCODER] = "virtio-vid-encoder",
625     [VIRTIO_ID_VIDEO_DECODER] = "virtio-vid-decoder",
626     [VIRTIO_ID_SCMI] = "virtio-scmi",
627     [VIRTIO_ID_NITRO_SEC_MOD] = "virtio-nitro-sec-mod",
628     [VIRTIO_ID_I2C_ADAPTER] = "vhost-user-i2c",
629     [VIRTIO_ID_WATCHDOG] = "virtio-watchdog",
630     [VIRTIO_ID_CAN] = "virtio-can",
631     [VIRTIO_ID_DMABUF] = "virtio-dmabuf",
632     [VIRTIO_ID_PARAM_SERV] = "virtio-param-serv",
633     [VIRTIO_ID_AUDIO_POLICY] = "virtio-audio-pol",
634     [VIRTIO_ID_BT] = "virtio-bluetooth",
635     [VIRTIO_ID_GPIO] = "virtio-gpio"
636 };
637 
638 static const char *virtio_id_to_name(uint16_t device_id)
639 {
640     assert(device_id < G_N_ELEMENTS(virtio_device_names));
641     const char *name = virtio_device_names[device_id];
642     assert(name != NULL);
643     return name;
644 }
645 
646 /* Called within call_rcu().  */
647 static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
648 {
649     assert(caches != NULL);
650     address_space_cache_destroy(&caches->desc);
651     address_space_cache_destroy(&caches->avail);
652     address_space_cache_destroy(&caches->used);
653     g_free(caches);
654 }
655 
656 static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
657 {
658     VRingMemoryRegionCaches *caches;
659 
660     caches = qatomic_read(&vq->vring.caches);
661     qatomic_rcu_set(&vq->vring.caches, NULL);
662     if (caches) {
663         call_rcu(caches, virtio_free_region_cache, rcu);
664     }
665 }
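/*
 * Readers dereference vq->vring.caches under rcu_read_lock() (see
 * vring_get_region_caches() below), so the caches dropped above are not
 * freed synchronously: the pointer is cleared first and the memory is
 * reclaimed via call_rcu() once in-flight readers have finished.
 */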
666 
667 static void virtio_init_region_cache(VirtIODevice *vdev, int n)
668 {
669     VirtQueue *vq = &vdev->vq[n];
670     VRingMemoryRegionCaches *old = vq->vring.caches;
671     VRingMemoryRegionCaches *new = NULL;
672     hwaddr addr, size;
673     int64_t len;
674     bool packed;
675 
676 
677     addr = vq->vring.desc;
678     if (!addr) {
679         goto out_no_cache;
680     }
681     new = g_new0(VRingMemoryRegionCaches, 1);
682     size = virtio_queue_get_desc_size(vdev, n);
683     packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
684                                    true : false;
685     len = address_space_cache_init(&new->desc, vdev->dma_as,
686                                    addr, size, packed);
687     if (len < size) {
688         virtio_error(vdev, "Cannot map desc");
689         goto err_desc;
690     }
691 
692     size = virtio_queue_get_used_size(vdev, n);
693     len = address_space_cache_init(&new->used, vdev->dma_as,
694                                    vq->vring.used, size, true);
695     if (len < size) {
696         virtio_error(vdev, "Cannot map used");
697         goto err_used;
698     }
699 
700     size = virtio_queue_get_avail_size(vdev, n);
701     len = address_space_cache_init(&new->avail, vdev->dma_as,
702                                    vq->vring.avail, size, false);
703     if (len < size) {
704         virtio_error(vdev, "Cannot map avail");
705         goto err_avail;
706     }
707 
708     qatomic_rcu_set(&vq->vring.caches, new);
709     if (old) {
710         call_rcu(old, virtio_free_region_cache, rcu);
711     }
712     return;
713 
714 err_avail:
715     address_space_cache_destroy(&new->avail);
716 err_used:
717     address_space_cache_destroy(&new->used);
718 err_desc:
719     address_space_cache_destroy(&new->desc);
720 out_no_cache:
721     g_free(new);
722     virtio_virtqueue_reset_region_cache(vq);
723 }
724 
725 /* virt queue functions */
726 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
727 {
728     VRing *vring = &vdev->vq[n].vring;
729 
730     if (!vring->num || !vring->desc || !vring->align) {
731         /* not yet setup -> nothing to do */
732         return;
733     }
734     vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
735     vring->used = vring_align(vring->avail +
736                               offsetof(VRingAvail, ring[vring->num]),
737                               vring->align);
738     virtio_init_region_cache(vdev, n);
739 }
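/*
 * Rough split-ring layout produced above, starting at vring->desc:
 *   desc ring:  num * sizeof(VRingDesc) bytes (16 per descriptor)
 *   avail ring: u16 flags, u16 idx, u16 ring[num], plus a trailing
 *               used_event u16 when VIRTIO_RING_F_EVENT_IDX is negotiated
 *   used ring:  aligned up to vring->align; u16 flags, u16 idx,
 *               VRingUsedElem ring[num] (8 bytes each), plus a trailing
 *               avail_event u16 with EVENT_IDX
 */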
740 
741 /* Called within rcu_read_lock().  */
742 static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
743                                   MemoryRegionCache *cache, int i)
744 {
745     address_space_read_cached(cache, i * sizeof(VRingDesc),
746                               desc, sizeof(VRingDesc));
747     virtio_tswap64s(vdev, &desc->addr);
748     virtio_tswap32s(vdev, &desc->len);
749     virtio_tswap16s(vdev, &desc->flags);
750     virtio_tswap16s(vdev, &desc->next);
751 }
752 
753 static void vring_packed_event_read(VirtIODevice *vdev,
754                                     MemoryRegionCache *cache,
755                                     VRingPackedDescEvent *e)
756 {
757     hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
758     hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
759 
760     e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags);
761     /* Make sure flags is seen before off_wrap */
762     smp_rmb();
763     e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off);
764     virtio_tswap16s(vdev, &e->flags);
765 }
766 
767 static void vring_packed_off_wrap_write(VirtIODevice *vdev,
768                                         MemoryRegionCache *cache,
769                                         uint16_t off_wrap)
770 {
771     hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
772 
773     virtio_stw_phys_cached(vdev, cache, off, off_wrap);
774     address_space_cache_invalidate(cache, off, sizeof(off_wrap));
775 }
776 
777 static void vring_packed_flags_write(VirtIODevice *vdev,
778                                      MemoryRegionCache *cache, uint16_t flags)
779 {
780     hwaddr off = offsetof(VRingPackedDescEvent, flags);
781 
782     virtio_stw_phys_cached(vdev, cache, off, flags);
783     address_space_cache_invalidate(cache, off, sizeof(flags));
784 }
785 
786 /* Called within rcu_read_lock().  */
787 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
788 {
789     return qatomic_rcu_read(&vq->vring.caches);
790 }
791 
792 /* Called within rcu_read_lock().  */
793 static inline uint16_t vring_avail_flags(VirtQueue *vq)
794 {
795     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
796     hwaddr pa = offsetof(VRingAvail, flags);
797 
798     if (!caches) {
799         return 0;
800     }
801 
802     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
803 }
804 
805 /* Called within rcu_read_lock().  */
806 static inline uint16_t vring_avail_idx(VirtQueue *vq)
807 {
808     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
809     hwaddr pa = offsetof(VRingAvail, idx);
810 
811     if (!caches) {
812         return 0;
813     }
814 
815     vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
816     return vq->shadow_avail_idx;
817 }
818 
819 /* Called within rcu_read_lock().  */
820 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
821 {
822     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
823     hwaddr pa = offsetof(VRingAvail, ring[i]);
824 
825     if (!caches) {
826         return 0;
827     }
828 
829     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
830 }
831 
832 /* Called within rcu_read_lock().  */
833 static inline uint16_t vring_get_used_event(VirtQueue *vq)
834 {
835     return vring_avail_ring(vq, vq->vring.num);
836 }
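/*
 * With VIRTIO_RING_F_EVENT_IDX the driver stores used_event in the slot
 * immediately after the avail ring (avail->ring[vring.num]), which is why
 * it is fetched through vring_avail_ring() above.
 */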
837 
838 /* Called within rcu_read_lock().  */
839 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
840                                     int i)
841 {
842     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
843     hwaddr pa = offsetof(VRingUsed, ring[i]);
844 
845     if (!caches) {
846         return;
847     }
848 
849     virtio_tswap32s(vq->vdev, &uelem->id);
850     virtio_tswap32s(vq->vdev, &uelem->len);
851     address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
852     address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
853 }
854 
855 /* Called within rcu_read_lock(). */
856 static inline uint16_t vring_used_flags(VirtQueue *vq)
857 {
858     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
859     hwaddr pa = offsetof(VRingUsed, flags);
860 
861     if (!caches) {
862         return 0;
863     }
864 
865     return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
866 }
867 
868 /* Called within rcu_read_lock().  */
869 static uint16_t vring_used_idx(VirtQueue *vq)
870 {
871     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
872     hwaddr pa = offsetof(VRingUsed, idx);
873 
874     if (!caches) {
875         return 0;
876     }
877 
878     return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
879 }
880 
881 /* Called within rcu_read_lock().  */
882 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
883 {
884     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
885     hwaddr pa = offsetof(VRingUsed, idx);
886 
887     if (caches) {
888         virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
889         address_space_cache_invalidate(&caches->used, pa, sizeof(val));
890     }
891 
892     vq->used_idx = val;
893 }
894 
895 /* Called within rcu_read_lock().  */
896 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
897 {
898     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
899     VirtIODevice *vdev = vq->vdev;
900     hwaddr pa = offsetof(VRingUsed, flags);
901     uint16_t flags;
902 
903     if (!caches) {
904         return;
905     }
906 
907     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
908     virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
909     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
910 }
911 
912 /* Called within rcu_read_lock().  */
913 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
914 {
915     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
916     VirtIODevice *vdev = vq->vdev;
917     hwaddr pa = offsetof(VRingUsed, flags);
918     uint16_t flags;
919 
920     if (!caches) {
921         return;
922     }
923 
924     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
925     virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
926     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
927 }
928 
929 /* Called within rcu_read_lock().  */
930 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
931 {
932     VRingMemoryRegionCaches *caches;
933     hwaddr pa;
934     if (!vq->notification) {
935         return;
936     }
937 
938     caches = vring_get_region_caches(vq);
939     if (!caches) {
940         return;
941     }
942 
943     pa = offsetof(VRingUsed, ring[vq->vring.num]);
944     virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
945     address_space_cache_invalidate(&caches->used, pa, sizeof(val));
946 }
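/*
 * Symmetrically to used_event, avail_event lives in the slot after the
 * used ring (used->ring[vring.num]); the device writes it to tell the
 * driver which avail index should trigger the next notification.
 */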
947 
948 static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
949 {
950     RCU_READ_LOCK_GUARD();
951 
952     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
953         vring_set_avail_event(vq, vring_avail_idx(vq));
954     } else if (enable) {
955         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
956     } else {
957         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
958     }
959     if (enable) {
960         /* Expose avail event/used flags before caller checks the avail idx. */
961         smp_mb();
962     }
963 }
964 
965 static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
966 {
967     uint16_t off_wrap;
968     VRingPackedDescEvent e;
969     VRingMemoryRegionCaches *caches;
970 
971     RCU_READ_LOCK_GUARD();
972     caches = vring_get_region_caches(vq);
973     if (!caches) {
974         return;
975     }
976 
977     vring_packed_event_read(vq->vdev, &caches->used, &e);
978 
979     if (!enable) {
980         e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
981     } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
982         off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
983         vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
984         /* Make sure off_wrap is written before flags */
985         smp_wmb();
986         e.flags = VRING_PACKED_EVENT_FLAG_DESC;
987     } else {
988         e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
989     }
990 
991     vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
992     if (enable) {
993         /* Expose avail event/used flags before caller checks the avail idx. */
994         smp_mb();
995     }
996 }
997 
998 bool virtio_queue_get_notification(VirtQueue *vq)
999 {
1000     return vq->notification;
1001 }
1002 
1003 void virtio_queue_set_notification(VirtQueue *vq, int enable)
1004 {
1005     vq->notification = enable;
1006 
1007     if (!vq->vring.desc) {
1008         return;
1009     }
1010 
1011     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1012         virtio_queue_packed_set_notification(vq, enable);
1013     } else {
1014         virtio_queue_split_set_notification(vq, enable);
1015     }
1016 }
1017 
1018 int virtio_queue_ready(VirtQueue *vq)
1019 {
1020     return vq->vring.avail != 0;
1021 }
1022 
1023 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
1024                                          uint16_t *flags,
1025                                          MemoryRegionCache *cache,
1026                                          int i)
1027 {
1028     hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
1029 
1030     *flags = virtio_lduw_phys_cached(vdev, cache, off);
1031 }
1032 
1033 static void vring_packed_desc_read(VirtIODevice *vdev,
1034                                    VRingPackedDesc *desc,
1035                                    MemoryRegionCache *cache,
1036                                    int i, bool strict_order)
1037 {
1038     hwaddr off = i * sizeof(VRingPackedDesc);
1039 
1040     vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
1041 
1042     if (strict_order) {
1043         /* Make sure flags is read before the rest of the fields. */
1044         smp_rmb();
1045     }
1046 
1047     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
1048                               &desc->addr, sizeof(desc->addr));
1049     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
1050                               &desc->id, sizeof(desc->id));
1051     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
1052                               &desc->len, sizeof(desc->len));
1053     virtio_tswap64s(vdev, &desc->addr);
1054     virtio_tswap16s(vdev, &desc->id);
1055     virtio_tswap32s(vdev, &desc->len);
1056 }
1057 
1058 static void vring_packed_desc_write_data(VirtIODevice *vdev,
1059                                          VRingPackedDesc *desc,
1060                                          MemoryRegionCache *cache,
1061                                          int i)
1062 {
1063     hwaddr off_id = i * sizeof(VRingPackedDesc) +
1064                     offsetof(VRingPackedDesc, id);
1065     hwaddr off_len = i * sizeof(VRingPackedDesc) +
1066                     offsetof(VRingPackedDesc, len);
1067 
1068     virtio_tswap32s(vdev, &desc->len);
1069     virtio_tswap16s(vdev, &desc->id);
1070     address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
1071     address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
1072     address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
1073     address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
1074 }
1075 
1076 static void vring_packed_desc_write_flags(VirtIODevice *vdev,
1077                                           VRingPackedDesc *desc,
1078                                           MemoryRegionCache *cache,
1079                                           int i)
1080 {
1081     hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
1082 
1083     virtio_stw_phys_cached(vdev, cache, off, desc->flags);
1084     address_space_cache_invalidate(cache, off, sizeof(desc->flags));
1085 }
1086 
1087 static void vring_packed_desc_write(VirtIODevice *vdev,
1088                                     VRingPackedDesc *desc,
1089                                     MemoryRegionCache *cache,
1090                                     int i, bool strict_order)
1091 {
1092     vring_packed_desc_write_data(vdev, desc, cache, i);
1093     if (strict_order) {
1094         /* Make sure data is written before flags. */
1095         smp_wmb();
1096     }
1097     vring_packed_desc_write_flags(vdev, desc, cache, i);
1098 }
1099 
1100 static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
1101 {
1102     bool avail, used;
1103 
1104     avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1105     used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1106     return (avail != used) && (avail == wrap_counter);
1107 }
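/*
 * Packed ring: the driver marks a descriptor available by setting AVAIL to
 * its wrap counter and USED to the inverse; the device marks it used by
 * setting both to the same value.  An entry is therefore available to us
 * exactly when the two bits differ and AVAIL matches the wrap counter we
 * expect for the current pass over the ring.
 */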
1108 
1109 /* Fetch avail_idx from VQ memory only when we really need to know if
1110  * guest has added some buffers.
1111  * Called within rcu_read_lock().  */
1112 static int virtio_queue_empty_rcu(VirtQueue *vq)
1113 {
1114     if (virtio_device_disabled(vq->vdev)) {
1115         return 1;
1116     }
1117 
1118     if (unlikely(!vq->vring.avail)) {
1119         return 1;
1120     }
1121 
1122     if (vq->shadow_avail_idx != vq->last_avail_idx) {
1123         return 0;
1124     }
1125 
1126     return vring_avail_idx(vq) == vq->last_avail_idx;
1127 }
1128 
1129 static int virtio_queue_split_empty(VirtQueue *vq)
1130 {
1131     bool empty;
1132 
1133     if (virtio_device_disabled(vq->vdev)) {
1134         return 1;
1135     }
1136 
1137     if (unlikely(!vq->vring.avail)) {
1138         return 1;
1139     }
1140 
1141     if (vq->shadow_avail_idx != vq->last_avail_idx) {
1142         return 0;
1143     }
1144 
1145     RCU_READ_LOCK_GUARD();
1146     empty = vring_avail_idx(vq) == vq->last_avail_idx;
1147     return empty;
1148 }
1149 
1150 /* Called within rcu_read_lock().  */
1151 static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
1152 {
1153     struct VRingPackedDesc desc;
1154     VRingMemoryRegionCaches *cache;
1155 
1156     if (unlikely(!vq->vring.desc)) {
1157         return 1;
1158     }
1159 
1160     cache = vring_get_region_caches(vq);
1161     if (!cache) {
1162         return 1;
1163     }
1164 
1165     vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
1166                                  vq->last_avail_idx);
1167 
1168     return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
1169 }
1170 
1171 static int virtio_queue_packed_empty(VirtQueue *vq)
1172 {
1173     RCU_READ_LOCK_GUARD();
1174     return virtio_queue_packed_empty_rcu(vq);
1175 }
1176 
1177 int virtio_queue_empty(VirtQueue *vq)
1178 {
1179     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1180         return virtio_queue_packed_empty(vq);
1181     } else {
1182         return virtio_queue_split_empty(vq);
1183     }
1184 }
1185 
1186 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
1187                                unsigned int len)
1188 {
1189     AddressSpace *dma_as = vq->vdev->dma_as;
1190     unsigned int offset;
1191     int i;
1192 
1193     offset = 0;
1194     for (i = 0; i < elem->in_num; i++) {
1195         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
1196 
1197         dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
1198                          elem->in_sg[i].iov_len,
1199                          DMA_DIRECTION_FROM_DEVICE, size);
1200 
1201         offset += size;
1202     }
1203 
1204     for (i = 0; i < elem->out_num; i++)
1205         dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
1206                          elem->out_sg[i].iov_len,
1207                          DMA_DIRECTION_TO_DEVICE,
1208                          elem->out_sg[i].iov_len);
1209 }
1210 
1211 /* virtqueue_detach_element:
1212  * @vq: The #VirtQueue
1213  * @elem: The #VirtQueueElement
1214  * @len: number of bytes written
1215  *
1216  * Detach the element from the virtqueue.  This function is suitable for device
1217  * reset or other situations where a #VirtQueueElement is simply freed and will
1218  * not be pushed or discarded.
1219  */
1220 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
1221                               unsigned int len)
1222 {
1223     vq->inuse -= elem->ndescs;
1224     virtqueue_unmap_sg(vq, elem, len);
1225 }
1226 
1227 static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
1228 {
1229     vq->last_avail_idx -= num;
1230 }
1231 
1232 static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
1233 {
1234     if (vq->last_avail_idx < num) {
1235         vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
1236         vq->last_avail_wrap_counter ^= 1;
1237     } else {
1238         vq->last_avail_idx -= num;
1239     }
1240 }
1241 
1242 /* virtqueue_unpop:
1243  * @vq: The #VirtQueue
1244  * @elem: The #VirtQueueElement
1245  * @len: number of bytes written
1246  *
1247  * Pretend the most recent element wasn't popped from the virtqueue.  The next
1248  * call to virtqueue_pop() will refetch the element.
1249  */
1250 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
1251                      unsigned int len)
1252 {
1253 
1254     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1255         virtqueue_packed_rewind(vq, 1);
1256     } else {
1257         virtqueue_split_rewind(vq, 1);
1258     }
1259 
1260     virtqueue_detach_element(vq, elem, len);
1261 }
1262 
1263 /* virtqueue_rewind:
1264  * @vq: The #VirtQueue
1265  * @num: Number of elements to push back
1266  *
1267  * Pretend that elements weren't popped from the virtqueue.  The next
1268  * virtqueue_pop() will refetch the oldest element.
1269  *
1270  * Use virtqueue_unpop() instead if you have a VirtQueueElement.
1271  *
1272  * Returns: true on success, false if @num is greater than the number of in use
1273  * elements.
1274  */
1275 bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
1276 {
1277     if (num > vq->inuse) {
1278         return false;
1279     }
1280 
1281     vq->inuse -= num;
1282     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1283         virtqueue_packed_rewind(vq, num);
1284     } else {
1285         virtqueue_split_rewind(vq, num);
1286     }
1287     return true;
1288 }
1289 
1290 static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
1291                     unsigned int len, unsigned int idx)
1292 {
1293     VRingUsedElem uelem;
1294 
1295     if (unlikely(!vq->vring.used)) {
1296         return;
1297     }
1298 
1299     idx = (idx + vq->used_idx) % vq->vring.num;
1300 
1301     uelem.id = elem->index;
1302     uelem.len = len;
1303     vring_used_write(vq, &uelem, idx);
1304 }
1305 
1306 static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
1307                                   unsigned int len, unsigned int idx)
1308 {
1309     vq->used_elems[idx].index = elem->index;
1310     vq->used_elems[idx].len = len;
1311     vq->used_elems[idx].ndescs = elem->ndescs;
1312 }
1313 
1314 static void virtqueue_packed_fill_desc(VirtQueue *vq,
1315                                        const VirtQueueElement *elem,
1316                                        unsigned int idx,
1317                                        bool strict_order)
1318 {
1319     uint16_t head;
1320     VRingMemoryRegionCaches *caches;
1321     VRingPackedDesc desc = {
1322         .id = elem->index,
1323         .len = elem->len,
1324     };
1325     bool wrap_counter = vq->used_wrap_counter;
1326 
1327     if (unlikely(!vq->vring.desc)) {
1328         return;
1329     }
1330 
1331     head = vq->used_idx + idx;
1332     if (head >= vq->vring.num) {
1333         head -= vq->vring.num;
1334         wrap_counter ^= 1;
1335     }
1336     if (wrap_counter) {
1337         desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
1338         desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
1339     } else {
1340         desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
1341         desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
1342     }
1343 
1344     caches = vring_get_region_caches(vq);
1345     if (!caches) {
1346         return;
1347     }
1348 
1349     vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
1350 }
1351 
1352 /* Called within rcu_read_lock().  */
1353 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
1354                     unsigned int len, unsigned int idx)
1355 {
1356     trace_virtqueue_fill(vq, elem, len, idx);
1357 
1358     virtqueue_unmap_sg(vq, elem, len);
1359 
1360     if (virtio_device_disabled(vq->vdev)) {
1361         return;
1362     }
1363 
1364     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1365         virtqueue_packed_fill(vq, elem, len, idx);
1366     } else {
1367         virtqueue_split_fill(vq, elem, len, idx);
1368     }
1369 }
1370 
1371 /* Called within rcu_read_lock().  */
1372 static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
1373 {
1374     uint16_t old, new;
1375 
1376     if (unlikely(!vq->vring.used)) {
1377         return;
1378     }
1379 
1380     /* Make sure buffer is written before we update index. */
1381     smp_wmb();
1382     trace_virtqueue_flush(vq, count);
1383     old = vq->used_idx;
1384     new = old + count;
1385     vring_used_idx_set(vq, new);
1386     vq->inuse -= count;
1387     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
1388         vq->signalled_used_valid = false;
1389 }
1390 
1391 static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
1392 {
1393     unsigned int i, ndescs = 0;
1394 
1395     if (unlikely(!vq->vring.desc)) {
1396         return;
1397     }
1398 
1399     for (i = 1; i < count; i++) {
1400         virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
1401         ndescs += vq->used_elems[i].ndescs;
1402     }
1403     virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
1404     ndescs += vq->used_elems[0].ndescs;
1405 
1406     vq->inuse -= ndescs;
1407     vq->used_idx += ndescs;
1408     if (vq->used_idx >= vq->vring.num) {
1409         vq->used_idx -= vq->vring.num;
1410         vq->used_wrap_counter ^= 1;
1411         vq->signalled_used_valid = false;
1412     }
1413 }
1414 
1415 void virtqueue_flush(VirtQueue *vq, unsigned int count)
1416 {
1417     if (virtio_device_disabled(vq->vdev)) {
1418         vq->inuse -= count;
1419         return;
1420     }
1421 
1422     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1423         virtqueue_packed_flush(vq, count);
1424     } else {
1425         virtqueue_split_flush(vq, count);
1426     }
1427 }
1428 
1429 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
1430                     unsigned int len)
1431 {
1432     RCU_READ_LOCK_GUARD();
1433     virtqueue_fill(vq, elem, len, 0);
1434     virtqueue_flush(vq, 1);
1435 }
1436 
1437 /* Called within rcu_read_lock().  */
1438 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
1439 {
1440     uint16_t num_heads = vring_avail_idx(vq) - idx;
1441 
1442     /* Check it isn't doing very strange things with descriptor numbers. */
1443     if (num_heads > vq->vring.num) {
1444         virtio_error(vq->vdev, "Guest moved used index from %u to %u",
1445                      idx, vq->shadow_avail_idx);
1446         return -EINVAL;
1447     }
1448     /* On success, callers read a descriptor at vq->last_avail_idx.
1449      * Make sure descriptor read does not bypass avail index read. */
1450     if (num_heads) {
1451         smp_rmb();
1452     }
1453 
1454     return num_heads;
1455 }
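/*
 * Both vring_avail_idx() and idx above are free-running 16-bit values, so
 * the unsigned subtraction yields the number of new heads even after the
 * avail index wraps past 0xffff.
 */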
1456 
1457 /* Called within rcu_read_lock().  */
1458 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
1459                                unsigned int *head)
1460 {
1461     /* Grab the next descriptor number they're advertising, and increment
1462      * the index we've seen. */
1463     *head = vring_avail_ring(vq, idx % vq->vring.num);
1464 
1465     /* If their number is silly, that's a fatal mistake. */
1466     if (*head >= vq->vring.num) {
1467         virtio_error(vq->vdev, "Guest says index %u is available", *head);
1468         return false;
1469     }
1470 
1471     return true;
1472 }
1473 
1474 enum {
1475     VIRTQUEUE_READ_DESC_ERROR = -1,
1476     VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
1477     VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
1478 };
1479 
1480 static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
1481                                           MemoryRegionCache *desc_cache,
1482                                           unsigned int max, unsigned int *next)
1483 {
1484     /* If this descriptor says it doesn't chain, we're done. */
1485     if (!(desc->flags & VRING_DESC_F_NEXT)) {
1486         return VIRTQUEUE_READ_DESC_DONE;
1487     }
1488 
1489     /* Check they're not leading us off end of descriptors. */
1490     *next = desc->next;
1491     /* Make sure compiler knows to grab that: we don't want it changing! */
1492     smp_wmb();
1493 
1494     if (*next >= max) {
1495         virtio_error(vdev, "Desc next is %u", *next);
1496         return VIRTQUEUE_READ_DESC_ERROR;
1497     }
1498 
1499     vring_split_desc_read(vdev, desc, desc_cache, *next);
1500     return VIRTQUEUE_READ_DESC_MORE;
1501 }
1502 
1503 /* Called within rcu_read_lock().  */
1504 static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
1505                             unsigned int *in_bytes, unsigned int *out_bytes,
1506                             unsigned max_in_bytes, unsigned max_out_bytes,
1507                             VRingMemoryRegionCaches *caches)
1508 {
1509     VirtIODevice *vdev = vq->vdev;
1510     unsigned int max, idx;
1511     unsigned int total_bufs, in_total, out_total;
1512     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1513     int64_t len = 0;
1514     int rc;
1515 
1516     idx = vq->last_avail_idx;
1517     total_bufs = in_total = out_total = 0;
1518 
1519     max = vq->vring.num;
1520 
1521     while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
1522         MemoryRegionCache *desc_cache = &caches->desc;
1523         unsigned int num_bufs;
1524         VRingDesc desc;
1525         unsigned int i;
1526 
1527         num_bufs = total_bufs;
1528 
1529         if (!virtqueue_get_head(vq, idx++, &i)) {
1530             goto err;
1531         }
1532 
1533         vring_split_desc_read(vdev, &desc, desc_cache, i);
1534 
1535         if (desc.flags & VRING_DESC_F_INDIRECT) {
1536             if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1537                 virtio_error(vdev, "Invalid size for indirect buffer table");
1538                 goto err;
1539             }
1540 
1541             /* If we've got too many, that implies a descriptor loop. */
1542             if (num_bufs >= max) {
1543                 virtio_error(vdev, "Looped descriptor");
1544                 goto err;
1545             }
1546 
1547             /* loop over the indirect descriptor table */
1548             len = address_space_cache_init(&indirect_desc_cache,
1549                                            vdev->dma_as,
1550                                            desc.addr, desc.len, false);
1551             desc_cache = &indirect_desc_cache;
1552             if (len < desc.len) {
1553                 virtio_error(vdev, "Cannot map indirect buffer");
1554                 goto err;
1555             }
1556 
1557             max = desc.len / sizeof(VRingDesc);
1558             num_bufs = i = 0;
1559             vring_split_desc_read(vdev, &desc, desc_cache, i);
1560         }
1561 
1562         do {
1563             /* If we've got too many, that implies a descriptor loop. */
1564             if (++num_bufs > max) {
1565                 virtio_error(vdev, "Looped descriptor");
1566                 goto err;
1567             }
1568 
1569             if (desc.flags & VRING_DESC_F_WRITE) {
1570                 in_total += desc.len;
1571             } else {
1572                 out_total += desc.len;
1573             }
1574             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1575                 goto done;
1576             }
1577 
1578             rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1579         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1580 
1581         if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1582             goto err;
1583         }
1584 
1585         if (desc_cache == &indirect_desc_cache) {
1586             address_space_cache_destroy(&indirect_desc_cache);
1587             total_bufs++;
1588         } else {
1589             total_bufs = num_bufs;
1590         }
1591     }
1592 
1593     if (rc < 0) {
1594         goto err;
1595     }
1596 
1597 done:
1598     address_space_cache_destroy(&indirect_desc_cache);
1599     if (in_bytes) {
1600         *in_bytes = in_total;
1601     }
1602     if (out_bytes) {
1603         *out_bytes = out_total;
1604     }
1605     return;
1606 
1607 err:
1608     in_total = out_total = 0;
1609     goto done;
1610 }
1611 
1612 static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1613                                            VRingPackedDesc *desc,
1614                                            MemoryRegionCache
1615                                            *desc_cache,
1616                                            unsigned int max,
1617                                            unsigned int *next,
1618                                            bool indirect)
1619 {
1620     /* If this descriptor says it doesn't chain, we're done. */
1621     if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1622         return VIRTQUEUE_READ_DESC_DONE;
1623     }
1624 
1625     ++*next;
1626     if (*next == max) {
1627         if (indirect) {
1628             return VIRTQUEUE_READ_DESC_DONE;
1629         } else {
1630             (*next) -= vq->vring.num;
1631         }
1632     }
1633 
1634     vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1635     return VIRTQUEUE_READ_DESC_MORE;
1636 }
1637 
1638 /* Called within rcu_read_lock().  */
1639 static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1640                                              unsigned int *in_bytes,
1641                                              unsigned int *out_bytes,
1642                                              unsigned max_in_bytes,
1643                                              unsigned max_out_bytes,
1644                                              VRingMemoryRegionCaches *caches)
1645 {
1646     VirtIODevice *vdev = vq->vdev;
1647     unsigned int max, idx;
1648     unsigned int total_bufs, in_total, out_total;
1649     MemoryRegionCache *desc_cache;
1650     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1651     int64_t len = 0;
1652     VRingPackedDesc desc;
1653     bool wrap_counter;
1654 
1655     idx = vq->last_avail_idx;
1656     wrap_counter = vq->last_avail_wrap_counter;
1657     total_bufs = in_total = out_total = 0;
1658 
1659     max = vq->vring.num;
1660 
1661     for (;;) {
1662         unsigned int num_bufs = total_bufs;
1663         unsigned int i = idx;
1664         int rc;
1665 
1666         desc_cache = &caches->desc;
1667         vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1668         if (!is_desc_avail(desc.flags, wrap_counter)) {
1669             break;
1670         }
1671 
1672         if (desc.flags & VRING_DESC_F_INDIRECT) {
1673             if (desc.len % sizeof(VRingPackedDesc)) {
1674                 virtio_error(vdev, "Invalid size for indirect buffer table");
1675                 goto err;
1676             }
1677 
1678             /* If we've got too many, that implies a descriptor loop. */
1679             if (num_bufs >= max) {
1680                 virtio_error(vdev, "Looped descriptor");
1681                 goto err;
1682             }
1683 
1684             /* loop over the indirect descriptor table */
1685             len = address_space_cache_init(&indirect_desc_cache,
1686                                            vdev->dma_as,
1687                                            desc.addr, desc.len, false);
1688             desc_cache = &indirect_desc_cache;
1689             if (len < desc.len) {
1690                 virtio_error(vdev, "Cannot map indirect buffer");
1691                 goto err;
1692             }
1693 
1694             max = desc.len / sizeof(VRingPackedDesc);
1695             num_bufs = i = 0;
1696             vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1697         }
1698 
1699         do {
1700             /* If we've got too many, that implies a descriptor loop. */
1701             if (++num_bufs > max) {
1702                 virtio_error(vdev, "Looped descriptor");
1703                 goto err;
1704             }
1705 
1706             if (desc.flags & VRING_DESC_F_WRITE) {
1707                 in_total += desc.len;
1708             } else {
1709                 out_total += desc.len;
1710             }
1711             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1712                 goto done;
1713             }
1714 
1715             rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1716                                                  &i, desc_cache ==
1717                                                  &indirect_desc_cache);
1718         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1719 
1720         if (desc_cache == &indirect_desc_cache) {
1721             address_space_cache_destroy(&indirect_desc_cache);
1722             total_bufs++;
1723             idx++;
1724         } else {
1725             idx += num_bufs - total_bufs;
1726             total_bufs = num_bufs;
1727         }
1728 
1729         if (idx >= vq->vring.num) {
1730             idx -= vq->vring.num;
1731             wrap_counter ^= 1;
1732         }
1733     }
1734 
1735     /* Record the index and wrap counter for a kick we want */
1736     vq->shadow_avail_idx = idx;
1737     vq->shadow_avail_wrap_counter = wrap_counter;
1738 done:
1739     address_space_cache_destroy(&indirect_desc_cache);
1740     if (in_bytes) {
1741         *in_bytes = in_total;
1742     }
1743     if (out_bytes) {
1744         *out_bytes = out_total;
1745     }
1746     return;
1747 
1748 err:
1749     in_total = out_total = 0;
1750     goto done;
1751 }
1752 
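     /*
      * Report how many bytes the device could currently write to the guest
      * (*in_bytes, from VRING_DESC_F_WRITE descriptors) and read from it
      * (*out_bytes), walking the available descriptor chains.  The walk
      * stops early once both max_in_bytes and max_out_bytes are reached,
      * and both counters are reported as 0 if the rings are not accessible.
      */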
1753 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1754                                unsigned int *out_bytes,
1755                                unsigned max_in_bytes, unsigned max_out_bytes)
1756 {
1757     uint16_t desc_size;
1758     VRingMemoryRegionCaches *caches;
1759 
1760     RCU_READ_LOCK_GUARD();
1761 
1762     if (unlikely(!vq->vring.desc)) {
1763         goto err;
1764     }
1765 
1766     caches = vring_get_region_caches(vq);
1767     if (!caches) {
1768         goto err;
1769     }
1770 
1771     desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1772                                 sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1773     if (caches->desc.len < vq->vring.num * desc_size) {
1774         virtio_error(vq->vdev, "Cannot map descriptor ring");
1775         goto err;
1776     }
1777 
1778     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1779         virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1780                                          max_in_bytes, max_out_bytes,
1781                                          caches);
1782     } else {
1783         virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1784                                         max_in_bytes, max_out_bytes,
1785                                         caches);
1786     }
1787 
1788     return;
1789 err:
1790     if (in_bytes) {
1791         *in_bytes = 0;
1792     }
1793     if (out_bytes) {
1794         *out_bytes = 0;
1795     }
1796 }
1797 
1798 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1799                           unsigned int out_bytes)
1800 {
1801     unsigned int in_total, out_total;
1802 
1803     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1804     return in_bytes <= in_total && out_bytes <= out_total;
1805 }
1806 
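     /*
      * Map one descriptor's guest-physical range [pa, pa + sz) into host
      * iovecs starting at iov[*p_num_sg].  dma_memory_map() may cover the
      * range in several pieces, so a single descriptor can consume more
      * than one iovec; mapping fails if the buffer is zero-sized, cannot be
      * mapped, or would need more than max_num_sg entries.
      */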
1807 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1808                                hwaddr *addr, struct iovec *iov,
1809                                unsigned int max_num_sg, bool is_write,
1810                                hwaddr pa, size_t sz)
1811 {
1812     bool ok = false;
1813     unsigned num_sg = *p_num_sg;
1814     assert(num_sg <= max_num_sg);
1815 
1816     if (!sz) {
1817         virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1818         goto out;
1819     }
1820 
1821     while (sz) {
1822         hwaddr len = sz;
1823 
1824         if (num_sg == max_num_sg) {
1825             virtio_error(vdev, "virtio: too many write descriptors in "
1826                                "indirect table");
1827             goto out;
1828         }
1829 
1830         iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1831                                               is_write ?
1832                                               DMA_DIRECTION_FROM_DEVICE :
1833                                               DMA_DIRECTION_TO_DEVICE,
1834                                               MEMTXATTRS_UNSPECIFIED);
1835         if (!iov[num_sg].iov_base) {
1836             virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1837             goto out;
1838         }
1839 
1840         iov[num_sg].iov_len = len;
1841         addr[num_sg] = pa;
1842 
1843         sz -= len;
1844         pa += len;
1845         num_sg++;
1846     }
1847     ok = true;
1848 
1849 out:
1850     *p_num_sg = num_sg;
1851     return ok;
1852 }
1853 
1854 /* Only used by error code paths before we have a VirtQueueElement (therefore
1855  * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
1856  * yet.
1857  */
1858 static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
1859                                     struct iovec *iov)
1860 {
1861     unsigned int i;
1862 
1863     for (i = 0; i < out_num + in_num; i++) {
1864         int is_write = i >= out_num;
1865 
1866         cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
1867         iov++;
1868     }
1869 }
1870 
1871 static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1872                                 hwaddr *addr, unsigned int num_sg,
1873                                 bool is_write)
1874 {
1875     unsigned int i;
1876     hwaddr len;
1877 
1878     for (i = 0; i < num_sg; i++) {
1879         len = sg[i].iov_len;
1880         sg[i].iov_base = dma_memory_map(vdev->dma_as,
1881                                         addr[i], &len, is_write ?
1882                                         DMA_DIRECTION_FROM_DEVICE :
1883                                         DMA_DIRECTION_TO_DEVICE,
1884                                         MEMTXATTRS_UNSPECIFIED);
1885         if (!sg[i].iov_base) {
1886             error_report("virtio: error trying to map MMIO memory");
1887             exit(1);
1888         }
1889         if (len != sg[i].iov_len) {
1890             error_report("virtio: unexpected memory split");
1891             exit(1);
1892         }
1893     }
1894 }
1895 
1896 void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1897 {
1898     virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
1899     virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
1900                                                                         false);
1901 }
1902 
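     /*
      * Allocate a VirtQueueElement together with its variable-sized arrays
      * in a single allocation: the caller's struct (sz bytes, itself
      * starting with a VirtQueueElement) is followed by in_addr[],
      * out_addr[], in_sg[] and out_sg[], each aligned as required, and the
      * element's pointers are set to point into that trailing storage.
      */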
1903 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1904 {
1905     VirtQueueElement *elem;
1906     size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1907     size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1908     size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1909     size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1910     size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1911     size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1912 
1913     assert(sz >= sizeof(VirtQueueElement));
1914     elem = g_malloc(out_sg_end);
1915     trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1916     elem->out_num = out_num;
1917     elem->in_num = in_num;
1918     elem->in_addr = (void *)elem + in_addr_ofs;
1919     elem->out_addr = (void *)elem + out_addr_ofs;
1920     elem->in_sg = (void *)elem + in_sg_ofs;
1921     elem->out_sg = (void *)elem + out_sg_ofs;
1922     return elem;
1923 }
1924 
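     /*
      * Pop the next available element from a split ring: fetch the head
      * index, walk its descriptor chain (following an indirect table if
      * present), map every segment into host iovecs and return a freshly
      * allocated VirtQueueElement.  Returns NULL if the queue is empty or
      * if the chain is malformed, in which case the device has been marked
      * broken via virtio_error().
      */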
1925 static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
1926 {
1927     unsigned int i, head, max;
1928     VRingMemoryRegionCaches *caches;
1929     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1930     MemoryRegionCache *desc_cache;
1931     int64_t len;
1932     VirtIODevice *vdev = vq->vdev;
1933     VirtQueueElement *elem = NULL;
1934     unsigned out_num, in_num, elem_entries;
1935     hwaddr addr[VIRTQUEUE_MAX_SIZE];
1936     struct iovec iov[VIRTQUEUE_MAX_SIZE];
1937     VRingDesc desc;
1938     int rc;
1939 
1940     RCU_READ_LOCK_GUARD();
1941     if (virtio_queue_empty_rcu(vq)) {
1942         goto done;
1943     }
1944     /* Needed after virtio_queue_empty(), see comment in
1945      * virtqueue_num_heads(). */
1946     smp_rmb();
1947 
1948     /* When we start there are neither input nor output buffers. */
1949     out_num = in_num = elem_entries = 0;
1950 
1951     max = vq->vring.num;
1952 
1953     if (vq->inuse >= vq->vring.num) {
1954         virtio_error(vdev, "Virtqueue size exceeded");
1955         goto done;
1956     }
1957 
1958     if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
1959         goto done;
1960     }
1961 
1962     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1963         vring_set_avail_event(vq, vq->last_avail_idx);
1964     }
1965 
1966     i = head;
1967 
1968     caches = vring_get_region_caches(vq);
1969     if (!caches) {
1970         virtio_error(vdev, "Region caches not initialized");
1971         goto done;
1972     }
1973 
1974     if (caches->desc.len < max * sizeof(VRingDesc)) {
1975         virtio_error(vdev, "Cannot map descriptor ring");
1976         goto done;
1977     }
1978 
1979     desc_cache = &caches->desc;
1980     vring_split_desc_read(vdev, &desc, desc_cache, i);
1981     if (desc.flags & VRING_DESC_F_INDIRECT) {
1982         if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1983             virtio_error(vdev, "Invalid size for indirect buffer table");
1984             goto done;
1985         }
1986 
1987         /* loop over the indirect descriptor table */
1988         len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1989                                        desc.addr, desc.len, false);
1990         desc_cache = &indirect_desc_cache;
1991         if (len < desc.len) {
1992             virtio_error(vdev, "Cannot map indirect buffer");
1993             goto done;
1994         }
1995 
1996         max = desc.len / sizeof(VRingDesc);
1997         i = 0;
1998         vring_split_desc_read(vdev, &desc, desc_cache, i);
1999     }
2000 
2001     /* Collect all the descriptors */
2002     do {
2003         bool map_ok;
2004 
2005         if (desc.flags & VRING_DESC_F_WRITE) {
2006             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
2007                                         iov + out_num,
2008                                         VIRTQUEUE_MAX_SIZE - out_num, true,
2009                                         desc.addr, desc.len);
2010         } else {
2011             if (in_num) {
2012                 virtio_error(vdev, "Incorrect order for descriptors");
2013                 goto err_undo_map;
2014             }
2015             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
2016                                         VIRTQUEUE_MAX_SIZE, false,
2017                                         desc.addr, desc.len);
2018         }
2019         if (!map_ok) {
2020             goto err_undo_map;
2021         }
2022 
2023         /* If we've got too many, that implies a descriptor loop. */
2024         if (++elem_entries > max) {
2025             virtio_error(vdev, "Looped descriptor");
2026             goto err_undo_map;
2027         }
2028 
2029         rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
2030     } while (rc == VIRTQUEUE_READ_DESC_MORE);
2031 
2032     if (rc == VIRTQUEUE_READ_DESC_ERROR) {
2033         goto err_undo_map;
2034     }
2035 
2036     /* Now copy what we have collected and mapped */
2037     elem = virtqueue_alloc_element(sz, out_num, in_num);
2038     elem->index = head;
2039     elem->ndescs = 1;
2040     for (i = 0; i < out_num; i++) {
2041         elem->out_addr[i] = addr[i];
2042         elem->out_sg[i] = iov[i];
2043     }
2044     for (i = 0; i < in_num; i++) {
2045         elem->in_addr[i] = addr[out_num + i];
2046         elem->in_sg[i] = iov[out_num + i];
2047     }
2048 
2049     vq->inuse++;
2050 
2051     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
2052 done:
2053     address_space_cache_destroy(&indirect_desc_cache);
2054 
2055     return elem;
2056 
2057 err_undo_map:
2058     virtqueue_undo_map_desc(out_num, in_num, iov);
2059     goto done;
2060 }
2061 
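     /*
      * Packed-ring counterpart of virtqueue_split_pop().  The main
      * differences: the element is identified by the buffer id taken from
      * the first descriptor rather than by a head index, and
      * last_avail_idx (together with its wrap counter) advances by the
      * number of descriptors consumed.
      */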
2062 static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
2063 {
2064     unsigned int i, max;
2065     VRingMemoryRegionCaches *caches;
2066     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
2067     MemoryRegionCache *desc_cache;
2068     int64_t len;
2069     VirtIODevice *vdev = vq->vdev;
2070     VirtQueueElement *elem = NULL;
2071     unsigned out_num, in_num, elem_entries;
2072     hwaddr addr[VIRTQUEUE_MAX_SIZE];
2073     struct iovec iov[VIRTQUEUE_MAX_SIZE];
2074     VRingPackedDesc desc;
2075     uint16_t id;
2076     int rc;
2077 
2078     RCU_READ_LOCK_GUARD();
2079     if (virtio_queue_packed_empty_rcu(vq)) {
2080         goto done;
2081     }
2082 
2083     /* When we start there are neither input nor output buffers. */
2084     out_num = in_num = elem_entries = 0;
2085 
2086     max = vq->vring.num;
2087 
2088     if (vq->inuse >= vq->vring.num) {
2089         virtio_error(vdev, "Virtqueue size exceeded");
2090         goto done;
2091     }
2092 
2093     i = vq->last_avail_idx;
2094 
2095     caches = vring_get_region_caches(vq);
2096     if (!caches) {
2097         virtio_error(vdev, "Region caches not initialized");
2098         goto done;
2099     }
2100 
2101     if (caches->desc.len < max * sizeof(VRingPackedDesc)) {
2102         virtio_error(vdev, "Cannot map descriptor ring");
2103         goto done;
2104     }
2105 
2106     desc_cache = &caches->desc;
2107     vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
2108     id = desc.id;
2109     if (desc.flags & VRING_DESC_F_INDIRECT) {
2110         if (desc.len % sizeof(VRingPackedDesc)) {
2111             virtio_error(vdev, "Invalid size for indirect buffer table");
2112             goto done;
2113         }
2114 
2115         /* loop over the indirect descriptor table */
2116         len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
2117                                        desc.addr, desc.len, false);
2118         desc_cache = &indirect_desc_cache;
2119         if (len < desc.len) {
2120             virtio_error(vdev, "Cannot map indirect buffer");
2121             goto done;
2122         }
2123 
2124         max = desc.len / sizeof(VRingPackedDesc);
2125         i = 0;
2126         vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
2127     }
2128 
2129     /* Collect all the descriptors */
2130     do {
2131         bool map_ok;
2132 
2133         if (desc.flags & VRING_DESC_F_WRITE) {
2134             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
2135                                         iov + out_num,
2136                                         VIRTQUEUE_MAX_SIZE - out_num, true,
2137                                         desc.addr, desc.len);
2138         } else {
2139             if (in_num) {
2140                 virtio_error(vdev, "Incorrect order for descriptors");
2141                 goto err_undo_map;
2142             }
2143             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
2144                                         VIRTQUEUE_MAX_SIZE, false,
2145                                         desc.addr, desc.len);
2146         }
2147         if (!map_ok) {
2148             goto err_undo_map;
2149         }
2150 
2151         /* If we've got too many, that implies a descriptor loop. */
2152         if (++elem_entries > max) {
2153             virtio_error(vdev, "Looped descriptor");
2154             goto err_undo_map;
2155         }
2156 
2157         rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
2158                                              desc_cache ==
2159                                              &indirect_desc_cache);
2160     } while (rc == VIRTQUEUE_READ_DESC_MORE);
2161 
2162     /* Now copy what we have collected and mapped */
2163     elem = virtqueue_alloc_element(sz, out_num, in_num);
2164     for (i = 0; i < out_num; i++) {
2165         elem->out_addr[i] = addr[i];
2166         elem->out_sg[i] = iov[i];
2167     }
2168     for (i = 0; i < in_num; i++) {
2169         elem->in_addr[i] = addr[out_num + i];
2170         elem->in_sg[i] = iov[out_num + i];
2171     }
2172 
2173     elem->index = id;
2174     elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
2175     vq->last_avail_idx += elem->ndescs;
2176     vq->inuse += elem->ndescs;
2177 
2178     if (vq->last_avail_idx >= vq->vring.num) {
2179         vq->last_avail_idx -= vq->vring.num;
2180         vq->last_avail_wrap_counter ^= 1;
2181     }
2182 
2183     vq->shadow_avail_idx = vq->last_avail_idx;
2184     vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
2185 
2186     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
2187 done:
2188     address_space_cache_destroy(&indirect_desc_cache);
2189 
2190     return elem;
2191 
2192 err_undo_map:
2193     virtqueue_undo_map_desc(out_num, in_num, iov);
2194     goto done;
2195 }
2196 
2197 void *virtqueue_pop(VirtQueue *vq, size_t sz)
2198 {
2199     if (virtio_device_disabled(vq->vdev)) {
2200         return NULL;
2201     }
2202 
2203     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
2204         return virtqueue_packed_pop(vq, sz);
2205     } else {
2206         return virtqueue_split_pop(vq, sz);
2207     }
2208 }
2209 
2210 static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
2211 {
2212     VRingMemoryRegionCaches *caches;
2213     MemoryRegionCache *desc_cache;
2214     unsigned int dropped = 0;
2215     VirtQueueElement elem = {};
2216     VirtIODevice *vdev = vq->vdev;
2217     VRingPackedDesc desc;
2218 
2219     RCU_READ_LOCK_GUARD();
2220 
2221     caches = vring_get_region_caches(vq);
2222     if (!caches) {
2223         return 0;
2224     }
2225 
2226     desc_cache = &caches->desc;
2227 
2228     virtio_queue_set_notification(vq, 0);
2229 
2230     while (vq->inuse < vq->vring.num) {
2231         unsigned int idx = vq->last_avail_idx;
2232         /*
2233          * works similarly to virtqueue_pop but does not map buffers
2234          * and does not allocate any memory.
2235          */
2236         vring_packed_desc_read(vdev, &desc, desc_cache,
2237                                vq->last_avail_idx , true);
2238         if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
2239             break;
2240         }
2241         elem.index = desc.id;
2242         elem.ndescs = 1;
2243         while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
2244                                                vq->vring.num, &idx, false)) {
2245             ++elem.ndescs;
2246         }
2247         /*
2248          * immediately push the element, nothing to unmap
2249          * as both in_num and out_num are set to 0.
2250          */
2251         virtqueue_push(vq, &elem, 0);
2252         dropped++;
2253         vq->last_avail_idx += elem.ndescs;
2254         if (vq->last_avail_idx >= vq->vring.num) {
2255             vq->last_avail_idx -= vq->vring.num;
2256             vq->last_avail_wrap_counter ^= 1;
2257         }
2258     }
2259 
2260     return dropped;
2261 }
2262 
2263 static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
2264 {
2265     unsigned int dropped = 0;
2266     VirtQueueElement elem = {};
2267     VirtIODevice *vdev = vq->vdev;
2268     bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2269 
2270     while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
2271         /* works similarly to virtqueue_pop but does not map buffers
2272          * and does not allocate any memory */
2273         smp_rmb();
2274         if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
2275             break;
2276         }
2277         vq->inuse++;
2278         vq->last_avail_idx++;
2279         if (fEventIdx) {
2280             vring_set_avail_event(vq, vq->last_avail_idx);
2281         }
2282         /* immediately push the element, nothing to unmap
2283          * as both in_num and out_num are set to 0 */
2284         virtqueue_push(vq, &elem, 0);
2285         dropped++;
2286     }
2287 
2288     return dropped;
2289 }
2290 
2291 /* virtqueue_drop_all:
2292  * @vq: The #VirtQueue
2293  * Drops all queued buffers and indicates them to the guest
2294  * as if they are done. Useful when buffers can not be
2295  * processed but must be returned to the guest.
2296  */
2297 unsigned int virtqueue_drop_all(VirtQueue *vq)
2298 {
2299     VirtIODevice *vdev = vq->vdev;
2300 
2301     if (virtio_device_disabled(vdev)) {
2302         return 0;
2303     }
2304 
2305     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2306         return virtqueue_packed_drop_all(vq);
2307     } else {
2308         return virtqueue_split_drop_all(vq);
2309     }
2310 }
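
     /*
      * Illustrative only: a device model that has lost its backend could
      * return everything the guest has queued with something like
      * (hypothetical caller code, not part of this file's API):
      *
      *     for (i = 0; i < virtio_get_num_queues(vdev); i++) {
      *         virtqueue_drop_all(virtio_get_queue(vdev, i));
      *     }
      */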
2311 
2312 /* Reading and writing a structure directly to QEMUFile is *awful*, but
2313  * it is what QEMU has always done by mistake.  We can change it sooner
2314  * or later by bumping the version number of the affected vm states.
2315  * In the meanwhile, since the in-memory layout of VirtQueueElement
2316  * has changed, we need to marshal to and from the layout that was
2317  * used before the change.
2318  */
2319 typedef struct VirtQueueElementOld {
2320     unsigned int index;
2321     unsigned int out_num;
2322     unsigned int in_num;
2323     hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
2324     hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
2325     struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
2326     struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
2327 } VirtQueueElementOld;
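
     /*
      * Note on stream size: the whole fixed-size structure above is written
      * for every in-flight element.  With VIRTQUEUE_MAX_SIZE of 1024 that is
      * 2 * 1024 * sizeof(hwaddr) + 2 * 1024 * sizeof(struct iovec) plus the
      * three counters -- roughly 48 KiB per element on a typical 64-bit
      * host, independent of how many entries are actually in use.
      */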
2328 
2329 void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
2330 {
2331     VirtQueueElement *elem;
2332     VirtQueueElementOld data;
2333     int i;
2334 
2335     qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
2336 
2337     /* TODO: teach all callers that this can fail, and return failure instead
2338      * of asserting here.
2339      * This is just one thing (there are probably more) that must be
2340      * fixed before we can allow NDEBUG compilation.
2341      */
2342     assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
2343     assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
2344 
2345     elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
2346     elem->index = data.index;
2347 
2348     for (i = 0; i < elem->in_num; i++) {
2349         elem->in_addr[i] = data.in_addr[i];
2350     }
2351 
2352     for (i = 0; i < elem->out_num; i++) {
2353         elem->out_addr[i] = data.out_addr[i];
2354     }
2355 
2356     for (i = 0; i < elem->in_num; i++) {
2357         /* Base is overwritten by virtqueue_map.  */
2358         elem->in_sg[i].iov_base = 0;
2359         elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
2360     }
2361 
2362     for (i = 0; i < elem->out_num; i++) {
2363         /* Base is overwritten by virtqueue_map.  */
2364         elem->out_sg[i].iov_base = 0;
2365         elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
2366     }
2367 
2368     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2369         qemu_get_be32s(f, &elem->ndescs);
2370     }
2371 
2372     virtqueue_map(vdev, elem);
2373     return elem;
2374 }
2375 
2376 void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
2377                                 VirtQueueElement *elem)
2378 {
2379     VirtQueueElementOld data;
2380     int i;
2381 
2382     memset(&data, 0, sizeof(data));
2383     data.index = elem->index;
2384     data.in_num = elem->in_num;
2385     data.out_num = elem->out_num;
2386 
2387     for (i = 0; i < elem->in_num; i++) {
2388         data.in_addr[i] = elem->in_addr[i];
2389     }
2390 
2391     for (i = 0; i < elem->out_num; i++) {
2392         data.out_addr[i] = elem->out_addr[i];
2393     }
2394 
2395     for (i = 0; i < elem->in_num; i++) {
2396         /* Base is overwritten by virtqueue_map when loading.  Do not
2397          * save it, as it would leak the QEMU address space layout.  */
2398         data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
2399     }
2400 
2401     for (i = 0; i < elem->out_num; i++) {
2402         /* Do not save iov_base as above.  */
2403         data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
2404     }
2405 
2406     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2407         qemu_put_be32s(f, &elem->ndescs);
2408     }
2409 
2410     qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
2411 }
2412 
2413 /* virtio device */
2414 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
2415 {
2416     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2417     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2418 
2419     if (virtio_device_disabled(vdev)) {
2420         return;
2421     }
2422 
2423     if (k->notify) {
2424         k->notify(qbus->parent, vector);
2425     }
2426 }
2427 
2428 void virtio_update_irq(VirtIODevice *vdev)
2429 {
2430     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
2431 }
2432 
2433 static int virtio_validate_features(VirtIODevice *vdev)
2434 {
2435     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2436 
2437     if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
2438         !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
2439         return -EFAULT;
2440     }
2441 
2442     if (k->validate_features) {
2443         return k->validate_features(vdev);
2444     } else {
2445         return 0;
2446     }
2447 }
2448 
2449 int virtio_set_status(VirtIODevice *vdev, uint8_t val)
2450 {
2451     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2452     trace_virtio_set_status(vdev, val);
2453 
2454     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2455         if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
2456             val & VIRTIO_CONFIG_S_FEATURES_OK) {
2457             int ret = virtio_validate_features(vdev);
2458 
2459             if (ret) {
2460                 return ret;
2461             }
2462         }
2463     }
2464 
2465     if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
2466         (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
2467         virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
2468     }
2469 
2470     if (k->set_status) {
2471         k->set_status(vdev, val);
2472     }
2473     vdev->status = val;
2474 
2475     return 0;
2476 }
2477 
2478 static enum virtio_device_endian virtio_default_endian(void)
2479 {
2480     if (target_words_bigendian()) {
2481         return VIRTIO_DEVICE_ENDIAN_BIG;
2482     } else {
2483         return VIRTIO_DEVICE_ENDIAN_LITTLE;
2484     }
2485 }
2486 
2487 static enum virtio_device_endian virtio_current_cpu_endian(void)
2488 {
2489     if (cpu_virtio_is_big_endian(current_cpu)) {
2490         return VIRTIO_DEVICE_ENDIAN_BIG;
2491     } else {
2492         return VIRTIO_DEVICE_ENDIAN_LITTLE;
2493     }
2494 }
2495 
2496 static void __virtio_queue_reset(VirtIODevice *vdev, uint32_t i)
2497 {
2498     vdev->vq[i].vring.desc = 0;
2499     vdev->vq[i].vring.avail = 0;
2500     vdev->vq[i].vring.used = 0;
2501     vdev->vq[i].last_avail_idx = 0;
2502     vdev->vq[i].shadow_avail_idx = 0;
2503     vdev->vq[i].used_idx = 0;
2504     vdev->vq[i].last_avail_wrap_counter = true;
2505     vdev->vq[i].shadow_avail_wrap_counter = true;
2506     vdev->vq[i].used_wrap_counter = true;
2507     virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2508     vdev->vq[i].signalled_used = 0;
2509     vdev->vq[i].signalled_used_valid = false;
2510     vdev->vq[i].notification = true;
2511     vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2512     vdev->vq[i].inuse = 0;
2513     virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2514 }
2515 
2516 void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
2517 {
2518     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2519 
2520     if (k->queue_reset) {
2521         k->queue_reset(vdev, queue_index);
2522     }
2523 
2524     __virtio_queue_reset(vdev, queue_index);
2525 }
2526 
2527 void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
2528 {
2529     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2530 
2531     /*
2532      * TODO: SeaBIOS is currently out of spec and triggering this error.
2533      * So this needs to be fixed in SeaBIOS, then this can
2534      * be re-enabled for new machine types only, and also after
2535      * being converted to LOG_GUEST_ERROR.
2536      *
2537     if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2538         error_report("queue_enable is only suppported in devices of virtio "
2539                      "1.0 or later.");
2540     }
2541     */
2542 
2543     if (k->queue_enable) {
2544         k->queue_enable(vdev, queue_index);
2545     }
2546 }
2547 
2548 void virtio_reset(void *opaque)
2549 {
2550     VirtIODevice *vdev = opaque;
2551     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2552     int i;
2553 
2554     virtio_set_status(vdev, 0);
2555     if (current_cpu) {
2556         /* Guest initiated reset */
2557         vdev->device_endian = virtio_current_cpu_endian();
2558     } else {
2559         /* System reset */
2560         vdev->device_endian = virtio_default_endian();
2561     }
2562 
2563     if (k->reset) {
2564         k->reset(vdev);
2565     }
2566 
2567     vdev->start_on_kick = false;
2568     vdev->started = false;
2569     vdev->broken = false;
2570     vdev->guest_features = 0;
2571     vdev->queue_sel = 0;
2572     vdev->status = 0;
2573     vdev->disabled = false;
2574     qatomic_set(&vdev->isr, 0);
2575     vdev->config_vector = VIRTIO_NO_VECTOR;
2576     virtio_notify_vector(vdev, vdev->config_vector);
2577 
2578     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2579         __virtio_queue_reset(vdev, i);
2580     }
2581 }
2582 
2583 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
2584 {
2585     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2586     uint8_t val;
2587 
2588     if (addr + sizeof(val) > vdev->config_len) {
2589         return (uint32_t)-1;
2590     }
2591 
2592     k->get_config(vdev, vdev->config);
2593 
2594     val = ldub_p(vdev->config + addr);
2595     return val;
2596 }
2597 
2598 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
2599 {
2600     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2601     uint16_t val;
2602 
2603     if (addr + sizeof(val) > vdev->config_len) {
2604         return (uint32_t)-1;
2605     }
2606 
2607     k->get_config(vdev, vdev->config);
2608 
2609     val = lduw_p(vdev->config + addr);
2610     return val;
2611 }
2612 
2613 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
2614 {
2615     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2616     uint32_t val;
2617 
2618     if (addr + sizeof(val) > vdev->config_len) {
2619         return (uint32_t)-1;
2620     }
2621 
2622     k->get_config(vdev, vdev->config);
2623 
2624     val = ldl_p(vdev->config + addr);
2625     return val;
2626 }
2627 
2628 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2629 {
2630     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2631     uint8_t val = data;
2632 
2633     if (addr + sizeof(val) > vdev->config_len) {
2634         return;
2635     }
2636 
2637     stb_p(vdev->config + addr, val);
2638 
2639     if (k->set_config) {
2640         k->set_config(vdev, vdev->config);
2641     }
2642 }
2643 
2644 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2645 {
2646     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2647     uint16_t val = data;
2648 
2649     if (addr + sizeof(val) > vdev->config_len) {
2650         return;
2651     }
2652 
2653     stw_p(vdev->config + addr, val);
2654 
2655     if (k->set_config) {
2656         k->set_config(vdev, vdev->config);
2657     }
2658 }
2659 
2660 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2661 {
2662     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2663     uint32_t val = data;
2664 
2665     if (addr + sizeof(val) > vdev->config_len) {
2666         return;
2667     }
2668 
2669     stl_p(vdev->config + addr, val);
2670 
2671     if (k->set_config) {
2672         k->set_config(vdev, vdev->config);
2673     }
2674 }
2675 
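     /*
      * The *_modern_* accessors below mirror the legacy ones above; the only
      * difference is byte order.  VIRTIO 1.0 defines the configuration space
      * as little-endian, hence the explicit *_le_p() loads and stores,
      * whereas the legacy accessors use the target's natural byte order.
      */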
2676 uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
2677 {
2678     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2679     uint8_t val;
2680 
2681     if (addr + sizeof(val) > vdev->config_len) {
2682         return (uint32_t)-1;
2683     }
2684 
2685     k->get_config(vdev, vdev->config);
2686 
2687     val = ldub_p(vdev->config + addr);
2688     return val;
2689 }
2690 
2691 uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
2692 {
2693     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2694     uint16_t val;
2695 
2696     if (addr + sizeof(val) > vdev->config_len) {
2697         return (uint32_t)-1;
2698     }
2699 
2700     k->get_config(vdev, vdev->config);
2701 
2702     val = lduw_le_p(vdev->config + addr);
2703     return val;
2704 }
2705 
2706 uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
2707 {
2708     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2709     uint32_t val;
2710 
2711     if (addr + sizeof(val) > vdev->config_len) {
2712         return (uint32_t)-1;
2713     }
2714 
2715     k->get_config(vdev, vdev->config);
2716 
2717     val = ldl_le_p(vdev->config + addr);
2718     return val;
2719 }
2720 
2721 void virtio_config_modern_writeb(VirtIODevice *vdev,
2722                                  uint32_t addr, uint32_t data)
2723 {
2724     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2725     uint8_t val = data;
2726 
2727     if (addr + sizeof(val) > vdev->config_len) {
2728         return;
2729     }
2730 
2731     stb_p(vdev->config + addr, val);
2732 
2733     if (k->set_config) {
2734         k->set_config(vdev, vdev->config);
2735     }
2736 }
2737 
2738 void virtio_config_modern_writew(VirtIODevice *vdev,
2739                                  uint32_t addr, uint32_t data)
2740 {
2741     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2742     uint16_t val = data;
2743 
2744     if (addr + sizeof(val) > vdev->config_len) {
2745         return;
2746     }
2747 
2748     stw_le_p(vdev->config + addr, val);
2749 
2750     if (k->set_config) {
2751         k->set_config(vdev, vdev->config);
2752     }
2753 }
2754 
2755 void virtio_config_modern_writel(VirtIODevice *vdev,
2756                                  uint32_t addr, uint32_t data)
2757 {
2758     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2759     uint32_t val = data;
2760 
2761     if (addr + sizeof(val) > vdev->config_len) {
2762         return;
2763     }
2764 
2765     stl_le_p(vdev->config + addr, val);
2766 
2767     if (k->set_config) {
2768         k->set_config(vdev, vdev->config);
2769     }
2770 }
2771 
2772 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2773 {
2774     if (!vdev->vq[n].vring.num) {
2775         return;
2776     }
2777     vdev->vq[n].vring.desc = addr;
2778     virtio_queue_update_rings(vdev, n);
2779 }
2780 
2781 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2782 {
2783     return vdev->vq[n].vring.desc;
2784 }
2785 
2786 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2787                             hwaddr avail, hwaddr used)
2788 {
2789     if (!vdev->vq[n].vring.num) {
2790         return;
2791     }
2792     vdev->vq[n].vring.desc = desc;
2793     vdev->vq[n].vring.avail = avail;
2794     vdev->vq[n].vring.used = used;
2795     virtio_init_region_cache(vdev, n);
2796 }
2797 
2798 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2799 {
2800     /* Don't allow guest to flip queue between existent and
2801      * nonexistent states, or to set it to an invalid size.
2802      */
2803     if (!!num != !!vdev->vq[n].vring.num ||
2804         num > VIRTQUEUE_MAX_SIZE ||
2805         num < 0) {
2806         return;
2807     }
2808     vdev->vq[n].vring.num = num;
2809 }
2810 
2811 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2812 {
2813     return QLIST_FIRST(&vdev->vector_queues[vector]);
2814 }
2815 
2816 VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2817 {
2818     return QLIST_NEXT(vq, node);
2819 }
2820 
2821 int virtio_queue_get_num(VirtIODevice *vdev, int n)
2822 {
2823     return vdev->vq[n].vring.num;
2824 }
2825 
2826 int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2827 {
2828     return vdev->vq[n].vring.num_default;
2829 }
2830 
2831 int virtio_get_num_queues(VirtIODevice *vdev)
2832 {
2833     int i;
2834 
2835     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2836         if (!virtio_queue_get_num(vdev, i)) {
2837             break;
2838         }
2839     }
2840 
2841     return i;
2842 }
2843 
2844 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2845 {
2846     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2847     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2848 
2849     /* virtio-1 compliant devices cannot change the alignment */
2850     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2851         error_report("tried to modify queue alignment for virtio-1 device");
2852         return;
2853     }
2854     /* Check that the transport told us it was going to do this
2855      * (so a buggy transport will immediately assert rather than
2856      * silently failing to migrate this state)
2857      */
2858     assert(k->has_variable_vring_alignment);
2859 
2860     if (align) {
2861         vdev->vq[n].vring.align = align;
2862         virtio_queue_update_rings(vdev, n);
2863     }
2864 }
2865 
2866 static void virtio_queue_notify_vq(VirtQueue *vq)
2867 {
2868     if (vq->vring.desc && vq->handle_output) {
2869         VirtIODevice *vdev = vq->vdev;
2870 
2871         if (unlikely(vdev->broken)) {
2872             return;
2873         }
2874 
2875         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2876         vq->handle_output(vdev, vq);
2877 
2878         if (unlikely(vdev->start_on_kick)) {
2879             virtio_set_started(vdev, true);
2880         }
2881     }
2882 }
2883 
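     /*
      * Guest kick entry point used by the transports.  When an
      * ioeventfd-backed host notifier is in use the event is simply
      * signalled, so the handler runs in whatever context polls that
      * notifier (an iothread, for instance); otherwise the handler is
      * invoked synchronously here.
      */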
2884 void virtio_queue_notify(VirtIODevice *vdev, int n)
2885 {
2886     VirtQueue *vq = &vdev->vq[n];
2887 
2888     if (unlikely(!vq->vring.desc || vdev->broken)) {
2889         return;
2890     }
2891 
2892     trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2893     if (vq->host_notifier_enabled) {
2894         event_notifier_set(&vq->host_notifier);
2895     } else if (vq->handle_output) {
2896         vq->handle_output(vdev, vq);
2897 
2898         if (unlikely(vdev->start_on_kick)) {
2899             virtio_set_started(vdev, true);
2900         }
2901     }
2902 }
2903 
2904 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2905 {
2906     return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2907         VIRTIO_NO_VECTOR;
2908 }
2909 
2910 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2911 {
2912     VirtQueue *vq = &vdev->vq[n];
2913 
2914     if (n < VIRTIO_QUEUE_MAX) {
2915         if (vdev->vector_queues &&
2916             vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2917             QLIST_REMOVE(vq, node);
2918         }
2919         vdev->vq[n].vector = vector;
2920         if (vdev->vector_queues &&
2921             vector != VIRTIO_NO_VECTOR) {
2922             QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2923         }
2924     }
2925 }
2926 
2927 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2928                             VirtIOHandleOutput handle_output)
2929 {
2930     int i;
2931 
2932     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2933         if (vdev->vq[i].vring.num == 0)
2934             break;
2935     }
2936 
2937     if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2938         abort();
2939 
2940     vdev->vq[i].vring.num = queue_size;
2941     vdev->vq[i].vring.num_default = queue_size;
2942     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2943     vdev->vq[i].handle_output = handle_output;
2944     vdev->vq[i].used_elems = g_new0(VirtQueueElement, queue_size);
2945 
2946     return &vdev->vq[i];
2947 }
2948 
2949 void virtio_delete_queue(VirtQueue *vq)
2950 {
2951     vq->vring.num = 0;
2952     vq->vring.num_default = 0;
2953     vq->handle_output = NULL;
2954     g_free(vq->used_elems);
2955     vq->used_elems = NULL;
2956     virtio_virtqueue_reset_region_cache(vq);
2957 }
2958 
2959 void virtio_del_queue(VirtIODevice *vdev, int n)
2960 {
2961     if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2962         abort();
2963     }
2964 
2965     virtio_delete_queue(&vdev->vq[n]);
2966 }
2967 
2968 static void virtio_set_isr(VirtIODevice *vdev, int value)
2969 {
2970     uint8_t old = qatomic_read(&vdev->isr);
2971 
2972     /* Do not write ISR if it does not change, so that its cacheline remains
2973      * shared in the common case where the guest does not read it.
2974      */
2975     if ((old & value) != value) {
2976         qatomic_or(&vdev->isr, value);
2977     }
2978 }
2979 
2980 /* Called within rcu_read_lock(). */
2981 static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2982 {
2983     uint16_t old, new;
2984     bool v;
2985     /* We need to expose used array entries before checking used event. */
2986     smp_mb();
2987     /* Always notify when queue is empty (if the guest acknowledged the feature) */
2988     if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2989         !vq->inuse && virtio_queue_empty(vq)) {
2990         return true;
2991     }
2992 
2993     if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2994         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2995     }
2996 
2997     v = vq->signalled_used_valid;
2998     vq->signalled_used_valid = true;
2999     old = vq->signalled_used;
3000     new = vq->signalled_used = vq->used_idx;
3001     return !v || vring_need_event(vring_get_used_event(vq), new, old);
3002 }
3003 
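     /*
      * Packed-ring event suppression: in @off_wrap bit 15 carries the wrap
      * counter the driver stored alongside the event offset and bits 0..14
      * the offset itself.  If that wrap counter differs from the ring's
      * current one, the event index refers to the previous pass over the
      * ring, so it is shifted down by vring.num before the usual
      * vring_need_event() range check.
      */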
3004 static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
3005                                     uint16_t off_wrap, uint16_t new,
3006                                     uint16_t old)
3007 {
3008     int off = off_wrap & ~(1 << 15);
3009 
3010     if (wrap != off_wrap >> 15) {
3011         off -= vq->vring.num;
3012     }
3013 
3014     return vring_need_event(off, new, old);
3015 }
3016 
3017 /* Called within rcu_read_lock(). */
3018 static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
3019 {
3020     VRingPackedDescEvent e;
3021     uint16_t old, new;
3022     bool v;
3023     VRingMemoryRegionCaches *caches;
3024 
3025     caches = vring_get_region_caches(vq);
3026     if (!caches) {
3027         return false;
3028     }
3029 
3030     vring_packed_event_read(vdev, &caches->avail, &e);
3031 
3032     old = vq->signalled_used;
3033     new = vq->signalled_used = vq->used_idx;
3034     v = vq->signalled_used_valid;
3035     vq->signalled_used_valid = true;
3036 
3037     if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
3038         return false;
3039     } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
3040         return true;
3041     }
3042 
3043     return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
3044                                          e.off_wrap, new, old);
3045 }
3046 
3047 /* Called within rcu_read_lock().  */
3048 static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
3049 {
3050     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3051         return virtio_packed_should_notify(vdev, vq);
3052     } else {
3053         return virtio_split_should_notify(vdev, vq);
3054     }
3055 }
3056 
3057 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
3058 {
3059     WITH_RCU_READ_LOCK_GUARD() {
3060         if (!virtio_should_notify(vdev, vq)) {
3061             return;
3062         }
3063     }
3064 
3065     trace_virtio_notify_irqfd(vdev, vq);
3066 
3067     /*
3068      * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
3069      * windows drivers included in virtio-win 1.8.0 (circa 2015) are
3070      * incorrectly polling this bit during crashdump and hibernation
3071      * in MSI mode, causing a hang if this bit is never updated.
3072      * Recent releases of Windows do not really shut down, but rather
3073      * log out and hibernate to make the next startup faster.  Hence,
3074      * this manifested as a more serious hang during shutdown with such drivers.
3075      *
3076      * Next driver release from 2016 fixed this problem, so working around it
3077      * is not a must, but it's easy to do so let's do it here.
3078      *
3079      * Note: it's safe to update ISR from any thread as it was switched
3080      * to an atomic operation.
3081      */
3082     virtio_set_isr(vq->vdev, 0x1);
3083     event_notifier_set(&vq->guest_notifier);
3084 }
3085 
3086 static void virtio_irq(VirtQueue *vq)
3087 {
3088     virtio_set_isr(vq->vdev, 0x1);
3089     virtio_notify_vector(vq->vdev, vq->vector);
3090 }
3091 
3092 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
3093 {
3094     WITH_RCU_READ_LOCK_GUARD() {
3095         if (!virtio_should_notify(vdev, vq)) {
3096             return;
3097         }
3098     }
3099 
3100     trace_virtio_notify(vdev, vq);
3101     virtio_irq(vq);
3102 }
3103 
3104 void virtio_notify_config(VirtIODevice *vdev)
3105 {
3106     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
3107         return;
3108 
3109     virtio_set_isr(vdev, 0x3);
3110     vdev->generation++;
3111     virtio_notify_vector(vdev, vdev->config_vector);
3112 }
3113 
3114 static bool virtio_device_endian_needed(void *opaque)
3115 {
3116     VirtIODevice *vdev = opaque;
3117 
3118     assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
3119     if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3120         return vdev->device_endian != virtio_default_endian();
3121     }
3122     /* Devices conforming to VIRTIO 1.0 or later are always LE. */
3123     return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
3124 }
3125 
3126 static bool virtio_64bit_features_needed(void *opaque)
3127 {
3128     VirtIODevice *vdev = opaque;
3129 
3130     return (vdev->host_features >> 32) != 0;
3131 }
3132 
3133 static bool virtio_virtqueue_needed(void *opaque)
3134 {
3135     VirtIODevice *vdev = opaque;
3136 
3137     return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
3138 }
3139 
3140 static bool virtio_packed_virtqueue_needed(void *opaque)
3141 {
3142     VirtIODevice *vdev = opaque;
3143 
3144     return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
3145 }
3146 
3147 static bool virtio_ringsize_needed(void *opaque)
3148 {
3149     VirtIODevice *vdev = opaque;
3150     int i;
3151 
3152     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3153         if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
3154             return true;
3155         }
3156     }
3157     return false;
3158 }
3159 
3160 static bool virtio_extra_state_needed(void *opaque)
3161 {
3162     VirtIODevice *vdev = opaque;
3163     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3164     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3165 
3166     return k->has_extra_state &&
3167         k->has_extra_state(qbus->parent);
3168 }
3169 
3170 static bool virtio_broken_needed(void *opaque)
3171 {
3172     VirtIODevice *vdev = opaque;
3173 
3174     return vdev->broken;
3175 }
3176 
3177 static bool virtio_started_needed(void *opaque)
3178 {
3179     VirtIODevice *vdev = opaque;
3180 
3181     return vdev->started;
3182 }
3183 
3184 static bool virtio_disabled_needed(void *opaque)
3185 {
3186     VirtIODevice *vdev = opaque;
3187 
3188     return vdev->disabled;
3189 }
3190 
3191 static const VMStateDescription vmstate_virtqueue = {
3192     .name = "virtqueue_state",
3193     .version_id = 1,
3194     .minimum_version_id = 1,
3195     .fields = (VMStateField[]) {
3196         VMSTATE_UINT64(vring.avail, struct VirtQueue),
3197         VMSTATE_UINT64(vring.used, struct VirtQueue),
3198         VMSTATE_END_OF_LIST()
3199     }
3200 };
3201 
3202 static const VMStateDescription vmstate_packed_virtqueue = {
3203     .name = "packed_virtqueue_state",
3204     .version_id = 1,
3205     .minimum_version_id = 1,
3206     .fields = (VMStateField[]) {
3207         VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
3208         VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
3209         VMSTATE_UINT16(used_idx, struct VirtQueue),
3210         VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
3211         VMSTATE_UINT32(inuse, struct VirtQueue),
3212         VMSTATE_END_OF_LIST()
3213     }
3214 };
3215 
3216 static const VMStateDescription vmstate_virtio_virtqueues = {
3217     .name = "virtio/virtqueues",
3218     .version_id = 1,
3219     .minimum_version_id = 1,
3220     .needed = &virtio_virtqueue_needed,
3221     .fields = (VMStateField[]) {
3222         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
3223                       VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
3224         VMSTATE_END_OF_LIST()
3225     }
3226 };
3227 
3228 static const VMStateDescription vmstate_virtio_packed_virtqueues = {
3229     .name = "virtio/packed_virtqueues",
3230     .version_id = 1,
3231     .minimum_version_id = 1,
3232     .needed = &virtio_packed_virtqueue_needed,
3233     .fields = (VMStateField[]) {
3234         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
3235                       VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
3236         VMSTATE_END_OF_LIST()
3237     }
3238 };
3239 
3240 static const VMStateDescription vmstate_ringsize = {
3241     .name = "ringsize_state",
3242     .version_id = 1,
3243     .minimum_version_id = 1,
3244     .fields = (VMStateField[]) {
3245         VMSTATE_UINT32(vring.num_default, struct VirtQueue),
3246         VMSTATE_END_OF_LIST()
3247     }
3248 };
3249 
3250 static const VMStateDescription vmstate_virtio_ringsize = {
3251     .name = "virtio/ringsize",
3252     .version_id = 1,
3253     .minimum_version_id = 1,
3254     .needed = &virtio_ringsize_needed,
3255     .fields = (VMStateField[]) {
3256         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
3257                       VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
3258         VMSTATE_END_OF_LIST()
3259     }
3260 };
3261 
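/*
 * Some transports (those whose VirtioBusClass implements has_extra_state)
 * carry extra, transport-specific migration state.  These wrappers simply
 * delegate to the proxy device's load_extra_state/save_extra_state hooks.
 */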
3262 static int get_extra_state(QEMUFile *f, void *pv, size_t size,
3263                            const VMStateField *field)
3264 {
3265     VirtIODevice *vdev = pv;
3266     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3267     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3268 
3269     if (!k->load_extra_state) {
3270         return -1;
3271     } else {
3272         return k->load_extra_state(qbus->parent, f);
3273     }
3274 }
3275 
3276 static int put_extra_state(QEMUFile *f, void *pv, size_t size,
3277                            const VMStateField *field, JSONWriter *vmdesc)
3278 {
3279     VirtIODevice *vdev = pv;
3280     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3281     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3282 
3283     k->save_extra_state(qbus->parent, f);
3284     return 0;
3285 }
3286 
3287 static const VMStateInfo vmstate_info_extra_state = {
3288     .name = "virtqueue_extra_state",
3289     .get = get_extra_state,
3290     .put = put_extra_state,
3291 };
3292 
3293 static const VMStateDescription vmstate_virtio_extra_state = {
3294     .name = "virtio/extra_state",
3295     .version_id = 1,
3296     .minimum_version_id = 1,
3297     .needed = &virtio_extra_state_needed,
3298     .fields = (VMStateField[]) {
3299         {
3300             .name         = "extra_state",
3301             .version_id   = 0,
3302             .field_exists = NULL,
3303             .size         = 0,
3304             .info         = &vmstate_info_extra_state,
3305             .flags        = VMS_SINGLE,
3306             .offset       = 0,
3307         },
3308         VMSTATE_END_OF_LIST()
3309     }
3310 };
3311 
3312 static const VMStateDescription vmstate_virtio_device_endian = {
3313     .name = "virtio/device_endian",
3314     .version_id = 1,
3315     .minimum_version_id = 1,
3316     .needed = &virtio_device_endian_needed,
3317     .fields = (VMStateField[]) {
3318         VMSTATE_UINT8(device_endian, VirtIODevice),
3319         VMSTATE_END_OF_LIST()
3320     }
3321 };
3322 
3323 static const VMStateDescription vmstate_virtio_64bit_features = {
3324     .name = "virtio/64bit_features",
3325     .version_id = 1,
3326     .minimum_version_id = 1,
3327     .needed = &virtio_64bit_features_needed,
3328     .fields = (VMStateField[]) {
3329         VMSTATE_UINT64(guest_features, VirtIODevice),
3330         VMSTATE_END_OF_LIST()
3331     }
3332 };
3333 
3334 static const VMStateDescription vmstate_virtio_broken = {
3335     .name = "virtio/broken",
3336     .version_id = 1,
3337     .minimum_version_id = 1,
3338     .needed = &virtio_broken_needed,
3339     .fields = (VMStateField[]) {
3340         VMSTATE_BOOL(broken, VirtIODevice),
3341         VMSTATE_END_OF_LIST()
3342     }
3343 };
3344 
3345 static const VMStateDescription vmstate_virtio_started = {
3346     .name = "virtio/started",
3347     .version_id = 1,
3348     .minimum_version_id = 1,
3349     .needed = &virtio_started_needed,
3350     .fields = (VMStateField[]) {
3351         VMSTATE_BOOL(started, VirtIODevice),
3352         VMSTATE_END_OF_LIST()
3353     }
3354 };
3355 
3356 static const VMStateDescription vmstate_virtio_disabled = {
3357     .name = "virtio/disabled",
3358     .version_id = 1,
3359     .minimum_version_id = 1,
3360     .needed = &virtio_disabled_needed,
3361     .fields = (VMStateField[]) {
3362         VMSTATE_BOOL(disabled, VirtIODevice),
3363         VMSTATE_END_OF_LIST()
3364     }
3365 };
3366 
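/*
 * Top-level virtio vmstate.  The common fields are streamed by hand in
 * virtio_save()/virtio_load(); everything optional lives in subsections that
 * are only emitted when their .needed callback returns true, which keeps the
 * stream compatible with older versions that lack them.
 */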
3367 static const VMStateDescription vmstate_virtio = {
3368     .name = "virtio",
3369     .version_id = 1,
3370     .minimum_version_id = 1,
3371     .fields = (VMStateField[]) {
3372         VMSTATE_END_OF_LIST()
3373     },
3374     .subsections = (const VMStateDescription*[]) {
3375         &vmstate_virtio_device_endian,
3376         &vmstate_virtio_64bit_features,
3377         &vmstate_virtio_virtqueues,
3378         &vmstate_virtio_ringsize,
3379         &vmstate_virtio_broken,
3380         &vmstate_virtio_extra_state,
3381         &vmstate_virtio_started,
3382         &vmstate_virtio_packed_virtqueues,
3383         &vmstate_virtio_disabled,
3384         NULL
3385     }
3386 };
3387 
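/*
 * Save order: transport config, common device fields (status, ISR, queue_sel,
 * low 32 feature bits, config space), per-queue state for each active queue,
 * device-specific state, and finally the vmstate_virtio subsections.
 * virtio_load() must consume the stream in exactly the same order.
 */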
3388 int virtio_save(VirtIODevice *vdev, QEMUFile *f)
3389 {
3390     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3391     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3392     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
3393     uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
3394     int i;
3395 
3396     if (k->save_config) {
3397         k->save_config(qbus->parent, f);
3398     }
3399 
3400     qemu_put_8s(f, &vdev->status);
3401     qemu_put_8s(f, &vdev->isr);
3402     qemu_put_be16s(f, &vdev->queue_sel);
3403     qemu_put_be32s(f, &guest_features_lo);
3404     qemu_put_be32(f, vdev->config_len);
3405     qemu_put_buffer(f, vdev->config, vdev->config_len);
3406 
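    /*
     * Count the contiguous run of configured queues (vring.num != 0) so the
     * destination knows how many per-queue records to expect.
     */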
3407     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3408         if (vdev->vq[i].vring.num == 0)
3409             break;
3410     }
3411 
3412     qemu_put_be32(f, i);
3413 
3414     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3415         if (vdev->vq[i].vring.num == 0)
3416             break;
3417 
3418         qemu_put_be32(f, vdev->vq[i].vring.num);
3419         if (k->has_variable_vring_alignment) {
3420             qemu_put_be32(f, vdev->vq[i].vring.align);
3421         }
3422         /*
3423          * Save desc now, the rest of the ring addresses are saved in
3424          * subsections for VIRTIO-1 devices.
3425          */
3426         qemu_put_be64(f, vdev->vq[i].vring.desc);
3427         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
3428         if (k->save_queue) {
3429             k->save_queue(qbus->parent, i, f);
3430         }
3431     }
3432 
3433     if (vdc->save != NULL) {
3434         vdc->save(vdev, f);
3435     }
3436 
3437     if (vdc->vmsd) {
3438         int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
3439         if (ret) {
3440             return ret;
3441         }
3442     }
3443 
3444     /* Subsections */
3445     return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
3446 }
3447 
3448 /* A wrapper for use as a VMState .put function */
3449 static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
3450                               const VMStateField *field, JSONWriter *vmdesc)
3451 {
3452     return virtio_save(VIRTIO_DEVICE(opaque), f);
3453 }
3454 
3455 /* A wrapper for use as a VMState .get function */
3456 static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
3457                              const VMStateField *field)
3458 {
3459     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
3460     DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
3461 
3462     return virtio_load(vdev, f, dc->vmsd->version_id);
3463 }
3464 
3465 const VMStateInfo virtio_vmstate_info = {
3466     .name = "virtio",
3467     .get = virtio_device_get,
3468     .put = virtio_device_put,
3469 };
3470 
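/*
 * Clamp the requested features to what the host offers and apply them; the
 * clamped value is installed either way, but -1 is returned if the guest
 * asked for bits the host does not advertise.
 */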
3471 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
3472 {
3473     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
3474     bool bad = (val & ~(vdev->host_features)) != 0;
3475 
3476     val &= vdev->host_features;
3477     if (k->set_features) {
3478         k->set_features(vdev, val);
3479     }
3480     vdev->guest_features = val;
3481     return bad ? -1 : 0;
3482 }
3483 
3484 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
3485 {
3486     int ret;
3487     /*
3488      * The driver must not attempt to set features after feature negotiation
3489      * has finished.
3490      */
3491     if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
3492         return -EINVAL;
3493     }
3494 
3495     if (val & (1ull << VIRTIO_F_BAD_FEATURE)) {
3496         qemu_log_mask(LOG_GUEST_ERROR,
3497                       "%s: guest driver for %s has enabled UNUSED(30) feature bit!\n",
3498                       __func__, vdev->name);
3499     }
3500 
3501     ret = virtio_set_features_nocheck(vdev, val);
3502     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
3503         /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
3504         int i;
3505         for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3506             if (vdev->vq[i].vring.num != 0) {
3507                 virtio_init_region_cache(vdev, i);
3508             }
3509         }
3510     }
3511     if (!ret) {
3512         if (!virtio_device_started(vdev, vdev->status) &&
3513             !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3514             vdev->start_on_kick = true;
3515         }
3516     }
3517     return ret;
3518 }
3519 
3520 size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
3521                               uint64_t host_features)
3522 {
3523     size_t config_size = params->min_size;
3524     const VirtIOFeature *feature_sizes = params->feature_sizes;
3525     size_t i;
3526 
3527     for (i = 0; feature_sizes[i].flags != 0; i++) {
3528         if (host_features & feature_sizes[i].flags) {
3529             config_size = MAX(feature_sizes[i].end, config_size);
3530         }
3531     }
3532 
3533     assert(config_size <= params->max_size);
3534     return config_size;
3535 }
3536 
3537 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
3538 {
3539     int i, ret;
3540     int32_t config_len;
3541     uint32_t num;
3542     uint32_t features;
3543     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3544     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3545     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
3546 
3547     /*
3548      * We poison the endianness to ensure it does not get used before
3549      * subsections have been loaded.
3550      */
3551     vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
3552 
3553     if (k->load_config) {
3554         ret = k->load_config(qbus->parent, f);
3555         if (ret)
3556             return ret;
3557     }
3558 
3559     qemu_get_8s(f, &vdev->status);
3560     qemu_get_8s(f, &vdev->isr);
3561     qemu_get_be16s(f, &vdev->queue_sel);
3562     if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
3563         return -1;
3564     }
3565     qemu_get_be32s(f, &features);
3566 
3567     /*
3568      * Temporarily set guest_features low bits - needed by
3569      * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
3570      * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
3571      *
3572      * Note: devices should always test host features in the future - don't
3573      * create new dependencies like this.
3574      */
3575     vdev->guest_features = features;
3576 
3577     config_len = qemu_get_be32(f);
3578 
3579     /*
3580      * There are cases where the incoming config can be bigger or smaller
3581      * than what we have; so load what we have space for, and skip
3582      * any excess that's in the stream.
3583      */
3584     qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
3585 
3586     while (config_len > vdev->config_len) {
3587         qemu_get_byte(f);
3588         config_len--;
3589     }
3590 
3591     num = qemu_get_be32(f);
3592 
3593     if (num > VIRTIO_QUEUE_MAX) {
3594         error_report("Invalid number of virtqueues: 0x%x", num);
3595         return -1;
3596     }
3597 
3598     for (i = 0; i < num; i++) {
3599         vdev->vq[i].vring.num = qemu_get_be32(f);
3600         if (k->has_variable_vring_alignment) {
3601             vdev->vq[i].vring.align = qemu_get_be32(f);
3602         }
3603         vdev->vq[i].vring.desc = qemu_get_be64(f);
3604         qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
3605         vdev->vq[i].signalled_used_valid = false;
3606         vdev->vq[i].notification = true;
3607 
3608         if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
3609             error_report("VQ %d address 0x0 "
3610                          "inconsistent with Host index 0x%x",
3611                          i, vdev->vq[i].last_avail_idx);
3612             return -1;
3613         }
3614         if (k->load_queue) {
3615             ret = k->load_queue(qbus->parent, i, f);
3616             if (ret)
3617                 return ret;
3618         }
3619     }
3620 
3621     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
3622 
3623     if (vdc->load != NULL) {
3624         ret = vdc->load(vdev, f, version_id);
3625         if (ret) {
3626             return ret;
3627         }
3628     }
3629 
3630     if (vdc->vmsd) {
3631         ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
3632         if (ret) {
3633             return ret;
3634         }
3635     }
3636 
3637     /* Subsections */
3638     ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
3639     if (ret) {
3640         return ret;
3641     }
3642 
3643     if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
3644         vdev->device_endian = virtio_default_endian();
3645     }
3646 
3647     if (virtio_64bit_features_needed(vdev)) {
3648         /*
3649          * Subsection load filled vdev->guest_features.  Run them
3650          * through virtio_set_features to sanity-check them against
3651          * host_features.
3652          */
3653         uint64_t features64 = vdev->guest_features;
3654         if (virtio_set_features_nocheck(vdev, features64) < 0) {
3655             error_report("Features 0x%" PRIx64 " unsupported. "
3656                          "Allowed features: 0x%" PRIx64,
3657                          features64, vdev->host_features);
3658             return -1;
3659         }
3660     } else {
3661         if (virtio_set_features_nocheck(vdev, features) < 0) {
3662             error_report("Features 0x%x unsupported. "
3663                          "Allowed features: 0x%" PRIx64,
3664                          features, vdev->host_features);
3665             return -1;
3666         }
3667     }
3668 
3669     if (!virtio_device_started(vdev, vdev->status) &&
3670         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3671         vdev->start_on_kick = true;
3672     }
3673 
3674     RCU_READ_LOCK_GUARD();
3675     for (i = 0; i < num; i++) {
3676         if (vdev->vq[i].vring.desc) {
3677             uint16_t nheads;
3678 
3679             /*
3680              * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
3681              * only the region cache needs to be set up.  Legacy devices need
3682              * to calculate used and avail ring addresses based on the desc
3683              * address.
3684              */
3685             if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3686                 virtio_init_region_cache(vdev, i);
3687             } else {
3688                 virtio_queue_update_rings(vdev, i);
3689             }
3690 
3691             if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3692                 vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
3693                 vdev->vq[i].shadow_avail_wrap_counter =
3694                                         vdev->vq[i].last_avail_wrap_counter;
3695                 continue;
3696             }
3697 
3698             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
3699             /* Check it isn't doing strange things with descriptor numbers. */
3700             if (nheads > vdev->vq[i].vring.num) {
3701                 virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
3702                              "inconsistent with Host index 0x%x: delta 0x%x",
3703                              i, vdev->vq[i].vring.num,
3704                              vring_avail_idx(&vdev->vq[i]),
3705                              vdev->vq[i].last_avail_idx, nheads);
3706                 vdev->vq[i].used_idx = 0;
3707                 vdev->vq[i].shadow_avail_idx = 0;
3708                 vdev->vq[i].inuse = 0;
3709                 continue;
3710             }
3711             vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
3712             vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
3713 
3714             /*
3715              * Some devices migrate VirtQueueElements that have been popped
3716              * from the avail ring but not yet returned to the used ring.
3717              * Since max ring size < UINT16_MAX it's safe to use modulo
3718              * UINT16_MAX + 1 subtraction.
3719              */
3720             vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
3721                                 vdev->vq[i].used_idx);
3722             if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
3723                 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
3724                              "used_idx 0x%x",
3725                              i, vdev->vq[i].vring.num,
3726                              vdev->vq[i].last_avail_idx,
3727                              vdev->vq[i].used_idx);
3728                 return -1;
3729             }
3730         }
3731     }
3732 
3733     if (vdc->post_load) {
3734         ret = vdc->post_load(vdev);
3735         if (ret) {
3736             return ret;
3737         }
3738     }
3739 
3740     return 0;
3741 }
3742 
3743 void virtio_cleanup(VirtIODevice *vdev)
3744 {
3745     qemu_del_vm_change_state_handler(vdev->vmstate);
3746 }
3747 
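/*
 * VM run-state hook: when the backend should be running, re-apply the device
 * status before telling the transport; when it should stop, tell the
 * transport first so it can quiesce before the status is re-applied.
 */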
3748 static void virtio_vmstate_change(void *opaque, bool running, RunState state)
3749 {
3750     VirtIODevice *vdev = opaque;
3751     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3752     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3753     bool backend_run = running && virtio_device_started(vdev, vdev->status);
3754     vdev->vm_running = running;
3755 
3756     if (backend_run) {
3757         virtio_set_status(vdev, vdev->status);
3758     }
3759 
3760     if (k->vmstate_change) {
3761         k->vmstate_change(qbus->parent, backend_run);
3762     }
3763 
3764     if (!backend_run) {
3765         virtio_set_status(vdev, vdev->status);
3766     }
3767 }
3768 
3769 void virtio_instance_init_common(Object *proxy_obj, void *data,
3770                                  size_t vdev_size, const char *vdev_name)
3771 {
3772     DeviceState *vdev = data;
3773 
3774     object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
3775                                        vdev_size, vdev_name, &error_abort,
3776                                        NULL);
3777     qdev_alias_all_properties(vdev, proxy_obj);
3778 }
3779 
3780 void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size)
3781 {
3782     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3783     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3784     int i;
3785     int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3786 
3787     if (nvectors) {
3788         vdev->vector_queues =
3789             g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3790     }
3791 
3792     vdev->start_on_kick = false;
3793     vdev->started = false;
3794     vdev->vhost_started = false;
3795     vdev->device_id = device_id;
3796     vdev->status = 0;
3797     qatomic_set(&vdev->isr, 0);
3798     vdev->queue_sel = 0;
3799     vdev->config_vector = VIRTIO_NO_VECTOR;
3800     vdev->vq = g_new0(VirtQueue, VIRTIO_QUEUE_MAX);
3801     vdev->vm_running = runstate_is_running();
3802     vdev->broken = false;
3803     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3804         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3805         vdev->vq[i].vdev = vdev;
3806         vdev->vq[i].queue_index = i;
3807         vdev->vq[i].host_notifier_enabled = false;
3808     }
3809 
3810     vdev->name = virtio_id_to_name(device_id);
3811     vdev->config_len = config_size;
3812     if (vdev->config_len) {
3813         vdev->config = g_malloc0(config_size);
3814     } else {
3815         vdev->config = NULL;
3816     }
3817     vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3818             virtio_vmstate_change, vdev);
3819     vdev->device_endian = virtio_default_endian();
3820     vdev->use_guest_notifier_mask = true;
3821 }
3822 
3823 /*
3824  * Only devices that have already been around prior to defining the virtio
3825  * standard support legacy mode; this includes devices not specified in the
3826  * standard. All newer devices conform to the virtio standard only.
3827  */
3828 bool virtio_legacy_allowed(VirtIODevice *vdev)
3829 {
3830     switch (vdev->device_id) {
3831     case VIRTIO_ID_NET:
3832     case VIRTIO_ID_BLOCK:
3833     case VIRTIO_ID_CONSOLE:
3834     case VIRTIO_ID_RNG:
3835     case VIRTIO_ID_BALLOON:
3836     case VIRTIO_ID_RPMSG:
3837     case VIRTIO_ID_SCSI:
3838     case VIRTIO_ID_9P:
3839     case VIRTIO_ID_RPROC_SERIAL:
3840     case VIRTIO_ID_CAIF:
3841         return true;
3842     default:
3843         return false;
3844     }
3845 }
3846 
3847 bool virtio_legacy_check_disabled(VirtIODevice *vdev)
3848 {
3849     return vdev->disable_legacy_check;
3850 }
3851 
3852 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3853 {
3854     return vdev->vq[n].vring.desc;
3855 }
3856 
3857 bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
3858 {
3859     return virtio_queue_get_desc_addr(vdev, n) != 0;
3860 }
3861 
3862 bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3863 {
3864     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3865     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3866 
3867     if (k->queue_enabled) {
3868         return k->queue_enabled(qbus->parent, n);
3869     }
3870     return virtio_queue_enabled_legacy(vdev, n);
3871 }
3872 
3873 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3874 {
3875     return vdev->vq[n].vring.avail;
3876 }
3877 
3878 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3879 {
3880     return vdev->vq[n].vring.used;
3881 }
3882 
3883 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3884 {
3885     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3886 }
3887 
3888 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3889 {
3890     int s;
3891 
3892     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3893         return sizeof(struct VRingPackedDescEvent);
3894     }
3895 
3896     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3897     return offsetof(VRingAvail, ring) +
3898         sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3899 }
3900 
3901 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3902 {
3903     int s;
3904 
3905     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3906         return sizeof(struct VRingPackedDescEvent);
3907     }
3908 
3909     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3910     return offsetof(VRingUsed, ring) +
3911         sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3912 }
3913 
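/*
 * For packed virtqueues the value reported to transports encodes both ring
 * positions in one 32-bit word: bits 0-14 hold last_avail_idx with its wrap
 * counter in bit 15, bits 16-30 hold used_idx with its wrap counter in
 * bit 31.
 */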
3914 static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3915                                                            int n)
3916 {
3917     unsigned int avail, used;
3918 
3919     avail = vdev->vq[n].last_avail_idx;
3920     avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3921 
3922     used = vdev->vq[n].used_idx;
3923     used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3924 
3925     return avail | used << 16;
3926 }
3927 
3928 static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3929                                                       int n)
3930 {
3931     return vdev->vq[n].last_avail_idx;
3932 }
3933 
3934 unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3935 {
3936     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3937         return virtio_queue_packed_get_last_avail_idx(vdev, n);
3938     } else {
3939         return virtio_queue_split_get_last_avail_idx(vdev, n);
3940     }
3941 }
3942 
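/*
 * Inverse of virtio_queue_packed_get_last_avail_idx(): split the encoded
 * 32-bit value back into avail/used indices plus wrap counters, keeping the
 * shadow copies in sync.
 */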
3943 static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3944                                                    int n, unsigned int idx)
3945 {
3946     struct VirtQueue *vq = &vdev->vq[n];
3947 
3948     vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3949     vq->last_avail_wrap_counter =
3950         vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3951     idx >>= 16;
3952     vq->used_idx = idx & 0x7fff;
3953     vq->used_wrap_counter = !!(idx & 0x8000);
3954 }
3955 
3956 static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3957                                                   int n, unsigned int idx)
3958 {
3959     vdev->vq[n].last_avail_idx = idx;
3960     vdev->vq[n].shadow_avail_idx = idx;
3961 }
3962 
3963 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3964                                      unsigned int idx)
3965 {
3966     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3967         virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3968     } else {
3969         virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3970     }
3971 }
3972 
3973 static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3974                                                        int n)
3975 {
3976     /* The packed ring has no index in guest memory to restore from */
3977     return;
3978 }
3979 
3980 static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3981                                                       int n)
3982 {
3983     RCU_READ_LOCK_GUARD();
3984     if (vdev->vq[n].vring.desc) {
3985         vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3986         vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3987     }
3988 }
3989 
3990 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3991 {
3992     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3993         virtio_queue_packed_restore_last_avail_idx(vdev, n);
3994     } else {
3995         virtio_queue_split_restore_last_avail_idx(vdev, n);
3996     }
3997 }
3998 
3999 static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
4000 {
4001     /* used idx was updated through set_last_avail_idx() */
4002     return;
4003 }
4004 
4005 static void virtio_queue_split_update_used_idx(VirtIODevice *vdev, int n)
4006 {
4007     RCU_READ_LOCK_GUARD();
4008     if (vdev->vq[n].vring.desc) {
4009         vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
4010     }
4011 }
4012 
4013 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
4014 {
4015     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
4016         return virtio_queue_packed_update_used_idx(vdev, n);
4017     } else {
4018         return virtio_queue_split_update_used_idx(vdev, n);
4019     }
4020 }
4021 
4022 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
4023 {
4024     vdev->vq[n].signalled_used_valid = false;
4025 }
4026 
4027 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
4028 {
4029     return vdev->vq + n;
4030 }
4031 
4032 uint16_t virtio_get_queue_index(VirtQueue *vq)
4033 {
4034     return vq->queue_index;
4035 }
4036 
4037 static void virtio_queue_guest_notifier_read(EventNotifier *n)
4038 {
4039     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
4040     if (event_notifier_test_and_clear(n)) {
4041         virtio_irq(vq);
4042     }
4043 }
4044 
4045 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
4046                                                 bool with_irqfd)
4047 {
4048     if (assign && !with_irqfd) {
4049         event_notifier_set_handler(&vq->guest_notifier,
4050                                    virtio_queue_guest_notifier_read);
4051     } else {
4052         event_notifier_set_handler(&vq->guest_notifier, NULL);
4053     }
4054     if (!assign) {
4055         /* Test and clear notifier before closing it,
4056          * in case poll callback didn't have time to run. */
4057         virtio_queue_guest_notifier_read(&vq->guest_notifier);
4058     }
4059 }
4060 
4061 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
4062 {
4063     return &vq->guest_notifier;
4064 }
4065 
4066 static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
4067 {
4068     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
4069 
4070     virtio_queue_set_notification(vq, 0);
4071 }
4072 
4073 static bool virtio_queue_host_notifier_aio_poll(void *opaque)
4074 {
4075     EventNotifier *n = opaque;
4076     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
4077 
4078     return vq->vring.desc && !virtio_queue_empty(vq);
4079 }
4080 
4081 static void virtio_queue_host_notifier_aio_poll_ready(EventNotifier *n)
4082 {
4083     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
4084 
4085     virtio_queue_notify_vq(vq);
4086 }
4087 
4088 static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
4089 {
4090     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
4091 
4092     /* Caller polls once more after this to catch requests that race with us */
4093     virtio_queue_set_notification(vq, 1);
4094 }
4095 
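/*
 * Attach the host notifier to an AioContext (e.g. an IOThread) so the
 * virtqueue is serviced there, with adaptive polling: the driver is told
 * not to kick while we poll, and kicks are re-enabled when polling ends.
 */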
4096 void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
4097 {
4098     aio_set_event_notifier(ctx, &vq->host_notifier, true,
4099                            virtio_queue_host_notifier_read,
4100                            virtio_queue_host_notifier_aio_poll,
4101                            virtio_queue_host_notifier_aio_poll_ready);
4102     aio_set_event_notifier_poll(ctx, &vq->host_notifier,
4103                                 virtio_queue_host_notifier_aio_poll_begin,
4104                                 virtio_queue_host_notifier_aio_poll_end);
4105 }
4106 
4107 /*
4108  * Same as virtio_queue_aio_attach_host_notifier() but without polling. Use
4109  * this for rx virtqueues and similar cases where the virtqueue handler
4110  * function does not pop all elements. When the virtqueue is left non-empty,
4111  * polling consumes CPU cycles and should not be used.
4112  */
4113 void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
4114 {
4115     aio_set_event_notifier(ctx, &vq->host_notifier, true,
4116                            virtio_queue_host_notifier_read,
4117                            NULL, NULL);
4118 }
4119 
4120 void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
4121 {
4122     aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL);
4123     /* Test and clear notifier after disabling event,
4124      * in case poll callback didn't have time to run. */
4125     virtio_queue_host_notifier_read(&vq->host_notifier);
4126 }
4127 
4128 void virtio_queue_host_notifier_read(EventNotifier *n)
4129 {
4130     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
4131     if (event_notifier_test_and_clear(n)) {
4132         virtio_queue_notify_vq(vq);
4133     }
4134 }
4135 
4136 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
4137 {
4138     return &vq->host_notifier;
4139 }
4140 
4141 void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
4142 {
4143     vq->host_notifier_enabled = enabled;
4144 }
4145 
4146 int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
4147                                       MemoryRegion *mr, bool assign)
4148 {
4149     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
4150     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
4151 
4152     if (k->set_host_notifier_mr) {
4153         return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
4154     }
4155 
4156     return -1;
4157 }
4158 
4159 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
4160 {
4161     g_free(vdev->bus_name);
4162     vdev->bus_name = g_strdup(bus_name);
4163 }
4164 
4165 void G_GNUC_PRINTF(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
4166 {
4167     va_list ap;
4168 
4169     va_start(ap, fmt);
4170     error_vreport(fmt, ap);
4171     va_end(ap);
4172 
4173     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
4174         vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
4175         virtio_notify_config(vdev);
4176     }
4177 
4178     vdev->broken = true;
4179 }
4180 
4181 static void virtio_memory_listener_commit(MemoryListener *listener)
4182 {
4183     VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
4184     int i;
4185 
4186     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
4187         if (vdev->vq[i].vring.num == 0) {
4188             break;
4189         }
4190         virtio_init_region_cache(vdev, i);
4191     }
4192 }
4193 
4194 static void virtio_device_realize(DeviceState *dev, Error **errp)
4195 {
4196     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
4197     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
4198     Error *err = NULL;
4199 
4200     /* Devices should either use vmsd or the load/save methods */
4201     assert(!vdc->vmsd || !vdc->load);
4202 
4203     if (vdc->realize != NULL) {
4204         vdc->realize(dev, &err);
4205         if (err != NULL) {
4206             error_propagate(errp, err);
4207             return;
4208         }
4209     }
4210 
4211     virtio_bus_device_plugged(vdev, &err);
4212     if (err != NULL) {
4213         error_propagate(errp, err);
4214         vdc->unrealize(dev);
4215         return;
4216     }
4217 
4218     vdev->listener.commit = virtio_memory_listener_commit;
4219     vdev->listener.name = "virtio";
4220     memory_listener_register(&vdev->listener, vdev->dma_as);
4221     QTAILQ_INSERT_TAIL(&virtio_list, vdev, next);
4222 }
4223 
4224 static void virtio_device_unrealize(DeviceState *dev)
4225 {
4226     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
4227     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
4228 
4229     memory_listener_unregister(&vdev->listener);
4230     virtio_bus_device_unplugged(vdev);
4231 
4232     if (vdc->unrealize != NULL) {
4233         vdc->unrealize(dev);
4234     }
4235 
4236     QTAILQ_REMOVE(&virtio_list, vdev, next);
4237     g_free(vdev->bus_name);
4238     vdev->bus_name = NULL;
4239 }
4240 
4241 static void virtio_device_free_virtqueues(VirtIODevice *vdev)
4242 {
4243     int i;
4244     if (!vdev->vq) {
4245         return;
4246     }
4247 
4248     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
4249         if (vdev->vq[i].vring.num == 0) {
4250             break;
4251         }
4252         virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
4253     }
4254     g_free(vdev->vq);
4255 }
4256 
4257 static void virtio_device_instance_finalize(Object *obj)
4258 {
4259     VirtIODevice *vdev = VIRTIO_DEVICE(obj);
4260 
4261     virtio_device_free_virtqueues(vdev);
4262 
4263     g_free(vdev->config);
4264     g_free(vdev->vector_queues);
4265 }
4266 
4267 static Property virtio_properties[] = {
4268     DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
4269     DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
4270     DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
4271     DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
4272                      disable_legacy_check, false),
4273     DEFINE_PROP_END_OF_LIST(),
4274 };
4275 
4276 static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
4277 {
4278     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
4279     int i, n, r, err;
4280 
4281     /*
4282      * Batch all the host notifiers in a single transaction to avoid
4283      * quadratic time complexity in address_space_update_ioeventfds().
4284      */
4285     memory_region_transaction_begin();
4286     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
4287         VirtQueue *vq = &vdev->vq[n];
4288         if (!virtio_queue_get_num(vdev, n)) {
4289             continue;
4290         }
4291         r = virtio_bus_set_host_notifier(qbus, n, true);
4292         if (r < 0) {
4293             err = r;
4294             goto assign_error;
4295         }
4296         event_notifier_set_handler(&vq->host_notifier,
4297                                    virtio_queue_host_notifier_read);
4298     }
4299 
4300     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
4301         /* Kick right away to begin processing requests already in vring */
4302         VirtQueue *vq = &vdev->vq[n];
4303         if (!vq->vring.num) {
4304             continue;
4305         }
4306         event_notifier_set(&vq->host_notifier);
4307     }
4308     memory_region_transaction_commit();
4309     return 0;
4310 
4311 assign_error:
4312     i = n; /* save n for a second iteration after transaction is committed. */
4313     while (--n >= 0) {
4314         VirtQueue *vq = &vdev->vq[n];
4315         if (!virtio_queue_get_num(vdev, n)) {
4316             continue;
4317         }
4318 
4319         event_notifier_set_handler(&vq->host_notifier, NULL);
4320         r = virtio_bus_set_host_notifier(qbus, n, false);
4321         assert(r >= 0);
4322     }
4323     /*
4324      * The transaction expects the ioeventfds to be open when it
4325      * commits. Do it now, before the cleanup loop.
4326      */
4327     memory_region_transaction_commit();
4328 
4329     while (--i >= 0) {
4330         if (!virtio_queue_get_num(vdev, i)) {
4331             continue;
4332         }
4333         virtio_bus_cleanup_host_notifier(qbus, i);
4334     }
4335     return err;
4336 }
4337 
4338 int virtio_device_start_ioeventfd(VirtIODevice *vdev)
4339 {
4340     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
4341     VirtioBusState *vbus = VIRTIO_BUS(qbus);
4342 
4343     return virtio_bus_start_ioeventfd(vbus);
4344 }
4345 
4346 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
4347 {
4348     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
4349     int n, r;
4350 
4351     /*
4352      * Batch all the host notifiers in a single transaction to avoid
4353      * quadratic time complexity in address_space_update_ioeventfds().
4354      */
4355     memory_region_transaction_begin();
4356     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
4357         VirtQueue *vq = &vdev->vq[n];
4358 
4359         if (!virtio_queue_get_num(vdev, n)) {
4360             continue;
4361         }
4362         event_notifier_set_handler(&vq->host_notifier, NULL);
4363         r = virtio_bus_set_host_notifier(qbus, n, false);
4364         assert(r >= 0);
4365     }
4366     /*
4367      * The transaction expects the ioeventfds to be open when it
4368      * commits. Do it now, before the cleanup loop.
4369      */
4370     memory_region_transaction_commit();
4371 
4372     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
4373         if (!virtio_queue_get_num(vdev, n)) {
4374             continue;
4375         }
4376         virtio_bus_cleanup_host_notifier(qbus, n);
4377     }
4378 }
4379 
4380 int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
4381 {
4382     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
4383     VirtioBusState *vbus = VIRTIO_BUS(qbus);
4384 
4385     return virtio_bus_grab_ioeventfd(vbus);
4386 }
4387 
4388 void virtio_device_release_ioeventfd(VirtIODevice *vdev)
4389 {
4390     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
4391     VirtioBusState *vbus = VIRTIO_BUS(qbus);
4392 
4393     virtio_bus_release_ioeventfd(vbus);
4394 }
4395 
4396 static void virtio_device_class_init(ObjectClass *klass, void *data)
4397 {
4398     /* Set the default value here. */
4399     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
4400     DeviceClass *dc = DEVICE_CLASS(klass);
4401 
4402     dc->realize = virtio_device_realize;
4403     dc->unrealize = virtio_device_unrealize;
4404     dc->bus_type = TYPE_VIRTIO_BUS;
4405     device_class_set_props(dc, virtio_properties);
4406     vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
4407     vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
4408 
4409     vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
4410 
4411     QTAILQ_INIT(&virtio_list);
4412 }
4413 
4414 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
4415 {
4416     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
4417     VirtioBusState *vbus = VIRTIO_BUS(qbus);
4418 
4419     return virtio_bus_ioeventfd_enabled(vbus);
4420 }
4421 
4422 VirtioInfoList *qmp_x_query_virtio(Error **errp)
4423 {
4424     VirtioInfoList *list = NULL;
4425     VirtioInfoList *node;
4426     VirtIODevice *vdev;
4427 
4428     QTAILQ_FOREACH(vdev, &virtio_list, next) {
4429         DeviceState *dev = DEVICE(vdev);
4430         Error *err = NULL;
4431         QObject *obj = qmp_qom_get(dev->canonical_path, "realized", &err);
4432 
4433         if (err == NULL) {
4434             GString *is_realized = qobject_to_json_pretty(obj, true);
4435             /* virtio device is NOT realized, remove it from list */
4436             if (!strncmp(is_realized->str, "false", 5)) {
4437                 QTAILQ_REMOVE(&virtio_list, vdev, next);
4438             } else {
4439                 node = g_new0(VirtioInfoList, 1);
4440                 node->value = g_new(VirtioInfo, 1);
4441                 node->value->path = g_strdup(dev->canonical_path);
4442                 node->value->name = g_strdup(vdev->name);
4443                 QAPI_LIST_PREPEND(list, node->value);
4444             }
4445             g_string_free(is_realized, true);
4446         }
4447         qobject_unref(obj);
4448     }
4449 
4450     return list;
4451 }
4452 
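/*
 * Look up a realized VirtIODevice by its canonical QOM path.  Devices that
 * are no longer realized (or have vanished from the QOM tree) are dropped
 * from virtio_list and NULL is returned for them.
 */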
4453 static VirtIODevice *virtio_device_find(const char *path)
4454 {
4455     VirtIODevice *vdev;
4456 
4457     QTAILQ_FOREACH(vdev, &virtio_list, next) {
4458         DeviceState *dev = DEVICE(vdev);
4459 
4460         if (strcmp(dev->canonical_path, path) != 0) {
4461             continue;
4462         }
4463 
4464         Error *err = NULL;
4465         QObject *obj = qmp_qom_get(dev->canonical_path, "realized", &err);
4466         if (err == NULL) {
4467             GString *is_realized = qobject_to_json_pretty(obj, true);
4468             /* virtio device is NOT realized, remove it from list */
4469             if (!strncmp(is_realized->str, "false", 5)) {
4470                 g_string_free(is_realized, true);
4471                 qobject_unref(obj);
4472                 QTAILQ_REMOVE(&virtio_list, vdev, next);
4473                 return NULL;
4474             }
4475             g_string_free(is_realized, true);
4476         } else {
4477             /* virtio device doesn't exist in QOM tree */
4478             QTAILQ_REMOVE(&virtio_list, vdev, next);
4479             qobject_unref(obj);
4480             return NULL;
4481         }
4482         /* device exists in QOM tree & is realized */
4483         qobject_unref(obj);
4484         return vdev;
4485     }
4486     return NULL;
4487 }
4488 
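/*
 * Expand a feature/status bitmap into a list of description strings using
 * the given map.  Recognised bits are cleared from 'bitmap' as they are
 * consumed, so whatever remains afterwards can be reported as unknown.
 */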
4489 #define CONVERT_FEATURES(type, map, is_status, bitmap)   \
4490     ({                                                   \
4491         type *list = NULL;                               \
4492         type *node;                                      \
4493         for (i = 0; map[i].virtio_bit != -1; i++) {      \
4494             if (is_status) {                             \
4495                 bit = map[i].virtio_bit;                 \
4496             }                                            \
4497             else {                                       \
4498                 bit = 1ULL << map[i].virtio_bit;         \
4499             }                                            \
4500             if ((bitmap & bit) == 0) {                   \
4501                 continue;                                \
4502             }                                            \
4503             node = g_new0(type, 1);                      \
4504             node->value = g_strdup(map[i].feature_desc); \
4505             node->next = list;                           \
4506             list = node;                                 \
4507             bitmap ^= bit;                               \
4508         }                                                \
4509         list;                                            \
4510     })
4511 
4512 static VirtioDeviceStatus *qmp_decode_status(uint8_t bitmap)
4513 {
4514     VirtioDeviceStatus *status;
4515     uint8_t bit;
4516     int i;
4517 
4518     status = g_new0(VirtioDeviceStatus, 1);
4519     status->statuses = CONVERT_FEATURES(strList, virtio_config_status_map,
4520                                         1, bitmap);
4521     status->has_unknown_statuses = bitmap != 0;
4522     if (status->has_unknown_statuses) {
4523         status->unknown_statuses = bitmap;
4524     }
4525 
4526     return status;
4527 }
4528 
4529 static VhostDeviceProtocols *qmp_decode_protocols(uint64_t bitmap)
4530 {
4531     VhostDeviceProtocols *vhu_protocols;
4532     uint64_t bit;
4533     int i;
4534 
4535     vhu_protocols = g_new0(VhostDeviceProtocols, 1);
4536     vhu_protocols->protocols =
4537                     CONVERT_FEATURES(strList,
4538                                      vhost_user_protocol_map, 0, bitmap);
4539     vhu_protocols->has_unknown_protocols = bitmap != 0;
4540     if (vhu_protocols->has_unknown_protocols) {
4541         vhu_protocols->unknown_protocols = bitmap;
4542     }
4543 
4544     return vhu_protocols;
4545 }
4546 
4547 static VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id,
4548                                                  uint64_t bitmap)
4549 {
4550     VirtioDeviceFeatures *features;
4551     uint64_t bit;
4552     int i;
4553 
4554     features = g_new0(VirtioDeviceFeatures, 1);
4555     features->has_dev_features = true;
4556 
4557     /* transport features */
4558     features->transports = CONVERT_FEATURES(strList, virtio_transport_map, 0,
4559                                             bitmap);
4560 
4561     /* device features */
4562     switch (device_id) {
4563 #ifdef CONFIG_VIRTIO_SERIAL
4564     case VIRTIO_ID_CONSOLE:
4565         features->dev_features =
4566             CONVERT_FEATURES(strList, virtio_serial_feature_map, 0, bitmap);
4567         break;
4568 #endif
4569 #ifdef CONFIG_VIRTIO_BLK
4570     case VIRTIO_ID_BLOCK:
4571         features->dev_features =
4572             CONVERT_FEATURES(strList, virtio_blk_feature_map, 0, bitmap);
4573         break;
4574 #endif
4575 #ifdef CONFIG_VIRTIO_GPU
4576     case VIRTIO_ID_GPU:
4577         features->dev_features =
4578             CONVERT_FEATURES(strList, virtio_gpu_feature_map, 0, bitmap);
4579         break;
4580 #endif
4581 #ifdef CONFIG_VIRTIO_NET
4582     case VIRTIO_ID_NET:
4583         features->dev_features =
4584             CONVERT_FEATURES(strList, virtio_net_feature_map, 0, bitmap);
4585         break;
4586 #endif
4587 #ifdef CONFIG_VIRTIO_SCSI
4588     case VIRTIO_ID_SCSI:
4589         features->dev_features =
4590             CONVERT_FEATURES(strList, virtio_scsi_feature_map, 0, bitmap);
4591         break;
4592 #endif
4593 #ifdef CONFIG_VIRTIO_BALLOON
4594     case VIRTIO_ID_BALLOON:
4595         features->dev_features =
4596             CONVERT_FEATURES(strList, virtio_balloon_feature_map, 0, bitmap);
4597         break;
4598 #endif
4599 #ifdef CONFIG_VIRTIO_IOMMU
4600     case VIRTIO_ID_IOMMU:
4601         features->dev_features =
4602             CONVERT_FEATURES(strList, virtio_iommu_feature_map, 0, bitmap);
4603         break;
4604 #endif
4605 #ifdef CONFIG_VIRTIO_INPUT
4606     case VIRTIO_ID_INPUT:
4607         features->dev_features =
4608             CONVERT_FEATURES(strList, virtio_input_feature_map, 0, bitmap);
4609         break;
4610 #endif
4611 #ifdef CONFIG_VHOST_USER_FS
4612     case VIRTIO_ID_FS:
4613         features->dev_features =
4614             CONVERT_FEATURES(strList, virtio_fs_feature_map, 0, bitmap);
4615         break;
4616 #endif
4617 #ifdef CONFIG_VHOST_VSOCK
4618     case VIRTIO_ID_VSOCK:
4619         features->dev_features =
4620             CONVERT_FEATURES(strList, virtio_vsock_feature_map, 0, bitmap);
4621         break;
4622 #endif
4623 #ifdef CONFIG_VIRTIO_CRYPTO
4624     case VIRTIO_ID_CRYPTO:
4625         features->dev_features =
4626             CONVERT_FEATURES(strList, virtio_crypto_feature_map, 0, bitmap);
4627         break;
4628 #endif
4629 #ifdef CONFIG_VIRTIO_MEM
4630     case VIRTIO_ID_MEM:
4631         features->dev_features =
4632             CONVERT_FEATURES(strList, virtio_mem_feature_map, 0, bitmap);
4633         break;
4634 #endif
4635 #ifdef CONFIG_VIRTIO_I2C_ADAPTER
4636     case VIRTIO_ID_I2C_ADAPTER:
4637         features->dev_features =
4638             CONVERT_FEATURES(strList, virtio_i2c_feature_map, 0, bitmap);
4639         break;
4640 #endif
4641 #ifdef CONFIG_VIRTIO_RNG
4642     case VIRTIO_ID_RNG:
4643         features->dev_features =
4644             CONVERT_FEATURES(strList, virtio_rng_feature_map, 0, bitmap);
4645         break;
4646 #endif
4647     /* No features */
4648     case VIRTIO_ID_9P:
4649     case VIRTIO_ID_PMEM:
4650     case VIRTIO_ID_IOMEM:
4651     case VIRTIO_ID_RPMSG:
4652     case VIRTIO_ID_CLOCK:
4653     case VIRTIO_ID_MAC80211_WLAN:
4654     case VIRTIO_ID_MAC80211_HWSIM:
4655     case VIRTIO_ID_RPROC_SERIAL:
4656     case VIRTIO_ID_MEMORY_BALLOON:
4657     case VIRTIO_ID_CAIF:
4658     case VIRTIO_ID_SIGNAL_DIST:
4659     case VIRTIO_ID_PSTORE:
4660     case VIRTIO_ID_SOUND:
4661     case VIRTIO_ID_BT:
4662     case VIRTIO_ID_RPMB:
4663     case VIRTIO_ID_VIDEO_ENCODER:
4664     case VIRTIO_ID_VIDEO_DECODER:
4665     case VIRTIO_ID_SCMI:
4666     case VIRTIO_ID_NITRO_SEC_MOD:
4667     case VIRTIO_ID_WATCHDOG:
4668     case VIRTIO_ID_CAN:
4669     case VIRTIO_ID_DMABUF:
4670     case VIRTIO_ID_PARAM_SERV:
4671     case VIRTIO_ID_AUDIO_POLICY:
4672     case VIRTIO_ID_GPIO:
4673         break;
4674     default:
4675         g_assert_not_reached();
4676     }
4677 
4678     features->has_unknown_dev_features = bitmap != 0;
4679     if (features->has_unknown_dev_features) {
4680         features->unknown_dev_features = bitmap;
4681     }
4682 
4683     return features;
4684 }
4685 
4686 VirtioStatus *qmp_x_query_virtio_status(const char *path, Error **errp)
4687 {
4688     VirtIODevice *vdev;
4689     VirtioStatus *status;
4690 
4691     vdev = virtio_device_find(path);
4692     if (vdev == NULL) {
4693         error_setg(errp, "Path %s is not a VirtIODevice", path);
4694         return NULL;
4695     }
4696 
4697     status = g_new0(VirtioStatus, 1);
4698     status->name = g_strdup(vdev->name);
4699     status->device_id = vdev->device_id;
4700     status->vhost_started = vdev->vhost_started;
4701     status->guest_features = qmp_decode_features(vdev->device_id,
4702                                                  vdev->guest_features);
4703     status->host_features = qmp_decode_features(vdev->device_id,
4704                                                 vdev->host_features);
4705     status->backend_features = qmp_decode_features(vdev->device_id,
4706                                                    vdev->backend_features);
4707 
4708     switch (vdev->device_endian) {
4709     case VIRTIO_DEVICE_ENDIAN_LITTLE:
4710         status->device_endian = g_strdup("little");
4711         break;
4712     case VIRTIO_DEVICE_ENDIAN_BIG:
4713         status->device_endian = g_strdup("big");
4714         break;
4715     default:
4716         status->device_endian = g_strdup("unknown");
4717         break;
4718     }
4719 
4720     status->num_vqs = virtio_get_num_queues(vdev);
4721     status->status = qmp_decode_status(vdev->status);
4722     status->isr = vdev->isr;
4723     status->queue_sel = vdev->queue_sel;
4724     status->vm_running = vdev->vm_running;
4725     status->broken = vdev->broken;
4726     status->disabled = vdev->disabled;
4727     status->use_started = vdev->use_started;
4728     status->started = vdev->started;
4729     status->start_on_kick = vdev->start_on_kick;
4730     status->disable_legacy_check = vdev->disable_legacy_check;
4731     status->bus_name = g_strdup(vdev->bus_name);
4732     status->use_guest_notifier_mask = vdev->use_guest_notifier_mask;
4733 
4734     if (vdev->vhost_started) {
4735         VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
4736         struct vhost_dev *hdev = vdc->get_vhost(vdev);
4737 
4738         status->vhost_dev = g_new0(VhostStatus, 1);
4739         status->vhost_dev->n_mem_sections = hdev->n_mem_sections;
4740         status->vhost_dev->n_tmp_sections = hdev->n_tmp_sections;
4741         status->vhost_dev->nvqs = hdev->nvqs;
4742         status->vhost_dev->vq_index = hdev->vq_index;
4743         status->vhost_dev->features =
4744             qmp_decode_features(vdev->device_id, hdev->features);
4745         status->vhost_dev->acked_features =
4746             qmp_decode_features(vdev->device_id, hdev->acked_features);
4747         status->vhost_dev->backend_features =
4748             qmp_decode_features(vdev->device_id, hdev->backend_features);
4749         status->vhost_dev->protocol_features =
4750             qmp_decode_protocols(hdev->protocol_features);
4751         status->vhost_dev->max_queues = hdev->max_queues;
4752         status->vhost_dev->backend_cap = hdev->backend_cap;
4753         status->vhost_dev->log_enabled = hdev->log_enabled;
4754         status->vhost_dev->log_size = hdev->log_size;
4755     }
4756 
4757     return status;
4758 }
4759 
4760 VirtVhostQueueStatus *qmp_x_query_virtio_vhost_queue_status(const char *path,
4761                                                             uint16_t queue,
4762                                                             Error **errp)
4763 {
4764     VirtIODevice *vdev;
4765     VirtVhostQueueStatus *status;
4766 
4767     vdev = virtio_device_find(path);
4768     if (vdev == NULL) {
4769         error_setg(errp, "Path %s is not a VirtIODevice", path);
4770         return NULL;
4771     }
4772 
4773     if (!vdev->vhost_started) {
4774         error_setg(errp, "vhost device has not started yet");
4775         return NULL;
4776     }
4777 
4778     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
4779     struct vhost_dev *hdev = vdc->get_vhost(vdev);
4780 
4781     if (queue < hdev->vq_index || queue >= hdev->vq_index + hdev->nvqs) {
4782         error_setg(errp, "Invalid vhost virtqueue number %d", queue);
4783         return NULL;
4784     }
4785 
4786     status = g_new0(VirtVhostQueueStatus, 1);
4787     status->name = g_strdup(vdev->name);
4788     status->kick = hdev->vqs[queue].kick;
4789     status->call = hdev->vqs[queue].call;
4790     status->desc = (uintptr_t)hdev->vqs[queue].desc;
4791     status->avail = (uintptr_t)hdev->vqs[queue].avail;
4792     status->used = (uintptr_t)hdev->vqs[queue].used;
4793     status->num = hdev->vqs[queue].num;
4794     status->desc_phys = hdev->vqs[queue].desc_phys;
4795     status->desc_size = hdev->vqs[queue].desc_size;
4796     status->avail_phys = hdev->vqs[queue].avail_phys;
4797     status->avail_size = hdev->vqs[queue].avail_size;
4798     status->used_phys = hdev->vqs[queue].used_phys;
4799     status->used_size = hdev->vqs[queue].used_size;
4800 
4801     return status;
4802 }
4803 
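/*
 * Collect the core virtqueue state (ring addresses, sizes, indices) for
 * one queue.  When vhost is active, the last avail index is queried from
 * the backend via vhost_get_vring_base(); otherwise the shadow and last
 * avail indices tracked by QEMU are reported directly.
 *
 * Illustrative QMP exchange (hypothetical QOM path):
 *
 *   -> { "execute": "x-query-virtio-queue-status",
 *        "arguments": { "path": "/machine/peripheral-anon/device[1]/virtio-backend",
 *                       "queue": 0 } }
 *   <- { "return": { "name": "virtio-net", ... } }
 */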
4804 VirtQueueStatus *qmp_x_query_virtio_queue_status(const char *path,
4805                                                  uint16_t queue,
4806                                                  Error **errp)
4807 {
4808     VirtIODevice *vdev;
4809     VirtQueueStatus *status;
4810 
4811     vdev = virtio_device_find(path);
4812     if (vdev == NULL) {
4813         error_setg(errp, "Path %s is not a VirtIODevice", path);
4814         return NULL;
4815     }
4816 
4817     if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
4818         error_setg(errp, "Invalid virtqueue number %d", queue);
4819         return NULL;
4820     }
4821 
4822     status = g_new0(VirtQueueStatus, 1);
4823     status->name = g_strdup(vdev->name);
4824     status->queue_index = vdev->vq[queue].queue_index;
4825     status->inuse = vdev->vq[queue].inuse;
4826     status->vring_num = vdev->vq[queue].vring.num;
4827     status->vring_num_default = vdev->vq[queue].vring.num_default;
4828     status->vring_align = vdev->vq[queue].vring.align;
4829     status->vring_desc = vdev->vq[queue].vring.desc;
4830     status->vring_avail = vdev->vq[queue].vring.avail;
4831     status->vring_used = vdev->vq[queue].vring.used;
4832     status->used_idx = vdev->vq[queue].used_idx;
4833     status->signalled_used = vdev->vq[queue].signalled_used;
4834     status->signalled_used_valid = vdev->vq[queue].signalled_used_valid;
4835 
4836     if (vdev->vhost_started) {
4837         VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
4838         struct vhost_dev *hdev = vdc->get_vhost(vdev);
4839 
4840         /* Check whether the vq index is also valid for the vhost device */
4841         if (queue >= hdev->vq_index && queue < hdev->vq_index + hdev->nvqs) {
4842             status->has_last_avail_idx = true;
4843 
4844             int vhost_vq_index =
4845                 hdev->vhost_ops->vhost_get_vq_index(hdev, queue);
4846             struct vhost_vring_state state = {
4847                 .index = vhost_vq_index,
4848             };
4849 
4850             status->last_avail_idx =
4851                 hdev->vhost_ops->vhost_get_vring_base(hdev, &state);
4852         }
4853     } else {
4854         status->has_shadow_avail_idx = true;
4855         status->has_last_avail_idx = true;
4856         status->last_avail_idx = vdev->vq[queue].last_avail_idx;
4857         status->shadow_avail_idx = vdev->vq[queue].shadow_avail_idx;
4858     }
4859 
4860     return status;
4861 }
4862 
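/*
 * Translate a descriptor's flag bits into a list of human-readable
 * names.  For example, a split-ring descriptor with flags
 * (VRING_DESC_F_NEXT | VRING_DESC_F_WRITE) decodes to the strings
 * "next" and "write".
 */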
4863 static strList *qmp_decode_vring_desc_flags(uint16_t flags)
4864 {
4865     strList *list = NULL;
4866     strList *node;
4867     int i;
4868 
4869     struct {
4870         uint16_t flag;
4871         const char *value;
4872     } map[] = {
4873         { VRING_DESC_F_NEXT, "next" },
4874         { VRING_DESC_F_WRITE, "write" },
4875         { VRING_DESC_F_INDIRECT, "indirect" },
4876         { 1 << VRING_PACKED_DESC_F_AVAIL, "avail" },
4877         { 1 << VRING_PACKED_DESC_F_USED, "used" },
4878         { 0, "" }
4879     };
4880 
4881     for (i = 0; map[i].flag; i++) {
4882         if ((map[i].flag & flags) == 0) {
4883             continue;
4884         }
4885         node = g_new0(strList, 1);
4886         node->value = g_strdup(map[i].value);
4887         node->next = list;
4888         list = node;
4889     }
4890 
4891     return list;
4892 }
4893 
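/*
 * Read one element from a split virtqueue's available ring and walk its
 * descriptor chain, including indirect descriptors, without consuming
 * it.  Packed rings are rejected.  If no index is given, the element at
 * last_avail_idx is inspected.
 *
 * Illustrative QMP exchange (hypothetical QOM path; the returned values
 * are examples only):
 *
 *   -> { "execute": "x-query-virtio-queue-element",
 *        "arguments": { "path": "/machine/peripheral-anon/device[1]/virtio-backend",
 *                       "queue": 0 } }
 *   <- { "return": { "name": "virtio-net", "index": 5, "descs": [ ... ], ... } }
 */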
4894 VirtioQueueElement *qmp_x_query_virtio_queue_element(const char *path,
4895                                                      uint16_t queue,
4896                                                      bool has_index,
4897                                                      uint16_t index,
4898                                                      Error **errp)
4899 {
4900     VirtIODevice *vdev;
4901     VirtQueue *vq;
4902     VirtioQueueElement *element = NULL;
4903 
4904     vdev = virtio_device_find(path);
4905     if (vdev == NULL) {
4906         error_setg(errp, "Path %s is not a VirtIODevice", path);
4907         return NULL;
4908     }
4909 
4910     if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
4911         error_setg(errp, "Invalid virtqueue number %d", queue);
4912         return NULL;
4913     }
4914     vq = &vdev->vq[queue];
4915 
4916     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
4917         error_setg(errp, "Packed ring not supported");
4918         return NULL;
4919     } else {
4920         unsigned int head, i, max;
4921         VRingMemoryRegionCaches *caches;
4922         MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
4923         MemoryRegionCache *desc_cache;
4924         VRingDesc desc;
4925         VirtioRingDescList *list = NULL;
4926         VirtioRingDescList *node;
4927         int rc, ndescs;
4928 
4929         RCU_READ_LOCK_GUARD();
4930 
4931         max = vq->vring.num;
4932 
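        /*
         * Without an explicit index, peek at the next element the device
         * would consume, i.e. the avail ring entry at last_avail_idx.
         */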
4933         if (!has_index) {
4934             head = vring_avail_ring(vq, vq->last_avail_idx % vq->vring.num);
4935         } else {
4936             head = vring_avail_ring(vq, index % vq->vring.num);
4937         }
4938         i = head;
4939 
4940         caches = vring_get_region_caches(vq);
4941         if (!caches) {
4942             error_setg(errp, "Region caches not initialized");
4943             return NULL;
4944         }
4945         if (caches->desc.len < max * sizeof(VRingDesc)) {
4946             error_setg(errp, "Cannot map descriptor ring");
4947             return NULL;
4948         }
4949 
4950         desc_cache = &caches->desc;
4951         vring_split_desc_read(vdev, &desc, desc_cache, i);
4952         if (desc.flags & VRING_DESC_F_INDIRECT) {
4953             int64_t len;
4954             len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
4955                                            desc.addr, desc.len, false);
4956             desc_cache = &indirect_desc_cache;
4957             if (len < desc.len) {
4958                 error_setg(errp, "Cannot map indirect buffer");
4959                 goto done;
4960             }
4961 
4962             max = desc.len / sizeof(VRingDesc);
4963             i = 0;
4964             vring_split_desc_read(vdev, &desc, desc_cache, i);
4965         }
4966 
4967         element = g_new0(VirtioQueueElement, 1);
4968         element->avail = g_new0(VirtioRingAvail, 1);
4969         element->used = g_new0(VirtioRingUsed, 1);
4970         element->name = g_strdup(vdev->name);
4971         element->index = head;
4972         element->avail->flags = vring_avail_flags(vq);
4973         element->avail->idx = vring_avail_idx(vq);
4974         element->avail->ring = head;
4975         element->used->flags = vring_used_flags(vq);
4976         element->used->idx = vring_used_idx(vq);
4977         ndescs = 0;
4978 
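        /*
         * Walk the descriptor chain, recording each descriptor's address,
         * length and flags until the chain ends.
         */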
4979         do {
4980             /* A buggy driver may produce an infinite loop */
4981             if (ndescs >= max) {
4982                 break;
4983             }
4984             node = g_new0(VirtioRingDescList, 1);
4985             node->value = g_new0(VirtioRingDesc, 1);
4986             node->value->addr = desc.addr;
4987             node->value->len = desc.len;
4988             node->value->flags = qmp_decode_vring_desc_flags(desc.flags);
4989             node->next = list;
4990             list = node;
4991 
4992             ndescs++;
4993             rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache,
4994                                                 max, &i);
4995         } while (rc == VIRTQUEUE_READ_DESC_MORE);
4996         element->descs = list;
4997 done:
4998         address_space_cache_destroy(&indirect_desc_cache);
4999     }
5000 
5001     return element;
5002 }
5003 
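/* QOM registration of the abstract TYPE_VIRTIO_DEVICE base class */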
5004 static const TypeInfo virtio_device_info = {
5005     .name = TYPE_VIRTIO_DEVICE,
5006     .parent = TYPE_DEVICE,
5007     .instance_size = sizeof(VirtIODevice),
5008     .class_init = virtio_device_class_init,
5009     .instance_finalize = virtio_device_instance_finalize,
5010     .abstract = true,
5011     .class_size = sizeof(VirtioDeviceClass),
5012 };
5013 
5014 static void virtio_register_types(void)
5015 {
5016     type_register_static(&virtio_device_info);
5017 }
5018 
5019 type_init(virtio_register_types)
5020