1 /*
2 * Virtio PCI Bindings
3 *
4 * Copyright IBM, Corp. 2007
5 * Copyright (c) 2009 CodeSourcery
6 *
7 * Authors:
8 * Anthony Liguori <aliguori@us.ibm.com>
9 * Paul Brook <paul@codesourcery.com>
10 *
11 * This work is licensed under the terms of the GNU GPL, version 2. See
12 * the COPYING file in the top-level directory.
13 *
14 * Contributions after 2012-01-13 are licensed under the terms of the
15 * GNU GPL, version 2 or (at your option) any later version.
16 */
17
18 #include "qemu/osdep.h"
19
20 #include "exec/memop.h"
21 #include "standard-headers/linux/virtio_pci.h"
22 #include "standard-headers/linux/virtio_ids.h"
23 #include "hw/boards.h"
24 #include "hw/virtio/virtio.h"
25 #include "migration/qemu-file-types.h"
26 #include "hw/pci/pci.h"
27 #include "hw/pci/pci_bus.h"
28 #include "hw/qdev-properties.h"
29 #include "qapi/error.h"
30 #include "qemu/error-report.h"
31 #include "qemu/log.h"
32 #include "qemu/module.h"
33 #include "qemu/bswap.h"
34 #include "hw/pci/msi.h"
35 #include "hw/pci/msix.h"
36 #include "hw/loader.h"
37 #include "system/kvm.h"
38 #include "hw/virtio/virtio-pci.h"
39 #include "qemu/range.h"
40 #include "hw/virtio/virtio-bus.h"
41 #include "qapi/visitor.h"
42 #include "system/replay.h"
43 #include "trace.h"
44
/* Size of the legacy register block that precedes device config space
 * (depends on whether an MSI-X capability is present on the device). */
#define VIRTIO_PCI_REGION_SIZE(dev) VIRTIO_PCI_CONFIG_OFF(msix_present(dev))

#undef VIRTIO_PCI_CONFIG

/* The remaining space is defined by each driver as the per-driver
 * configuration space */
#define VIRTIO_PCI_CONFIG_SIZE(dev) VIRTIO_PCI_CONFIG_OFF(msix_enabled(dev))

/* Forward declarations for functions defined later in this file. */
static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size,
                               VirtIOPCIProxy *dev);
static void virtio_pci_reset(DeviceState *qdev);
56
57 /* virtio device */
58 /* DeviceState to VirtIOPCIProxy. For use off data-path. TODO: use QOM. */
to_virtio_pci_proxy(DeviceState * d)59 static inline VirtIOPCIProxy *to_virtio_pci_proxy(DeviceState *d)
60 {
61 return container_of(d, VirtIOPCIProxy, pci_dev.qdev);
62 }
63
64 /* DeviceState to VirtIOPCIProxy. Note: used on datapath,
65 * be careful and test performance if you change this.
66 */
to_virtio_pci_proxy_fast(DeviceState * d)67 static inline VirtIOPCIProxy *to_virtio_pci_proxy_fast(DeviceState *d)
68 {
69 return container_of(d, VirtIOPCIProxy, pci_dev.qdev);
70 }
71
/* Raise the guest interrupt for @vector (data path). */
static void virtio_pci_notify(DeviceState *d, uint16_t vector)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy_fast(d);

    if (!msix_enabled(&proxy->pci_dev)) {
        /* No MSI-X: drive the legacy INTx line from ISR bit 0. */
        VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

        pci_set_irq(&proxy->pci_dev, qatomic_read(&vdev->isr) & 1);
        return;
    }

    /* MSI-X: only deliver vectors the guest actually configured. */
    if (vector != VIRTIO_NO_VECTOR) {
        msix_notify(&proxy->pci_dev, vector);
    }
}
85
/*
 * Save the PCI and MSI-X transport state plus the config-change vector.
 * Counterpart of virtio_pci_load_config().
 */
static void virtio_pci_save_config(DeviceState *d, QEMUFile *f)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    pci_device_save(&proxy->pci_dev, f);
    msix_save(&proxy->pci_dev, f);
    /* The config vector is only on the wire when MSI-X exists. */
    if (msix_present(&proxy->pci_dev)) {
        qemu_put_be16(f, vdev->config_vector);
    }
}
96
/* Migration format for one queue's modern-interface registers
 * (queue size, enable state, and the three 64-bit ring addresses
 * stored as lo/hi 32-bit pairs). */
static const VMStateDescription vmstate_virtio_pci_modern_queue_state = {
    .name = "virtio_pci/modern_queue_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT16(num, VirtIOPCIQueue),
        VMSTATE_UNUSED(1), /* enabled was stored as be16 */
        VMSTATE_BOOL(enabled, VirtIOPCIQueue),
        VMSTATE_UINT32_ARRAY(desc, VirtIOPCIQueue, 2),
        VMSTATE_UINT32_ARRAY(avail, VirtIOPCIQueue, 2),
        VMSTATE_UINT32_ARRAY(used, VirtIOPCIQueue, 2),
        VMSTATE_END_OF_LIST()
    }
};
111
virtio_pci_modern_state_needed(void * opaque)112 static bool virtio_pci_modern_state_needed(void *opaque)
113 {
114 VirtIOPCIProxy *proxy = opaque;
115
116 return virtio_pci_modern(proxy);
117 }
118
/* Optional subsection carrying modern (virtio 1.0) interface state:
 * feature select registers, negotiated guest features, and per-queue
 * registers.  Only sent when virtio_pci_modern_state_needed() is true. */
static const VMStateDescription vmstate_virtio_pci_modern_state_sub = {
    .name = "virtio_pci/modern_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_pci_modern_state_needed,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT32(dfselect, VirtIOPCIProxy),
        VMSTATE_UINT32(gfselect, VirtIOPCIProxy),
        VMSTATE_UINT32_ARRAY(guest_features, VirtIOPCIProxy, 2),
        VMSTATE_STRUCT_ARRAY(vqs, VirtIOPCIProxy, VIRTIO_QUEUE_MAX, 0,
                             vmstate_virtio_pci_modern_queue_state,
                             VirtIOPCIQueue),
        VMSTATE_END_OF_LIST()
    }
};
134
/* Top-level transport vmstate; it has no fields of its own — all data
 * lives in (optional) subsections such as the modern-state one. */
static const VMStateDescription vmstate_virtio_pci = {
    .name = "virtio_pci",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]) {
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * const []) {
        &vmstate_virtio_pci_modern_state_sub,
        NULL
    }
};
147
/* The PCI transport always has extra (vmstate-serialized) state. */
static bool virtio_pci_has_extra_state(DeviceState *d)
{
    return true;
}
152
/* Serialize the transport-private vmstate (modern subsection etc.). */
static void virtio_pci_save_extra_state(DeviceState *d, QEMUFile *f)
{
    vmstate_save_state(f, &vmstate_virtio_pci, to_virtio_pci_proxy(d), NULL);
}
159
/* Deserialize the transport-private vmstate saved at version 1. */
static int virtio_pci_load_extra_state(DeviceState *d, QEMUFile *f)
{
    return vmstate_load_state(f, &vmstate_virtio_pci,
                              to_virtio_pci_proxy(d), 1);
}
166
/*
 * Save queue @n's MSI-X vector.  Counterpart of virtio_pci_load_queue().
 */
static void virtio_pci_save_queue(DeviceState *d, int n, QEMUFile *f)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    /* The per-queue vector is only on the wire when MSI-X exists. */
    if (msix_present(&proxy->pci_dev)) {
        qemu_put_be16(f, virtio_queue_vector(vdev, n));
    }
}
175
/*
 * Restore PCI/MSI-X state and the config-change vector saved by
 * virtio_pci_save_config().  Returns 0 on success, a negative errno
 * on a malformed migration stream.
 */
static int virtio_pci_load_config(DeviceState *d, QEMUFile *f)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    uint16_t vector;

    int ret;
    ret = pci_device_load(&proxy->pci_dev, f);
    if (ret) {
        return ret;
    }
    /* Start from a clean slate before re-marking vectors as used. */
    msix_unuse_all_vectors(&proxy->pci_dev);
    msix_load(&proxy->pci_dev, f);
    if (msix_present(&proxy->pci_dev)) {
        qemu_get_be16s(f, &vector);

        /* Reject out-of-range vectors coming from the stream. */
        if (vector != VIRTIO_NO_VECTOR && vector >= proxy->nvectors) {
            return -EINVAL;
        }
    } else {
        vector = VIRTIO_NO_VECTOR;
    }
    vdev->config_vector = vector;
    if (vector != VIRTIO_NO_VECTOR) {
        msix_vector_use(&proxy->pci_dev, vector);
    }
    return 0;
}
204
/*
 * Restore queue @n's MSI-X vector saved by virtio_pci_save_queue().
 * Returns 0 on success, -EINVAL for an out-of-range vector.
 */
static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    uint16_t vector;
    if (msix_present(&proxy->pci_dev)) {
        qemu_get_be16s(f, &vector);
        /* Reject out-of-range vectors coming from the stream. */
        if (vector != VIRTIO_NO_VECTOR && vector >= proxy->nvectors) {
            return -EINVAL;
        }
    } else {
        vector = VIRTIO_NO_VECTOR;
    }
    virtio_queue_set_vector(vdev, n, vector);
    if (vector != VIRTIO_NO_VECTOR) {
        msix_vector_use(&proxy->pci_dev, vector);
    }

    return 0;
}
226
/* PCI identity associated with one virtio device id. */
typedef struct VirtIOPCIIDInfo {
    /* virtio id */
    uint16_t vdev_id;
    /* pci device id for the transitional device */
    uint16_t trans_devid;
    /* PCI class code advertised for this device type */
    uint16_t class_id;
} VirtIOPCIIDInfo;
234
/* Identity table keyed by virtio device id.  Entries that leave
 * trans_devid at 0 have no transitional PCI device id (modern-only),
 * per virtio_pci_get_trans_devid()'s contract. */
static const VirtIOPCIIDInfo virtio_pci_id_info[] = {
    {
        .vdev_id = VIRTIO_ID_CRYPTO,
        .class_id = PCI_CLASS_OTHERS,
    }, {
        .vdev_id = VIRTIO_ID_FS,
        .class_id = PCI_CLASS_STORAGE_OTHER,
    }, {
        .vdev_id = VIRTIO_ID_NET,
        .trans_devid = PCI_DEVICE_ID_VIRTIO_NET,
        .class_id = PCI_CLASS_NETWORK_ETHERNET,
    }, {
        .vdev_id = VIRTIO_ID_BLOCK,
        .trans_devid = PCI_DEVICE_ID_VIRTIO_BLOCK,
        .class_id = PCI_CLASS_STORAGE_SCSI,
    }, {
        .vdev_id = VIRTIO_ID_CONSOLE,
        .trans_devid = PCI_DEVICE_ID_VIRTIO_CONSOLE,
        .class_id = PCI_CLASS_COMMUNICATION_OTHER,
    }, {
        .vdev_id = VIRTIO_ID_SCSI,
        .trans_devid = PCI_DEVICE_ID_VIRTIO_SCSI,
        .class_id = PCI_CLASS_STORAGE_SCSI
    }, {
        .vdev_id = VIRTIO_ID_9P,
        .trans_devid = PCI_DEVICE_ID_VIRTIO_9P,
        .class_id = PCI_BASE_CLASS_NETWORK,
    }, {
        .vdev_id = VIRTIO_ID_BALLOON,
        .trans_devid = PCI_DEVICE_ID_VIRTIO_BALLOON,
        .class_id = PCI_CLASS_OTHERS,
    }, {
        .vdev_id = VIRTIO_ID_RNG,
        .trans_devid = PCI_DEVICE_ID_VIRTIO_RNG,
        .class_id = PCI_CLASS_OTHERS,
    },
};
272
virtio_pci_get_id_info(uint16_t vdev_id)273 static const VirtIOPCIIDInfo *virtio_pci_get_id_info(uint16_t vdev_id)
274 {
275 const VirtIOPCIIDInfo *info = NULL;
276 int i;
277
278 for (i = 0; i < ARRAY_SIZE(virtio_pci_id_info); i++) {
279 if (virtio_pci_id_info[i].vdev_id == vdev_id) {
280 info = &virtio_pci_id_info[i];
281 break;
282 }
283 }
284
285 if (!info) {
286 /* The device id is invalid or not added to the id_info yet. */
287 error_report("Invalid virtio device(id %u)", vdev_id);
288 abort();
289 }
290
291 return info;
292 }
293
294 /*
295 * Get the Transitional Device ID for the specific device, return
296 * zero if the device is non-transitional.
297 */
virtio_pci_get_trans_devid(uint16_t device_id)298 uint16_t virtio_pci_get_trans_devid(uint16_t device_id)
299 {
300 return virtio_pci_get_id_info(device_id)->trans_devid;
301 }
302
303 /*
304 * Get the Class ID for the specific device.
305 */
virtio_pci_get_class_id(uint16_t device_id)306 uint16_t virtio_pci_get_class_id(uint16_t device_id)
307 {
308 return virtio_pci_get_id_info(device_id)->class_id;
309 }
310
/* Report whether the ioeventfd flag is set on this proxy. */
static bool virtio_pci_ioeventfd_enabled(DeviceState *d)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    uint32_t flags = proxy->flags;

    return (flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD) != 0;
}
317
318 #define QEMU_VIRTIO_PCI_QUEUE_MEM_MULT 0x1000
319
virtio_pci_queue_mem_mult(struct VirtIOPCIProxy * proxy)320 static inline int virtio_pci_queue_mem_mult(struct VirtIOPCIProxy *proxy)
321 {
322 return (proxy->flags & VIRTIO_PCI_FLAG_PAGE_PER_VQ) ?
323 QEMU_VIRTIO_PCI_QUEUE_MEM_MULT : 4;
324 }
325
/*
 * (Un)wire the host notifier eventfd for queue @n into the guest-visible
 * doorbell registers: the modern per-queue notify MMIO area (width 0 =
 * any access size, no data match), optionally the modern PIO notify
 * (2-byte write, data must equal n), and the legacy QUEUE_NOTIFY port
 * (2-byte write, data must equal n).  Always returns 0.
 */
static int virtio_pci_ioeventfd_assign(DeviceState *d, EventNotifier *notifier,
                                       int n, bool assign)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtQueue *vq = virtio_get_queue(vdev, n);
    bool legacy = virtio_pci_legacy(proxy);
    bool modern = virtio_pci_modern(proxy);
    bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
    MemoryRegion *modern_mr = &proxy->notify.mr;
    MemoryRegion *modern_notify_mr = &proxy->notify_pio.mr;
    MemoryRegion *legacy_mr = &proxy->bar;
    /* Each queue's modern doorbell sits at queue_index * stride. */
    hwaddr modern_addr = virtio_pci_queue_mem_mult(proxy) *
                         virtio_get_queue_index(vq);
    hwaddr legacy_addr = VIRTIO_PCI_QUEUE_NOTIFY;

    if (assign) {
        if (modern) {
            memory_region_add_eventfd(modern_mr, modern_addr, 0,
                                      false, n, notifier);
            if (modern_pio) {
                memory_region_add_eventfd(modern_notify_mr, 0, 2,
                                          true, n, notifier);
            }
        }
        if (legacy) {
            memory_region_add_eventfd(legacy_mr, legacy_addr, 2,
                                      true, n, notifier);
        }
    } else {
        /* Teardown mirrors the assignment above exactly. */
        if (modern) {
            memory_region_del_eventfd(modern_mr, modern_addr, 0,
                                      false, n, notifier);
            if (modern_pio) {
                memory_region_del_eventfd(modern_notify_mr, 0, 2,
                                          true, n, notifier);
            }
        }
        if (legacy) {
            memory_region_del_eventfd(legacy_mr, legacy_addr, 2,
                                      true, n, notifier);
        }
    }
    return 0;
}
371
/* Delegate to the generic virtio-bus ioeventfd machinery. */
static void virtio_pci_start_ioeventfd(VirtIOPCIProxy *proxy)
{
    VirtioBusState *bus = &proxy->bus;

    virtio_bus_start_ioeventfd(bus);
}
376
/* Delegate to the generic virtio-bus ioeventfd machinery. */
static void virtio_pci_stop_ioeventfd(VirtIOPCIProxy *proxy)
{
    VirtioBusState *bus = &proxy->bus;

    virtio_bus_stop_ioeventfd(bus);
}
381
virtio_ioport_write(void * opaque,uint32_t addr,uint32_t val)382 static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
383 {
384 VirtIOPCIProxy *proxy = opaque;
385 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
386 uint16_t vector, vq_idx;
387 hwaddr pa;
388
389 switch (addr) {
390 case VIRTIO_PCI_GUEST_FEATURES:
391 /* Guest does not negotiate properly? We have to assume nothing. */
392 if (val & (1 << VIRTIO_F_BAD_FEATURE)) {
393 val = virtio_bus_get_vdev_bad_features(&proxy->bus);
394 }
395 virtio_set_features(vdev, val);
396 break;
397 case VIRTIO_PCI_QUEUE_PFN:
398 pa = (hwaddr)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
399 if (pa == 0) {
400 virtio_pci_reset(DEVICE(proxy));
401 }
402 else
403 virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
404 break;
405 case VIRTIO_PCI_QUEUE_SEL:
406 if (val < VIRTIO_QUEUE_MAX)
407 vdev->queue_sel = val;
408 break;
409 case VIRTIO_PCI_QUEUE_NOTIFY:
410 vq_idx = val;
411 if (vq_idx < VIRTIO_QUEUE_MAX && virtio_queue_get_num(vdev, vq_idx)) {
412 if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA)) {
413 VirtQueue *vq = virtio_get_queue(vdev, vq_idx);
414
415 virtio_queue_set_shadow_avail_idx(vq, val >> 16);
416 }
417 virtio_queue_notify(vdev, vq_idx);
418 }
419 break;
420 case VIRTIO_PCI_STATUS:
421 if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) {
422 virtio_pci_stop_ioeventfd(proxy);
423 }
424
425 virtio_set_status(vdev, val & 0xFF);
426
427 if (val & VIRTIO_CONFIG_S_DRIVER_OK) {
428 virtio_pci_start_ioeventfd(proxy);
429 }
430
431 if (vdev->status == 0) {
432 virtio_pci_reset(DEVICE(proxy));
433 }
434
435 /* Linux before 2.6.34 drives the device without enabling
436 the PCI device bus master bit. Enable it automatically
437 for the guest. This is a PCI spec violation but so is
438 initiating DMA with bus master bit clear. */
439 if (val == (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER)) {
440 pci_default_write_config(&proxy->pci_dev, PCI_COMMAND,
441 proxy->pci_dev.config[PCI_COMMAND] |
442 PCI_COMMAND_MASTER, 1);
443 }
444 break;
445 case VIRTIO_MSI_CONFIG_VECTOR:
446 if (vdev->config_vector != VIRTIO_NO_VECTOR) {
447 msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
448 }
449 /* Make it possible for guest to discover an error took place. */
450 if (val < proxy->nvectors) {
451 msix_vector_use(&proxy->pci_dev, val);
452 } else {
453 val = VIRTIO_NO_VECTOR;
454 }
455 vdev->config_vector = val;
456 break;
457 case VIRTIO_MSI_QUEUE_VECTOR:
458 vector = virtio_queue_vector(vdev, vdev->queue_sel);
459 if (vector != VIRTIO_NO_VECTOR) {
460 msix_vector_unuse(&proxy->pci_dev, vector);
461 }
462 /* Make it possible for guest to discover an error took place. */
463 if (val < proxy->nvectors) {
464 msix_vector_use(&proxy->pci_dev, val);
465 } else {
466 val = VIRTIO_NO_VECTOR;
467 }
468 virtio_queue_set_vector(vdev, vdev->queue_sel, val);
469 break;
470 default:
471 qemu_log_mask(LOG_GUEST_ERROR,
472 "%s: unexpected address 0x%x value 0x%x\n",
473 __func__, addr, val);
474 break;
475 }
476 }
477
/*
 * Legacy (virtio 0.9) read handler for the common registers in BAR0.
 * Unknown offsets read back as all-ones.
 */
static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr)
{
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    uint32_t ret = 0xFFFFFFFF;

    switch (addr) {
    case VIRTIO_PCI_HOST_FEATURES:
        ret = vdev->host_features;
        break;
    case VIRTIO_PCI_GUEST_FEATURES:
        ret = vdev->guest_features;
        break;
    case VIRTIO_PCI_QUEUE_PFN:
        /* Report the selected queue's ring address as a page frame number. */
        ret = virtio_queue_get_addr(vdev, vdev->queue_sel)
              >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
        break;
    case VIRTIO_PCI_QUEUE_NUM:
        ret = virtio_queue_get_num(vdev, vdev->queue_sel);
        break;
    case VIRTIO_PCI_QUEUE_SEL:
        ret = vdev->queue_sel;
        break;
    case VIRTIO_PCI_STATUS:
        ret = vdev->status;
        break;
    case VIRTIO_PCI_ISR:
        /* reading from the ISR also clears it. */
        ret = qatomic_xchg(&vdev->isr, 0);
        /* ISR is now 0, so the INTx line can be lowered. */
        pci_irq_deassert(&proxy->pci_dev);
        break;
    case VIRTIO_MSI_CONFIG_VECTOR:
        ret = vdev->config_vector;
        break;
    case VIRTIO_MSI_QUEUE_VECTOR:
        ret = virtio_queue_vector(vdev, vdev->queue_sel);
        break;
    default:
        break;
    }

    return ret;
}
520
/*
 * Legacy BAR read dispatcher: offsets below the common-register block go
 * to virtio_ioport_read(); the remainder indexes device config space,
 * byteswapped for big-endian devices (ioports are LE, device config is
 * target-native endian — see virtio_pci_config_write()).
 */
static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr,
                                       unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    /* Common-register block size depends on whether MSI-X is enabled. */
    uint32_t config = VIRTIO_PCI_CONFIG_SIZE(&proxy->pci_dev);
    uint64_t val = 0;

    if (vdev == NULL) {
        /* No backend attached: read as all-ones like an empty bus. */
        return UINT64_MAX;
    }

    if (addr < config) {
        return virtio_ioport_read(proxy, addr);
    }
    addr -= config;

    switch (size) {
    case 1:
        val = virtio_config_readb(vdev, addr);
        break;
    case 2:
        val = virtio_config_readw(vdev, addr);
        if (virtio_is_big_endian(vdev)) {
            val = bswap16(val);
        }
        break;
    case 4:
        val = virtio_config_readl(vdev, addr);
        if (virtio_is_big_endian(vdev)) {
            val = bswap32(val);
        }
        break;
    }
    return val;
}
557
/*
 * Legacy BAR write dispatcher: offsets below the common-register block
 * go to virtio_ioport_write(); the remainder writes device config space
 * with the endian fixup described below.
 */
static void virtio_pci_config_write(void *opaque, hwaddr addr,
                                    uint64_t val, unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    uint32_t config = VIRTIO_PCI_CONFIG_SIZE(&proxy->pci_dev);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    if (vdev == NULL) {
        /* No backend attached: drop the write. */
        return;
    }

    if (addr < config) {
        virtio_ioport_write(proxy, addr, val);
        return;
    }
    addr -= config;
    /*
     * Virtio-PCI is odd. Ioports are LE but config space is target native
     * endian.
     */
    switch (size) {
    case 1:
        virtio_config_writeb(vdev, addr, val);
        break;
    case 2:
        if (virtio_is_big_endian(vdev)) {
            val = bswap16(val);
        }
        virtio_config_writew(vdev, addr, val);
        break;
    case 4:
        if (virtio_is_big_endian(vdev)) {
            val = bswap32(val);
        }
        virtio_config_writel(vdev, addr, val);
        break;
    }
}
596
/* Ops for the legacy BAR; .impl makes the core split/merge guest
 * accesses into 1-4 byte callbacks. */
static const MemoryRegionOps virtio_pci_config_ops = {
    .read = virtio_pci_config_read,
    .write = virtio_pci_config_write,
    .impl = {
        .min_access_size = 1,
        .max_access_size = 4,
    },
    .endianness = DEVICE_LITTLE_ENDIAN,
};
606
/*
 * Map a VIRTIO_PCI_CAP offset (@*off, @len bytes) onto the memory region
 * backing the matching proxy register block.  On success *off is
 * rewritten to the offset within the returned region; returns NULL when
 * no register block fully covers the range.
 */
static MemoryRegion *virtio_address_space_lookup(VirtIOPCIProxy *proxy,
                                                 hwaddr *off, int len)
{
    int i;
    VirtIOPCIRegion *reg;

    for (i = 0; i < ARRAY_SIZE(proxy->regs); ++i) {
        reg = &proxy->regs[i];
        if (*off >= reg->offset &&
            *off + len <= reg->offset + reg->size) {
            MemoryRegionSection mrs = memory_region_find(&reg->mr,
                                                         *off - reg->offset, len);
            assert(mrs.mr);
            *off = mrs.offset_within_region;
            /* NOTE(review): the reference from memory_region_find() is
             * dropped before returning; callers appear to rely on the
             * proxy keeping the region alive — confirm. */
            memory_region_unref(mrs.mr);
            return mrs.mr;
        }
    }

    return NULL;
}
628
629 /* Below are generic functions to do memcpy from/to an address space,
630 * without byteswaps, with input validation.
631 *
632 * As regular address_space_* APIs all do some kind of byteswap at least for
633 * some host/target combinations, we are forced to explicitly convert to a
634 * known-endianness integer value.
635 * It doesn't really matter which endian format to go through, so the code
636 * below selects the endian that causes the least amount of work on the given
637 * host.
638 *
639 * Note: host pointer must be aligned.
640 */
static
void virtio_address_space_write(VirtIOPCIProxy *proxy, hwaddr addr,
                                const uint8_t *buf, int len)
{
    uint64_t val;
    MemoryRegion *mr;

    /* address_space_* APIs assume an aligned address.
     * As address is under guest control, handle illegal values.
     */
    addr &= ~(len - 1);

    mr = virtio_address_space_lookup(proxy, &addr, len);
    if (!mr) {
        /* Access outside any register block: silently discard. */
        return;
    }

    /* Make sure caller aligned buf properly */
    assert(!(((uintptr_t)buf) & (len - 1)));

    /* Go through a fixed (LE) integer representation; see the comment
     * block above these helpers for why. */
    switch (len) {
    case 1:
        val = pci_get_byte(buf);
        break;
    case 2:
        val = pci_get_word(buf);
        break;
    case 4:
        val = pci_get_long(buf);
        break;
    default:
        /* As length is under guest control, handle illegal values. */
        return;
    }
    memory_region_dispatch_write(mr, addr, val, size_memop(len) | MO_LE,
                                 MEMTXATTRS_UNSPECIFIED);
}
678
/* Read counterpart of virtio_address_space_write(); same alignment and
 * length validation rules apply.  On lookup failure @buf is untouched. */
static void
virtio_address_space_read(VirtIOPCIProxy *proxy, hwaddr addr,
                          uint8_t *buf, int len)
{
    uint64_t val;
    MemoryRegion *mr;

    /* address_space_* APIs assume an aligned address.
     * As address is under guest control, handle illegal values.
     */
    addr &= ~(len - 1);

    mr = virtio_address_space_lookup(proxy, &addr, len);
    if (!mr) {
        return;
    }

    /* Make sure caller aligned buf properly */
    assert(!(((uintptr_t)buf) & (len - 1)));

    memory_region_dispatch_read(mr, addr, &val, size_memop(len) | MO_LE,
                                MEMTXATTRS_UNSPECIFIED);
    /* Store back through a fixed (LE) representation. */
    switch (len) {
    case 1:
        pci_set_byte(buf, val);
        break;
    case 2:
        pci_set_word(buf, val);
        break;
    case 4:
        pci_set_long(buf, val);
        break;
    default:
        /* As length is under guest control, handle illegal values. */
        break;
    }
}
716
/* Propagate an ATS enable/disable toggle to the virtio device. */
static void virtio_pci_ats_ctrl_trigger(PCIDevice *pci_dev, bool enable)
{
    VirtIODevice *vdev = virtio_bus_get_device(&VIRTIO_PCI(pci_dev)->bus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);

    /* Record the new state first, then let the device react if it cares. */
    vdev->device_iotlb_enabled = enable;

    if (vdc->toggle_device_iotlb) {
        vdc->toggle_device_iotlb(vdev);
    }
}
729
/* Watch config writes for changes to the ATS capability's enable bit. */
static void pcie_ats_config_write(PCIDevice *dev, uint32_t address,
                                  uint32_t val, int len)
{
    uint16_t ats_cap = dev->exp.ats_cap;
    uint32_t off;

    /* No ATS capability, or the write lands entirely before it. */
    if (!ats_cap || address < ats_cap) {
        return;
    }

    off = address - ats_cap;
    if (off >= PCI_EXT_CAP_ATS_SIZEOF) {
        /* Write starts past the end of the capability. */
        return;
    }

    /* The enable bit lives in the upper byte of the ATS control word. */
    if (range_covers_byte(off, len, PCI_ATS_CTRL + 1)) {
        bool enable = !!(val & PCI_ATS_CTRL_ENABLE);

        virtio_pci_ats_ctrl_trigger(dev, enable);
    }
}
748
/*
 * PCI config-space write hook: applies the default write, forwards to
 * FLR/ATS handling when those features are enabled, reacts to bus-master
 * toggling, and implements the VIRTIO_PCI_CAP_PCI_CFG window (a write
 * touching cfg->pci_cfg_data is forwarded into virtio register space).
 */
static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
                                uint32_t val, int len)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    struct virtio_pci_cfg_cap *cfg;

    pci_default_write_config(pci_dev, address, val, len);

    if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) {
        pcie_cap_flr_write_config(pci_dev, address, val, len);
    }

    if (proxy->flags & VIRTIO_PCI_FLAG_ATS) {
        pcie_ats_config_write(pci_dev, address, val, len);
    }

    if (range_covers_byte(address, len, PCI_COMMAND)) {
        if (!(pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
            /* Bus master cleared: stop ioeventfds, drop DRIVER_OK and
             * mark the device disabled until it is turned back on. */
            virtio_set_disabled(vdev, true);
            virtio_pci_stop_ioeventfd(proxy);
            virtio_set_status(vdev, vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK);
        } else {
            virtio_set_disabled(vdev, false);
        }
    }

    if (proxy->config_cap &&
        ranges_overlap(address, len, proxy->config_cap + offsetof(struct virtio_pci_cfg_cap,
                                                                  pci_cfg_data),
                       sizeof cfg->pci_cfg_data)) {
        uint32_t off;
        uint32_t caplen;

        cfg = (void *)(proxy->pci_dev.config + proxy->config_cap);
        off = le32_to_cpu(cfg->cap.offset);
        caplen = le32_to_cpu(cfg->cap.length);

        /* Only 1/2/4-byte window accesses are forwarded. */
        if (caplen == 1 || caplen == 2 || caplen == 4) {
            assert(caplen <= sizeof cfg->pci_cfg_data);
            virtio_address_space_write(proxy, off, cfg->pci_cfg_data, caplen);
        }
    }
}
793
/*
 * PCI config-space read hook: before the default read, refresh the
 * VIRTIO_PCI_CAP_PCI_CFG window by pulling the addressed virtio
 * registers into cfg->pci_cfg_data so the guest sees current values.
 */
static uint32_t virtio_read_config(PCIDevice *pci_dev,
                                   uint32_t address, int len)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
    struct virtio_pci_cfg_cap *cfg;

    if (proxy->config_cap &&
        ranges_overlap(address, len, proxy->config_cap + offsetof(struct virtio_pci_cfg_cap,
                                                                  pci_cfg_data),
                       sizeof cfg->pci_cfg_data)) {
        uint32_t off;
        uint32_t caplen;

        cfg = (void *)(proxy->pci_dev.config + proxy->config_cap);
        off = le32_to_cpu(cfg->cap.offset);
        caplen = le32_to_cpu(cfg->cap.length);

        /* Only 1/2/4-byte window accesses are forwarded. */
        if (caplen == 1 || caplen == 2 || caplen == 4) {
            assert(caplen <= sizeof cfg->pci_cfg_data);
            virtio_address_space_read(proxy, off, cfg->pci_cfg_data, caplen);
        }
    }

    return pci_default_read_config(pci_dev, address, len);
}
819
/*
 * Take a reference on the KVM MSI route for @vector, allocating the
 * route on first use.  Returns 0 on success or a negative errno from
 * route allocation.
 */
static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
                                        unsigned int vector)
{
    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
    int ret;

    if (irqfd->users == 0) {
        /* First user: allocate and commit a KVM irqchip route. */
        KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state);
        ret = kvm_irqchip_add_msi_route(&c, vector, &proxy->pci_dev);
        if (ret < 0) {
            return ret;
        }
        kvm_irqchip_commit_route_changes(&c);
        irqfd->virq = ret;
    }
    irqfd->users++;
    return 0;
}
838
/* Drop one reference on @vector's KVM route, freeing it at zero. */
static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
                                             unsigned int vector)
{
    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];

    irqfd->users--;
    if (irqfd->users == 0) {
        kvm_irqchip_release_virq(kvm_state, irqfd->virq);
    }
}
847
/* Attach notifier @n's fd to the KVM route backing @vector. */
static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy,
                                    EventNotifier *n,
                                    unsigned int vector)
{
    int virq = proxy->vector_irqfd[vector].virq;

    return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, virq);
}
855
/* Detach notifier @n from the KVM route backing @vector. */
static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy,
                                         EventNotifier *n,
                                         unsigned int vector)
{
    int virq = proxy->vector_irqfd[vector].virq;
    int rc;

    rc = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, virq);
    /* Removing an irqfd we previously attached must not fail. */
    assert(rc == 0);
}
/*
 * Look up the guest notifier and MSI-X vector for @queue_no, or for the
 * config interrupt when @queue_no is VIRTIO_CONFIG_IRQ_IDX.
 * Returns 0 on success, -1 when the queue is unconfigured or irqfds are
 * not set up even though the driver is already running.
 */
static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no,
                                   EventNotifier **n, unsigned int *vector)
{
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtQueue *vq;

    if (!proxy->vector_irqfd && vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
        return -1;
    }

    if (queue_no == VIRTIO_CONFIG_IRQ_IDX) {
        *n = virtio_config_get_guest_notifier(vdev);
        *vector = vdev->config_vector;
    } else {
        if (!virtio_queue_get_num(vdev, queue_no)) {
            return -1;
        }
        *vector = virtio_queue_vector(vdev, queue_no);
        vq = virtio_get_queue(vdev, queue_no);
        *n = virtio_queue_get_guest_notifier(vq);
    }
    return 0;
}
888
/*
 * Set up the KVM route (and, when masking is unsupported, the irqfd) for
 * one queue, or for the config interrupt when @queue_no is
 * VIRTIO_CONFIG_IRQ_IDX.  Vectors beyond the allocated MSI-X range are
 * silently skipped (returns 0).
 */
static int kvm_virtio_pci_vector_use_one(VirtIOPCIProxy *proxy, int queue_no)
{
    unsigned int vector;
    int ret;
    EventNotifier *n;
    PCIDevice *dev = &proxy->pci_dev;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector);
    if (ret < 0) {
        return ret;
    }
    if (vector >= msix_nr_vectors_allocated(dev)) {
        return 0;
    }
    ret = kvm_virtio_pci_vq_vector_use(proxy, vector);
    if (ret < 0) {
        return ret;
    }
    /*
     * If guest supports masking, set up irqfd now.
     * Otherwise, delay until unmasked in the frontend.
     */
    if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
        ret = kvm_virtio_pci_irqfd_use(proxy, n, vector);
        if (ret < 0) {
            /* Undo the vector reference taken above. */
            kvm_virtio_pci_vq_vector_release(proxy, vector);
            return ret;
        }
    }

    return 0;
}
/*
 * Set up KVM routes/irqfds for the first @nvqs queues.  Returns -1 if a
 * queue in range is unconfigured; otherwise returns the result of the
 * last per-queue setup call (earlier failures are not propagated and no
 * rollback is performed here).
 */
static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs)
{
    int queue_no;
    int ret = 0;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    for (queue_no = 0; queue_no < nvqs; queue_no++) {
        if (!virtio_queue_get_num(vdev, queue_no)) {
            return -1;
        }
        ret = kvm_virtio_pci_vector_use_one(proxy, queue_no);
    }
    return ret;
}
937
/* The config interrupt uses the reserved VIRTIO_CONFIG_IRQ_IDX slot. */
static int kvm_virtio_pci_vector_config_use(VirtIOPCIProxy *proxy)
{
    int ret = kvm_virtio_pci_vector_use_one(proxy, VIRTIO_CONFIG_IRQ_IDX);

    return ret;
}
942
/*
 * Tear down the KVM irqfd/route for one queue (or the config interrupt
 * when @queue_no is VIRTIO_CONFIG_IRQ_IDX).  Mirrors
 * kvm_virtio_pci_vector_use_one(), including skipping vectors beyond
 * the allocated MSI-X range.
 */
static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy,
                                              int queue_no)
{
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    unsigned int vector;
    EventNotifier *n;
    int ret;
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    PCIDevice *dev = &proxy->pci_dev;

    ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector);
    if (ret < 0) {
        return;
    }
    if (vector >= msix_nr_vectors_allocated(dev)) {
        return;
    }
    /* irqfd was only attached here if masking is unsupported. */
    if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
        kvm_virtio_pci_irqfd_release(proxy, n, vector);
    }
    kvm_virtio_pci_vq_vector_release(proxy, vector);
}
965
/* Release vectors for queues [0, nvqs), stopping at the first queue
 * with no entries (mirrors the setup loop's early exit). */
static void kvm_virtio_pci_vector_vq_release(VirtIOPCIProxy *proxy, int nvqs)
{
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    int queue_no = 0;

    while (queue_no < nvqs && virtio_queue_get_num(vdev, queue_no)) {
        kvm_virtio_pci_vector_release_one(proxy, queue_no);
        queue_no++;
    }
}
978
/* Release the config interrupt's vector (VIRTIO_CONFIG_IRQ_IDX slot). */
static void kvm_virtio_pci_vector_config_release(VirtIOPCIProxy *proxy)
{
    kvm_virtio_pci_vector_release_one(proxy, VIRTIO_CONFIG_IRQ_IDX);
}
983
/*
 * Unmask one vector for a queue (or the config interrupt): refresh the
 * KVM MSI route if the guest changed the MSI message, then either
 * unmask via the frontend's mask callback or attach the irqfd directly.
 * Returns 0 on success or a negative errno.
 */
static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy,
                                        unsigned int queue_no,
                                        unsigned int vector,
                                        MSIMessage msg,
                                        EventNotifier *n)
{
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    VirtIOIRQFD *irqfd;
    int ret = 0;

    if (proxy->vector_irqfd) {
        irqfd = &proxy->vector_irqfd[vector];
        /* Only touch the route when the MSI message actually changed. */
        if (irqfd->msg.data != msg.data || irqfd->msg.address != msg.address) {
            ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, msg,
                                               &proxy->pci_dev);
            if (ret < 0) {
                return ret;
            }
            kvm_irqchip_commit_routes(kvm_state);
        }
    }

    /* If guest supports masking, irqfd is already setup, unmask it.
     * Otherwise, set it up now.
     */
    if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
        k->guest_notifier_mask(vdev, queue_no, false);
        /* Test after unmasking to avoid losing events. */
        if (k->guest_notifier_pending &&
            k->guest_notifier_pending(vdev, queue_no)) {
            event_notifier_set(n);
        }
    } else {
        ret = kvm_virtio_pci_irqfd_use(proxy, n, vector);
    }
    return ret;
}
1022
/* Mask one vector for a queue (or the config interrupt). */
static void virtio_pci_one_vector_mask(VirtIOPCIProxy *proxy,
                                       unsigned int queue_no,
                                       unsigned int vector,
                                       EventNotifier *n)
{
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
    bool has_masking = vdev->use_guest_notifier_mask &&
                       vdc->guest_notifier_mask;

    if (!has_masking) {
        /* No mask support in the frontend: tear down the irqfd now. */
        kvm_virtio_pci_irqfd_release(proxy, n, vector);
        return;
    }

    /* Frontend supports masking: keep the irqfd and just mask it. */
    vdc->guest_notifier_mask(vdev, queue_no, true);
}
1040
/*
 * MSI-X unmask notifier: unmask every queue notifier routed to @vector
 * and, if the config interrupt uses this vector, the config notifier too.
 * On failure, re-mask whatever was already unmasked before returning the
 * error.
 */
static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector,
                                    MSIMessage msg)
{
    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtQueue *vq = virtio_vector_first_queue(vdev, vector);
    EventNotifier *n;
    int ret, index, unmasked = 0;

    while (vq) {
        index = virtio_get_queue_index(vq);
        /* A zero-sized queue terminates the walk. */
        if (!virtio_queue_get_num(vdev, index)) {
            break;
        }
        if (index < proxy->nvqs_with_notifiers) {
            n = virtio_queue_get_guest_notifier(vq);
            ret = virtio_pci_one_vector_unmask(proxy, index, vector, msg, n);
            if (ret < 0) {
                goto undo;
            }
            ++unmasked;
        }
        vq = virtio_vector_next_queue(vq);
    }
    /* unmask config intr */
    if (vector == vdev->config_vector) {
        n = virtio_config_get_guest_notifier(vdev);
        ret = virtio_pci_one_vector_unmask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector,
                                           msg, n);
        if (ret < 0) {
            goto undo_config;
        }
    }
    return 0;
undo_config:
    n = virtio_config_get_guest_notifier(vdev);
    virtio_pci_one_vector_mask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, n);
undo:
    /* Walk again, masking only the notifiers we managed to unmask above. */
    vq = virtio_vector_first_queue(vdev, vector);
    while (vq && unmasked >= 0) {
        index = virtio_get_queue_index(vq);
        if (index < proxy->nvqs_with_notifiers) {
            n = virtio_queue_get_guest_notifier(vq);
            virtio_pci_one_vector_mask(proxy, index, vector, n);
            --unmasked;
        }
        vq = virtio_vector_next_queue(vq);
    }
    return ret;
}
1091
/*
 * MSI-X mask notifier: mask every queue notifier routed to @vector and,
 * if the config interrupt uses this vector, the config notifier as well.
 */
static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector)
{
    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtQueue *vq = virtio_vector_first_queue(vdev, vector);
    EventNotifier *n;
    int index;

    while (vq) {
        index = virtio_get_queue_index(vq);
        n = virtio_queue_get_guest_notifier(vq);
        /* A zero-sized queue terminates the walk. */
        if (!virtio_queue_get_num(vdev, index)) {
            break;
        }
        if (index < proxy->nvqs_with_notifiers) {
            virtio_pci_one_vector_mask(proxy, index, vector, n);
        }
        vq = virtio_vector_next_queue(vq);
    }

    if (vector == vdev->config_vector) {
        n = virtio_config_get_guest_notifier(vdev);
        virtio_pci_one_vector_mask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, n);
    }
}
1117
virtio_pci_vector_poll(PCIDevice * dev,unsigned int vector_start,unsigned int vector_end)1118 static void virtio_pci_vector_poll(PCIDevice *dev,
1119 unsigned int vector_start,
1120 unsigned int vector_end)
1121 {
1122 VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
1123 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1124 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1125 int queue_no;
1126 unsigned int vector;
1127 EventNotifier *notifier;
1128 int ret;
1129
1130 for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
1131 ret = virtio_pci_get_notifier(proxy, queue_no, ¬ifier, &vector);
1132 if (ret < 0) {
1133 break;
1134 }
1135 if (vector < vector_start || vector >= vector_end ||
1136 !msix_is_masked(dev, vector)) {
1137 continue;
1138 }
1139 if (k->guest_notifier_pending) {
1140 if (k->guest_notifier_pending(vdev, queue_no)) {
1141 msix_set_pending(dev, vector);
1142 }
1143 } else if (event_notifier_test_and_clear(notifier)) {
1144 msix_set_pending(dev, vector);
1145 }
1146 }
1147 /* poll the config intr */
1148 ret = virtio_pci_get_notifier(proxy, VIRTIO_CONFIG_IRQ_IDX, ¬ifier,
1149 &vector);
1150 if (ret < 0) {
1151 return;
1152 }
1153 if (vector < vector_start || vector >= vector_end ||
1154 !msix_is_masked(dev, vector)) {
1155 return;
1156 }
1157 if (k->guest_notifier_pending) {
1158 if (k->guest_notifier_pending(vdev, VIRTIO_CONFIG_IRQ_IDX)) {
1159 msix_set_pending(dev, vector);
1160 }
1161 } else if (event_notifier_test_and_clear(notifier)) {
1162 msix_set_pending(dev, vector);
1163 }
1164 }
1165
/*
 * Install or remove the fd handler for a guest notifier: the config-change
 * notifier when @n is VIRTIO_CONFIG_IRQ_IDX, otherwise the notifier of
 * virtqueue @vq.
 */
void virtio_pci_set_guest_notifier_fd_handler(VirtIODevice *vdev, VirtQueue *vq,
                                              int n, bool assign,
                                              bool with_irqfd)
{
    if (n != VIRTIO_CONFIG_IRQ_IDX) {
        virtio_queue_set_guest_notifier_fd_handler(vq, assign, with_irqfd);
        return;
    }

    virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd);
}
1176
/*
 * Assign or deassign the guest notifier (event notifier) for queue @n, or
 * for the config interrupt when @n is VIRTIO_CONFIG_IRQ_IDX.  @with_irqfd
 * selects irqfd-style delivery for the fd handler.  Note the ordering:
 * the notifier is initialized before its handler is installed, and the
 * handler is removed before the notifier is cleaned up.
 */
static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign,
                                         bool with_irqfd)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
    VirtQueue *vq = NULL;
    EventNotifier *notifier = NULL;

    if (n == VIRTIO_CONFIG_IRQ_IDX) {
        notifier = virtio_config_get_guest_notifier(vdev);
    } else {
        vq = virtio_get_queue(vdev, n);
        notifier = virtio_queue_get_guest_notifier(vq);
    }

    if (assign) {
        int r = event_notifier_init(notifier, 0);
        if (r < 0) {
            return r;
        }
        virtio_pci_set_guest_notifier_fd_handler(vdev, vq, n, true, with_irqfd);
    } else {
        virtio_pci_set_guest_notifier_fd_handler(vdev, vq, n, false,
                                                 with_irqfd);
        event_notifier_cleanup(notifier);
    }

    /* Without MSI-X there is no per-vector mask notifier, so apply the
     * device's own mask callback directly when it provides one. */
    if (!msix_enabled(&proxy->pci_dev) &&
        vdev->use_guest_notifier_mask &&
        vdc->guest_notifier_mask) {
        vdc->guest_notifier_mask(vdev, n, !assign);
    }

    return 0;
}
1213
virtio_pci_query_guest_notifiers(DeviceState * d)1214 static bool virtio_pci_query_guest_notifiers(DeviceState *d)
1215 {
1216 VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
1217
1218 if (msix_enabled(&proxy->pci_dev)) {
1219 return true;
1220 } else {
1221 return pci_irq_disabled(&proxy->pci_dev);
1222 }
1223 }
1224
/*
 * Assign or deassign guest notifiers for @nvqs queues plus the config
 * interrupt, optionally wiring them to KVM irqfds when MSI-X-via-irqfd is
 * available.  Ordering is critical: vector notifiers must be unset while
 * guest notifiers are still assigned, and set only after all guest
 * notifiers have been assigned.  Returns 0 or a negative error code.
 */
static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    int r, n;
    bool with_irqfd = msix_enabled(&proxy->pci_dev) &&
        kvm_msi_via_irqfd_enabled();

    nvqs = MIN(nvqs, VIRTIO_QUEUE_MAX);

    /*
     * When deassigning, pass a consistent nvqs value to avoid leaking
     * notifiers. But first check we've actually been configured, exit
     * early if we haven't.
     */
    if (!assign && !proxy->nvqs_with_notifiers) {
        return 0;
    }
    assert(assign || nvqs == proxy->nvqs_with_notifiers);

    proxy->nvqs_with_notifiers = nvqs;

    /* Must unset vector notifier while guest notifier is still assigned */
    if ((proxy->vector_irqfd ||
         (vdev->use_guest_notifier_mask && k->guest_notifier_mask)) &&
        !assign) {
        msix_unset_vector_notifiers(&proxy->pci_dev);
        if (proxy->vector_irqfd) {
            kvm_virtio_pci_vector_vq_release(proxy, nvqs);
            kvm_virtio_pci_vector_config_release(proxy);
            g_free(proxy->vector_irqfd);
            proxy->vector_irqfd = NULL;
        }
    }

    for (n = 0; n < nvqs; n++) {
        /* A zero-sized queue terminates the walk. */
        if (!virtio_queue_get_num(vdev, n)) {
            break;
        }

        r = virtio_pci_set_guest_notifier(d, n, assign, with_irqfd);
        if (r < 0) {
            goto assign_error;
        }
    }
    r = virtio_pci_set_guest_notifier(d, VIRTIO_CONFIG_IRQ_IDX, assign,
                                      with_irqfd);
    if (r < 0) {
        goto config_assign_error;
    }
    /* Must set vector notifier after guest notifier has been assigned */
    if ((with_irqfd ||
         (vdev->use_guest_notifier_mask && k->guest_notifier_mask)) &&
        assign) {
        if (with_irqfd) {
            proxy->vector_irqfd =
                g_malloc0(sizeof(*proxy->vector_irqfd) *
                          msix_nr_vectors_allocated(&proxy->pci_dev));
            r = kvm_virtio_pci_vector_vq_use(proxy, nvqs);
            if (r < 0) {
                goto config_assign_error;
            }
            r = kvm_virtio_pci_vector_config_use(proxy);
            if (r < 0) {
                goto config_error;
            }
        }

        r = msix_set_vector_notifiers(&proxy->pci_dev, virtio_pci_vector_unmask,
                                      virtio_pci_vector_mask,
                                      virtio_pci_vector_poll);
        if (r < 0) {
            goto notifiers_error;
        }
    }

    return 0;

/* Error paths unwind in strict reverse order of the setup above. */
notifiers_error:
    if (with_irqfd) {
        assert(assign);
        kvm_virtio_pci_vector_vq_release(proxy, nvqs);
    }
config_error:
    if (with_irqfd) {
        kvm_virtio_pci_vector_config_release(proxy);
    }
config_assign_error:
    virtio_pci_set_guest_notifier(d, VIRTIO_CONFIG_IRQ_IDX, !assign,
                                  with_irqfd);
assign_error:
    /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */
    assert(assign);
    while (--n >= 0) {
        virtio_pci_set_guest_notifier(d, n, !assign, with_irqfd);
    }
    g_free(proxy->vector_irqfd);
    proxy->vector_irqfd = NULL;
    return r;
}
1326
/*
 * Map (or unmap) a backend-provided memory region @mr as the notify window
 * of queue @n.  Only modern devices expose per-queue notify windows, and
 * the region size must exactly match the per-queue notify stride.
 * Returns 0 on success, -1 on rejection.
 */
static int virtio_pci_set_host_notifier_mr(DeviceState *d, int n,
                                           MemoryRegion *mr, bool assign)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);

    if (n >= VIRTIO_QUEUE_MAX || !virtio_pci_modern(proxy) ||
        virtio_pci_queue_mem_mult(proxy) != memory_region_size(mr)) {
        return -1;
    }

    if (!assign) {
        memory_region_del_subregion(&proxy->notify.mr, mr);
        return 0;
    }

    /* Overlap priority 1 so the backend region shadows the default one. */
    memory_region_add_subregion_overlap(&proxy->notify.mr,
                                        virtio_pci_queue_mem_mult(proxy) * n,
                                        mr, 1);
    return 0;
}
1347
/*
 * VM state change hook: start ioeventfd when the machine starts running,
 * stop it otherwise.  On the start path, also work around migration from
 * old QEMU versions that did not set PCI bus mastering on status writes.
 */
static void virtio_pci_vmstate_change(DeviceState *d, bool running)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    if (!running) {
        virtio_pci_stop_ioeventfd(proxy);
        return;
    }

    /* Old QEMU versions did not set bus master enable on status write.
     * Detect DRIVER set and enable it.
     */
    if ((proxy->flags & VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION) &&
        (vdev->status & VIRTIO_CONFIG_S_DRIVER) &&
        !(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
        pci_default_write_config(&proxy->pci_dev, PCI_COMMAND,
                                 proxy->pci_dev.config[PCI_COMMAND] |
                                 PCI_COMMAND_MASTER, 1);
    }

    virtio_pci_start_ioeventfd(proxy);
}
1369
1370 /*
1371 * virtio-pci: This is the PCIDevice which has a virtio-pci-bus.
1372 */
1373
/* Report the number of MSI-X vectors configured for this proxy. */
static int virtio_pci_query_nvectors(DeviceState *d)
{
    return VIRTIO_PCI(d)->nvectors;
}
1380
virtio_pci_get_dma_as(DeviceState * d)1381 static AddressSpace *virtio_pci_get_dma_as(DeviceState *d)
1382 {
1383 VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1384 PCIDevice *dev = &proxy->pci_dev;
1385
1386 return pci_get_address_space(dev);
1387 }
1388
virtio_pci_iommu_enabled(DeviceState * d)1389 static bool virtio_pci_iommu_enabled(DeviceState *d)
1390 {
1391 VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1392 PCIDevice *dev = &proxy->pci_dev;
1393 AddressSpace *dma_as = pci_device_iommu_address_space(dev);
1394
1395 if (dma_as == &address_space_memory) {
1396 return false;
1397 }
1398
1399 return true;
1400 }
1401
virtio_pci_queue_enabled(DeviceState * d,int n)1402 static bool virtio_pci_queue_enabled(DeviceState *d, int n)
1403 {
1404 VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1405 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1406
1407 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1408 return proxy->vqs[n].enabled;
1409 }
1410
1411 return virtio_queue_enabled_legacy(vdev, n);
1412 }
1413
/*
 * Append a vendor-specific virtio capability to the device's PCI config
 * space and return its config-space offset.  The header bytes before
 * PCI_CAP_FLAGS (capability ID and next pointer) are left to
 * pci_add_capability(); the remainder of @cap is copied in starting at
 * its cap_len field.
 */
static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
                                  struct virtio_pci_cap *cap)
{
    PCIDevice *dev = &proxy->pci_dev;
    int offset;

    offset = pci_add_capability(dev, PCI_CAP_ID_VNDR, 0,
                                cap->cap_len, &error_abort);

    assert(cap->cap_len >= sizeof *cap);
    memcpy(dev->config + offset + PCI_CAP_FLAGS, &cap->cap_len,
           cap->cap_len - PCI_CAP_FLAGS);

    return offset;
}
1429
/*
 * Change the MSI-X vector of queue @queue_no (or of the config interrupt
 * when @queue_no is VIRTIO_CONFIG_IRQ_IDX) from @old_vector to
 * @new_vector, re-plumbing the KVM irqfd route when the device is already
 * DRIVER_OK with irqfd delivery active.
 */
static void virtio_pci_set_vector(VirtIODevice *vdev,
                                  VirtIOPCIProxy *proxy,
                                  int queue_no, uint16_t old_vector,
                                  uint16_t new_vector)
{
    bool kvm_irqfd = (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        msix_enabled(&proxy->pci_dev) && kvm_msi_via_irqfd_enabled();

    if (new_vector == old_vector) {
        return;
    }

    /*
     * If the device uses irqfd and the vector changes after DRIVER_OK is
     * set, we need to release the old vector and set up the new one.
     * Otherwise just need to set the new vector on the device.
     */
    if (kvm_irqfd && old_vector != VIRTIO_NO_VECTOR) {
        kvm_virtio_pci_vector_release_one(proxy, queue_no);
    }
    /* Set the new vector on the device. */
    if (queue_no == VIRTIO_CONFIG_IRQ_IDX) {
        vdev->config_vector = new_vector;
    } else {
        virtio_queue_set_vector(vdev, queue_no, new_vector);
    }
    /* If the new vector changed need to set it up. */
    if (kvm_irqfd && new_vector != VIRTIO_NO_VECTOR) {
        kvm_virtio_pci_vector_use_one(proxy, queue_no);
    }
}
1461
/*
 * Advertise a shared-memory region to the guest: build a 64-bit virtio
 * PCI capability describing BAR @bar, @offset and @length (split into
 * low/high halves) and append it to config space.  Returns the
 * config-space offset of the new capability.
 */
int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy,
                           uint8_t bar, uint64_t offset, uint64_t length,
                           uint8_t id)
{
    struct virtio_pci_cap64 cap = {
        .cap = {
            .cap_len = sizeof cap,
            .cfg_type = VIRTIO_PCI_CAP_SHARED_MEMORY_CFG,
            .bar = bar,
            .id = id,
            .offset = cpu_to_le32(offset),
            .length = cpu_to_le32(length),
        },
        .offset_hi = cpu_to_le32(offset >> 32),
        .length_hi = cpu_to_le32(length >> 32),
    };

    return virtio_pci_add_mem_cap(proxy, &cap.cap);
}
1479
/*
 * MMIO read handler for the modern common configuration structure.
 * Returns all-ones while no backend device is plugged; unknown offsets
 * read as zero.
 */
static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr,
                                       unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    uint32_t val = 0;
    int i;

    if (vdev == NULL) {
        return UINT64_MAX;
    }

    switch (addr) {
    case VIRTIO_PCI_COMMON_DFSELECT:
        val = proxy->dfselect;
        break;
    case VIRTIO_PCI_COMMON_DF:
        /* Device features: 32-bit window selected by dfselect (0 or 1),
         * with legacy-only feature bits filtered out. */
        if (proxy->dfselect <= 1) {
            VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);

            val = (vdev->host_features & ~vdc->legacy_features) >>
                (32 * proxy->dfselect);
        }
        break;
    case VIRTIO_PCI_COMMON_GFSELECT:
        val = proxy->gfselect;
        break;
    case VIRTIO_PCI_COMMON_GF:
        if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) {
            val = proxy->guest_features[proxy->gfselect];
        }
        break;
    case VIRTIO_PCI_COMMON_MSIX:
        val = vdev->config_vector;
        break;
    case VIRTIO_PCI_COMMON_NUMQ:
        /* Highest configured (non-zero-sized) queue index, plus one. */
        for (i = 0; i < VIRTIO_QUEUE_MAX; ++i) {
            if (virtio_queue_get_num(vdev, i)) {
                val = i + 1;
            }
        }
        break;
    case VIRTIO_PCI_COMMON_STATUS:
        val = vdev->status;
        break;
    case VIRTIO_PCI_COMMON_CFGGENERATION:
        val = vdev->generation;
        break;
    case VIRTIO_PCI_COMMON_Q_SELECT:
        val = vdev->queue_sel;
        break;
    case VIRTIO_PCI_COMMON_Q_SIZE:
        val = virtio_queue_get_num(vdev, vdev->queue_sel);
        break;
    case VIRTIO_PCI_COMMON_Q_MSIX:
        val = virtio_queue_vector(vdev, vdev->queue_sel);
        break;
    case VIRTIO_PCI_COMMON_Q_ENABLE:
        val = proxy->vqs[vdev->queue_sel].enabled;
        break;
    case VIRTIO_PCI_COMMON_Q_NOFF:
        /* Simply map queues in order */
        val = vdev->queue_sel;
        break;
    case VIRTIO_PCI_COMMON_Q_DESCLO:
        val = proxy->vqs[vdev->queue_sel].desc[0];
        break;
    case VIRTIO_PCI_COMMON_Q_DESCHI:
        val = proxy->vqs[vdev->queue_sel].desc[1];
        break;
    case VIRTIO_PCI_COMMON_Q_AVAILLO:
        val = proxy->vqs[vdev->queue_sel].avail[0];
        break;
    case VIRTIO_PCI_COMMON_Q_AVAILHI:
        val = proxy->vqs[vdev->queue_sel].avail[1];
        break;
    case VIRTIO_PCI_COMMON_Q_USEDLO:
        val = proxy->vqs[vdev->queue_sel].used[0];
        break;
    case VIRTIO_PCI_COMMON_Q_USEDHI:
        val = proxy->vqs[vdev->queue_sel].used[1];
        break;
    case VIRTIO_PCI_COMMON_Q_RESET:
        val = proxy->vqs[vdev->queue_sel].reset;
        break;
    default:
        val = 0;
    }

    return val;
}
1571
/*
 * MMIO write handler for the modern common configuration structure.
 * Writes are ignored while no backend device is plugged; unknown offsets
 * are silently discarded.
 */
static void virtio_pci_common_write(void *opaque, hwaddr addr,
                                    uint64_t val, unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    uint16_t vector;

    if (vdev == NULL) {
        return;
    }

    switch (addr) {
    case VIRTIO_PCI_COMMON_DFSELECT:
        proxy->dfselect = val;
        break;
    case VIRTIO_PCI_COMMON_GFSELECT:
        proxy->gfselect = val;
        break;
    case VIRTIO_PCI_COMMON_GF:
        /* Guest features: latch the selected 32-bit window, then push the
         * combined 64-bit value to the device. */
        if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) {
            proxy->guest_features[proxy->gfselect] = val;
            virtio_set_features(vdev,
                                (((uint64_t)proxy->guest_features[1]) << 32) |
                                proxy->guest_features[0]);
        }
        break;
    case VIRTIO_PCI_COMMON_MSIX:
        /* Config vector change: drop the use count on the old vector and
         * take one on the new before re-plumbing via
         * virtio_pci_set_vector(). */
        if (vdev->config_vector != VIRTIO_NO_VECTOR) {
            msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
        }
        /* Make it possible for guest to discover an error took place. */
        if (val < proxy->nvectors) {
            msix_vector_use(&proxy->pci_dev, val);
        } else {
            val = VIRTIO_NO_VECTOR;
        }
        virtio_pci_set_vector(vdev, proxy, VIRTIO_CONFIG_IRQ_IDX,
                              vdev->config_vector, val);
        break;
    case VIRTIO_PCI_COMMON_STATUS:
        /* ioeventfd must be stopped before DRIVER_OK is cleared and
         * started only after it is set. */
        if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) {
            virtio_pci_stop_ioeventfd(proxy);
        }

        virtio_set_status(vdev, val & 0xFF);

        if (val & VIRTIO_CONFIG_S_DRIVER_OK) {
            virtio_pci_start_ioeventfd(proxy);
        }

        /* A zero status is a device reset request. */
        if (vdev->status == 0) {
            virtio_pci_reset(DEVICE(proxy));
        }

        break;
    case VIRTIO_PCI_COMMON_Q_SELECT:
        if (val < VIRTIO_QUEUE_MAX) {
            vdev->queue_sel = val;
        }
        break;
    case VIRTIO_PCI_COMMON_Q_SIZE:
        proxy->vqs[vdev->queue_sel].num = val;
        virtio_queue_set_num(vdev, vdev->queue_sel,
                             proxy->vqs[vdev->queue_sel].num);
        virtio_init_region_cache(vdev, vdev->queue_sel);
        break;
    case VIRTIO_PCI_COMMON_Q_MSIX:
        vector = virtio_queue_vector(vdev, vdev->queue_sel);
        if (vector != VIRTIO_NO_VECTOR) {
            msix_vector_unuse(&proxy->pci_dev, vector);
        }
        /* Make it possible for guest to discover an error took place. */
        if (val < proxy->nvectors) {
            msix_vector_use(&proxy->pci_dev, val);
        } else {
            val = VIRTIO_NO_VECTOR;
        }
        virtio_pci_set_vector(vdev, proxy, vdev->queue_sel, vector, val);
        break;
    case VIRTIO_PCI_COMMON_Q_ENABLE:
        /* Only writing 1 is legal; commit the latched size and ring
         * addresses to the device before marking the queue enabled. */
        if (val == 1) {
            virtio_queue_set_num(vdev, vdev->queue_sel,
                                 proxy->vqs[vdev->queue_sel].num);
            virtio_queue_set_rings(vdev, vdev->queue_sel,
                       ((uint64_t)proxy->vqs[vdev->queue_sel].desc[1]) << 32 |
                       proxy->vqs[vdev->queue_sel].desc[0],
                       ((uint64_t)proxy->vqs[vdev->queue_sel].avail[1]) << 32 |
                       proxy->vqs[vdev->queue_sel].avail[0],
                       ((uint64_t)proxy->vqs[vdev->queue_sel].used[1]) << 32 |
                       proxy->vqs[vdev->queue_sel].used[0]);
            proxy->vqs[vdev->queue_sel].enabled = 1;
            proxy->vqs[vdev->queue_sel].reset = 0;
            virtio_queue_enable(vdev, vdev->queue_sel);
        } else {
            virtio_error(vdev, "wrong value for queue_enable %"PRIx64, val);
        }
        break;
    case VIRTIO_PCI_COMMON_Q_DESCLO:
        proxy->vqs[vdev->queue_sel].desc[0] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_DESCHI:
        proxy->vqs[vdev->queue_sel].desc[1] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_AVAILLO:
        proxy->vqs[vdev->queue_sel].avail[0] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_AVAILHI:
        proxy->vqs[vdev->queue_sel].avail[1] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_USEDLO:
        proxy->vqs[vdev->queue_sel].used[0] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_USEDHI:
        proxy->vqs[vdev->queue_sel].used[1] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_RESET:
        if (val == 1) {
            proxy->vqs[vdev->queue_sel].reset = 1;

            virtio_queue_reset(vdev, vdev->queue_sel);

            proxy->vqs[vdev->queue_sel].reset = 0;
            proxy->vqs[vdev->queue_sel].enabled = 0;
        }
        break;
    default:
        break;
    }
}
1701
1702
virtio_pci_notify_read(void * opaque,hwaddr addr,unsigned size)1703 static uint64_t virtio_pci_notify_read(void *opaque, hwaddr addr,
1704 unsigned size)
1705 {
1706 VirtIOPCIProxy *proxy = opaque;
1707 if (virtio_bus_get_device(&proxy->bus) == NULL) {
1708 return UINT64_MAX;
1709 }
1710
1711 return 0;
1712 }
1713
/*
 * MMIO notify: the target queue index is encoded in the write offset
 * (offset divided by the per-queue notify stride).
 */
static void virtio_pci_notify_write(void *opaque, hwaddr addr,
                                    uint64_t val, unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    unsigned queue = addr / virtio_pci_queue_mem_mult(proxy);

    if (vdev == NULL || queue >= VIRTIO_QUEUE_MAX) {
        return;
    }

    trace_virtio_pci_notify_write(addr, val, size);
    virtio_queue_notify(vdev, queue);
}
1727
/*
 * PIO notify: the target queue index is encoded in the written value
 * rather than in the offset.
 */
static void virtio_pci_notify_write_pio(void *opaque, hwaddr addr,
                                        uint64_t val, unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    unsigned queue = val;

    if (vdev == NULL || queue >= VIRTIO_QUEUE_MAX) {
        return;
    }

    trace_virtio_pci_notify_write_pio(addr, val, size);
    virtio_queue_notify(vdev, queue);
}
1741
/*
 * ISR read: atomically fetch-and-clear the interrupt status and lower the
 * INTx line.  Returns all-ones while no backend device is plugged.
 */
static uint64_t virtio_pci_isr_read(void *opaque, hwaddr addr,
                                    unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    uint64_t val;

    if (vdev == NULL) {
        return UINT64_MAX;
    }

    /* Atomic exchange so a concurrent ISR update is never lost. */
    val = qatomic_xchg(&vdev->isr, 0);
    pci_irq_deassert(&proxy->pci_dev);
    return val;
}
1757
/* Writes to the ISR region are deliberately ignored; the status is only
 * ever cleared by reading it. */
static void virtio_pci_isr_write(void *opaque, hwaddr addr,
                                 uint64_t val, unsigned size)
{
}
1762
virtio_pci_device_read(void * opaque,hwaddr addr,unsigned size)1763 static uint64_t virtio_pci_device_read(void *opaque, hwaddr addr,
1764 unsigned size)
1765 {
1766 VirtIOPCIProxy *proxy = opaque;
1767 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1768 uint64_t val;
1769
1770 if (vdev == NULL) {
1771 return UINT64_MAX;
1772 }
1773
1774 switch (size) {
1775 case 1:
1776 val = virtio_config_modern_readb(vdev, addr);
1777 break;
1778 case 2:
1779 val = virtio_config_modern_readw(vdev, addr);
1780 break;
1781 case 4:
1782 val = virtio_config_modern_readl(vdev, addr);
1783 break;
1784 default:
1785 val = 0;
1786 break;
1787 }
1788 return val;
1789 }
1790
/*
 * Write to the device-specific configuration region, dispatching on the
 * access width.  Unsupported widths and writes with no backend device are
 * ignored.
 */
static void virtio_pci_device_write(void *opaque, hwaddr addr,
                                    uint64_t val, unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    if (vdev == NULL) {
        return;
    }

    switch (size) {
    case 1:
        virtio_config_modern_writeb(vdev, addr, val);
        break;
    case 2:
        virtio_config_modern_writew(vdev, addr, val);
        break;
    case 4:
        virtio_config_modern_writel(vdev, addr, val);
        break;
    default:
        break;
    }
}
1813
/*
 * Create the five modern MMIO regions (common config, ISR, device config,
 * MMIO notify, PIO notify) with per-device names.  The ops tables are
 * static; per-proxy state reaches the handlers through the opaque pointer.
 */
static void virtio_pci_modern_regions_init(VirtIOPCIProxy *proxy,
                                           const char *vdev_name)
{
    static const MemoryRegionOps common_ops = {
        .read = virtio_pci_common_read,
        .write = virtio_pci_common_write,
        .impl = {
            .min_access_size = 1,
            .max_access_size = 4,
        },
        .endianness = DEVICE_LITTLE_ENDIAN,
    };
    static const MemoryRegionOps isr_ops = {
        .read = virtio_pci_isr_read,
        .write = virtio_pci_isr_write,
        .impl = {
            .min_access_size = 1,
            .max_access_size = 4,
        },
        .endianness = DEVICE_LITTLE_ENDIAN,
    };
    static const MemoryRegionOps device_ops = {
        .read = virtio_pci_device_read,
        .write = virtio_pci_device_write,
        .impl = {
            .min_access_size = 1,
            .max_access_size = 4,
        },
        .endianness = DEVICE_LITTLE_ENDIAN,
    };
    static const MemoryRegionOps notify_ops = {
        .read = virtio_pci_notify_read,
        .write = virtio_pci_notify_write,
        .impl = {
            .min_access_size = 1,
            .max_access_size = 4,
        },
        .endianness = DEVICE_LITTLE_ENDIAN,
    };
    static const MemoryRegionOps notify_pio_ops = {
        .read = virtio_pci_notify_read,
        .write = virtio_pci_notify_write_pio,
        .impl = {
            .min_access_size = 1,
            .max_access_size = 4,
        },
        .endianness = DEVICE_LITTLE_ENDIAN,
    };
    /* Name buffer is reused for every region below. */
    g_autoptr(GString) name = g_string_new(NULL);

    g_string_printf(name, "virtio-pci-common-%s", vdev_name);
    memory_region_init_io(&proxy->common.mr, OBJECT(proxy),
                          &common_ops,
                          proxy,
                          name->str,
                          proxy->common.size);

    g_string_printf(name, "virtio-pci-isr-%s", vdev_name);
    memory_region_init_io(&proxy->isr.mr, OBJECT(proxy),
                          &isr_ops,
                          proxy,
                          name->str,
                          proxy->isr.size);

    g_string_printf(name, "virtio-pci-device-%s", vdev_name);
    memory_region_init_io(&proxy->device.mr, OBJECT(proxy),
                          &device_ops,
                          proxy,
                          name->str,
                          proxy->device.size);

    g_string_printf(name, "virtio-pci-notify-%s", vdev_name);
    memory_region_init_io(&proxy->notify.mr, OBJECT(proxy),
                          &notify_ops,
                          proxy,
                          name->str,
                          proxy->notify.size);

    g_string_printf(name, "virtio-pci-notify-pio-%s", vdev_name);
    memory_region_init_io(&proxy->notify_pio.mr, OBJECT(proxy),
                          &notify_pio_ops,
                          proxy,
                          name->str,
                          proxy->notify_pio.size);
}
1899
/*
 * Place @region inside BAR memory region @mr at its fixed offset and
 * advertise it to the guest through a virtio PCI capability that records
 * the BAR number, offset, and size.
 */
static void virtio_pci_modern_region_map(VirtIOPCIProxy *proxy,
                                         VirtIOPCIRegion *region,
                                         struct virtio_pci_cap *cap,
                                         MemoryRegion *mr,
                                         uint8_t bar)
{
    memory_region_add_subregion(mr, region->offset, &region->mr);

    cap->cfg_type = region->type;
    cap->bar = bar;
    cap->offset = cpu_to_le32(region->offset);
    cap->length = cpu_to_le32(region->size);
    virtio_pci_add_mem_cap(proxy, cap);

}
1915
/* Map a modern region into the memory BAR and advertise it. */
static void virtio_pci_modern_mem_region_map(VirtIOPCIProxy *proxy,
                                             VirtIOPCIRegion *region,
                                             struct virtio_pci_cap *cap)
{
    virtio_pci_modern_region_map(proxy, region, cap,
                                 &proxy->modern_bar, proxy->modern_mem_bar_idx);
}
1923
/* Map a modern region into the I/O BAR and advertise it. */
static void virtio_pci_modern_io_region_map(VirtIOPCIProxy *proxy,
                                            VirtIOPCIRegion *region,
                                            struct virtio_pci_cap *cap)
{
    virtio_pci_modern_region_map(proxy, region, cap,
                                 &proxy->io_bar, proxy->modern_io_bar_idx);
}
1931
/* Remove a modern region from the memory BAR. */
static void virtio_pci_modern_mem_region_unmap(VirtIOPCIProxy *proxy,
                                               VirtIOPCIRegion *region)
{
    memory_region_del_subregion(&proxy->modern_bar,
                                &region->mr);
}
1938
/* Remove a modern region from the I/O BAR. */
static void virtio_pci_modern_io_region_unmap(VirtIOPCIProxy *proxy,
                                              VirtIOPCIRegion *region)
{
    memory_region_del_subregion(&proxy->io_bar,
                                &region->mr);
}
1945
/*
 * Called by virtio-bus before the device is plugged: advertise
 * VIRTIO_F_VERSION_1 when the proxy supports modern mode, and always
 * advertise VIRTIO_F_BAD_FEATURE (a transitional bit — presumably for
 * broken-driver detection; see the virtio spec).
 */
static void virtio_pci_pre_plugged(DeviceState *d, Error **errp)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    if (virtio_pci_modern(proxy)) {
        virtio_add_feature(&vdev->host_features, VIRTIO_F_VERSION_1);
    }

    virtio_add_feature(&vdev->host_features, VIRTIO_F_BAD_FEATURE);
}
1957
1958 /* This is called by virtio-bus just after the device is plugged. */
virtio_pci_device_plugged(DeviceState * d,Error ** errp)1959 static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
1960 {
1961 VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1962 VirtioBusState *bus = &proxy->bus;
1963 bool legacy = virtio_pci_legacy(proxy);
1964 bool modern;
1965 bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
1966 uint8_t *config;
1967 uint32_t size;
1968 VirtIODevice *vdev = virtio_bus_get_device(bus);
1969 int16_t res;
1970
1971 /*
1972 * Virtio capabilities present without
1973 * VIRTIO_F_VERSION_1 confuses guests
1974 */
1975 if (!proxy->ignore_backend_features &&
1976 !virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
1977 virtio_pci_disable_modern(proxy);
1978
1979 if (!legacy) {
1980 error_setg(errp, "Device doesn't support modern mode, and legacy"
1981 " mode is disabled");
1982 error_append_hint(errp, "Set disable-legacy to off\n");
1983
1984 return;
1985 }
1986 }
1987
1988 modern = virtio_pci_modern(proxy);
1989
1990 config = proxy->pci_dev.config;
1991 if (proxy->class_code) {
1992 pci_config_set_class(config, proxy->class_code);
1993 }
1994
1995 if (legacy) {
1996 if (!virtio_legacy_allowed(vdev)) {
1997 /*
1998 * To avoid migration issues, we allow legacy mode when legacy
1999 * check is disabled in the old machine types (< 5.1).
2000 */
2001 if (virtio_legacy_check_disabled(vdev)) {
2002 warn_report("device is modern-only, but for backward "
2003 "compatibility legacy is allowed");
2004 } else {
2005 error_setg(errp,
2006 "device is modern-only, use disable-legacy=on");
2007 return;
2008 }
2009 }
2010 if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
2011 error_setg(errp, "VIRTIO_F_IOMMU_PLATFORM was supported by"
2012 " neither legacy nor transitional device");
2013 return;
2014 }
2015 /*
2016 * Legacy and transitional devices use specific subsystem IDs.
2017 * Note that the subsystem vendor ID (config + PCI_SUBSYSTEM_VENDOR_ID)
2018 * is set to PCI_SUBVENDOR_ID_REDHAT_QUMRANET by default.
2019 */
2020 pci_set_word(config + PCI_SUBSYSTEM_ID, virtio_bus_get_vdev_id(bus));
2021 if (proxy->trans_devid) {
2022 pci_config_set_device_id(config, proxy->trans_devid);
2023 }
2024 } else {
2025 /* pure virtio-1.0 */
2026 pci_set_word(config + PCI_VENDOR_ID,
2027 PCI_VENDOR_ID_REDHAT_QUMRANET);
2028 pci_set_word(config + PCI_DEVICE_ID,
2029 PCI_DEVICE_ID_VIRTIO_10_BASE + virtio_bus_get_vdev_id(bus));
2030 pci_config_set_revision(config, 1);
2031 }
2032 config[PCI_INTERRUPT_PIN] = 1;
2033
2034
2035 if (modern) {
2036 struct virtio_pci_cap cap = {
2037 .cap_len = sizeof cap,
2038 };
2039 struct virtio_pci_notify_cap notify = {
2040 .cap.cap_len = sizeof notify,
2041 .notify_off_multiplier =
2042 cpu_to_le32(virtio_pci_queue_mem_mult(proxy)),
2043 };
2044 struct virtio_pci_cfg_cap cfg = {
2045 .cap.cap_len = sizeof cfg,
2046 .cap.cfg_type = VIRTIO_PCI_CAP_PCI_CFG,
2047 };
2048 struct virtio_pci_notify_cap notify_pio = {
2049 .cap.cap_len = sizeof notify,
2050 .notify_off_multiplier = cpu_to_le32(0x0),
2051 };
2052
2053 struct virtio_pci_cfg_cap *cfg_mask;
2054
2055 virtio_pci_modern_regions_init(proxy, vdev->name);
2056
2057 virtio_pci_modern_mem_region_map(proxy, &proxy->common, &cap);
2058 virtio_pci_modern_mem_region_map(proxy, &proxy->isr, &cap);
2059 virtio_pci_modern_mem_region_map(proxy, &proxy->device, &cap);
2060 virtio_pci_modern_mem_region_map(proxy, &proxy->notify, ¬ify.cap);
2061
2062 if (modern_pio) {
2063 memory_region_init(&proxy->io_bar, OBJECT(proxy),
2064 "virtio-pci-io", 0x4);
2065 address_space_init(&proxy->modern_cfg_io_as, &proxy->io_bar,
2066 "virtio-pci-cfg-io-as");
2067
2068 pci_register_bar(&proxy->pci_dev, proxy->modern_io_bar_idx,
2069 PCI_BASE_ADDRESS_SPACE_IO, &proxy->io_bar);
2070
2071 virtio_pci_modern_io_region_map(proxy, &proxy->notify_pio,
2072 ¬ify_pio.cap);
2073 }
2074
2075 pci_register_bar(&proxy->pci_dev, proxy->modern_mem_bar_idx,
2076 PCI_BASE_ADDRESS_SPACE_MEMORY |
2077 PCI_BASE_ADDRESS_MEM_PREFETCH |
2078 PCI_BASE_ADDRESS_MEM_TYPE_64,
2079 &proxy->modern_bar);
2080
2081 proxy->config_cap = virtio_pci_add_mem_cap(proxy, &cfg.cap);
2082 cfg_mask = (void *)(proxy->pci_dev.wmask + proxy->config_cap);
2083 pci_set_byte(&cfg_mask->cap.bar, ~0x0);
2084 pci_set_long((uint8_t *)&cfg_mask->cap.offset, ~0x0);
2085 pci_set_long((uint8_t *)&cfg_mask->cap.length, ~0x0);
2086 pci_set_long(cfg_mask->pci_cfg_data, ~0x0);
2087 }
2088
2089 if (proxy->nvectors) {
2090 int err = msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors,
2091 proxy->msix_bar_idx, NULL);
2092 if (err) {
2093 /* Notice when a system that supports MSIx can't initialize it */
2094 if (err != -ENOTSUP) {
2095 warn_report("unable to init msix vectors to %" PRIu32,
2096 proxy->nvectors);
2097 }
2098 proxy->nvectors = 0;
2099 }
2100 }
2101
2102 proxy->pci_dev.config_write = virtio_write_config;
2103 proxy->pci_dev.config_read = virtio_read_config;
2104
2105 if (legacy) {
2106 size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev)
2107 + virtio_bus_get_vdev_config_len(bus);
2108 size = pow2ceil(size);
2109
2110 memory_region_init_io(&proxy->bar, OBJECT(proxy),
2111 &virtio_pci_config_ops,
2112 proxy, "virtio-pci", size);
2113
2114 pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar_idx,
2115 PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar);
2116 }
2117
2118 if (pci_is_vf(&proxy->pci_dev)) {
2119 pcie_ari_init(&proxy->pci_dev, proxy->last_pcie_cap_offset);
2120 proxy->last_pcie_cap_offset += PCI_ARI_SIZEOF;
2121 } else {
2122 res = pcie_sriov_pf_init_from_user_created_vfs(
2123 &proxy->pci_dev, proxy->last_pcie_cap_offset, errp);
2124 if (res > 0) {
2125 proxy->last_pcie_cap_offset += res;
2126 virtio_add_feature(&vdev->host_features, VIRTIO_F_SR_IOV);
2127 }
2128 }
2129 }
2130
/*
 * Bus callback invoked when the virtio backend is unplugged from the
 * proxy: stop ioeventfd handling and tear down the modern capability
 * region mappings that device_plugged established.
 */
static void virtio_pci_device_unplugged(DeviceState *d)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);

    virtio_pci_stop_ioeventfd(proxy);

    if (!virtio_pci_modern(proxy)) {
        return;
    }

    /* Undo the modern (virtio-1.0) MMIO capability mappings. */
    virtio_pci_modern_mem_region_unmap(proxy, &proxy->common);
    virtio_pci_modern_mem_region_unmap(proxy, &proxy->isr);
    virtio_pci_modern_mem_region_unmap(proxy, &proxy->device);
    virtio_pci_modern_mem_region_unmap(proxy, &proxy->notify);

    if (proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY) {
        virtio_pci_modern_io_region_unmap(proxy, &proxy->notify_pio);
    }
}
2149
/*
 * PCI-level realize for the virtio transport.  Chooses the BAR layout and
 * modern capability region offsets, initializes the modern memory BAR and
 * its config address space, and (for express ports) builds the PCIe
 * capability chain.  The backend-specific part runs via the subclass's
 * k->realize hook at the end.
 */
static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
    VirtioPCIClass *k = VIRTIO_PCI_GET_CLASS(pci_dev);
    /* Express semantics apply only on an express bus that is not the root. */
    bool pcie_port = pci_bus_is_express(pci_get_bus(pci_dev)) &&
                     !pci_bus_is_root(pci_get_bus(pci_dev));

    /* fd-based ioevents can't be synchronized in record/replay */
    if (replay_mode != REPLAY_MODE_NONE) {
        proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
    }

    /*
     * virtio pci bar layout used by default.
     * subclasses can re-arrange things if needed.
     *
     * region 0 -- virtio legacy io bar
     * region 1 -- msi-x bar
     * region 2 -- virtio modern io bar (off by default)
     * region 4+5 -- virtio modern memory (64bit) bar
     *
     */
    proxy->legacy_io_bar_idx = 0;
    proxy->msix_bar_idx = 1;
    proxy->modern_io_bar_idx = 2;
    proxy->modern_mem_bar_idx = 4;

    /* Fixed offsets of the modern capability structures inside the BAR. */
    proxy->common.offset = 0x0;
    proxy->common.size = 0x1000;
    proxy->common.type = VIRTIO_PCI_CAP_COMMON_CFG;

    proxy->isr.offset = 0x1000;
    proxy->isr.size = 0x1000;
    proxy->isr.type = VIRTIO_PCI_CAP_ISR_CFG;

    proxy->device.offset = 0x2000;
    proxy->device.size = 0x1000;
    proxy->device.type = VIRTIO_PCI_CAP_DEVICE_CFG;

    /* One notify slot per possible queue, scaled by the per-vq multiplier. */
    proxy->notify.offset = 0x3000;
    proxy->notify.size = virtio_pci_queue_mem_mult(proxy) * VIRTIO_QUEUE_MAX;
    proxy->notify.type = VIRTIO_PCI_CAP_NOTIFY_CFG;

    proxy->notify_pio.offset = 0x0;
    proxy->notify_pio.size = 0x4;
    proxy->notify_pio.type = VIRTIO_PCI_CAP_NOTIFY_CFG;

    /* subclasses can enforce modern, so do this unconditionally */
    memory_region_init(&proxy->modern_bar, OBJECT(proxy), "virtio-pci",
                       /* PCI BAR regions must be powers of 2 */
                       pow2ceil(proxy->notify.offset + proxy->notify.size));

    address_space_init(&proxy->modern_cfg_mem_as, &proxy->modern_bar,
                       "virtio-pci-cfg-mem-as");

    /* Resolve "auto": legacy stays available only on conventional PCI. */
    if (proxy->disable_legacy == ON_OFF_AUTO_AUTO) {
        proxy->disable_legacy = pcie_port ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
    }

    if (!virtio_pci_modern(proxy) && !virtio_pci_legacy(proxy)) {
        error_setg(errp, "device cannot work as neither modern nor legacy mode"
                   " is enabled");
        error_append_hint(errp, "Set either disable-modern or disable-legacy"
                          " to off\n");
        return;
    }

    if (pcie_port && pci_is_express(pci_dev)) {
        int pos;
        proxy->last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE;

        pos = pcie_endpoint_cap_init(pci_dev, 0);
        assert(pos > 0);

        /* Power Management capability; pos is its config-space offset. */
        pos = pci_pm_init(pci_dev, 0, errp);
        if (pos < 0) {
            return;
        }

        /*
         * Indicates that this function complies with revision 1.2 of the
         * PCI Power Management Interface Specification.
         */
        pci_set_word(pci_dev->config + pos + PCI_PM_PMC, 0x3);

        if (proxy->flags & VIRTIO_PCI_FLAG_AER) {
            pcie_aer_init(pci_dev, PCI_ERR_VER, proxy->last_pcie_cap_offset,
                          PCI_ERR_SIZEOF, NULL);
            proxy->last_pcie_cap_offset += PCI_ERR_SIZEOF;
        }

        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_DEVERR) {
            /* Init error enabling flags */
            pcie_cap_deverr_init(pci_dev);
        }

        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_LNKCTL) {
            /* Init Link Control Register */
            pcie_cap_lnkctl_init(pci_dev);
        }

        if (proxy->flags & VIRTIO_PCI_FLAG_PM_NO_SOFT_RESET) {
            /* Advertise No_Soft_Reset in PMCSR (checked on bus reset). */
            pci_set_word(pci_dev->config + pos + PCI_PM_CTRL,
                         PCI_PM_CTRL_NO_SOFT_RESET);
        }

        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_PM) {
            /* Init Power Management Control Register */
            pci_set_word(pci_dev->wmask + pos + PCI_PM_CTRL,
                         PCI_PM_CTRL_STATE_MASK);
        }

        if (proxy->flags & VIRTIO_PCI_FLAG_ATS) {
            pcie_ats_init(pci_dev, proxy->last_pcie_cap_offset,
                          proxy->flags & VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED);
            proxy->last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF;
        }

        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) {
            /* Set Function Level Reset capability bit */
            pcie_cap_flr_init(pci_dev);
        }
    } else {
        /*
         * make future invocations of pci_is_express() return false
         * and pci_config_size() return PCI_CONFIG_SPACE_SIZE.
         */
        pci_dev->cap_present &= ~QEMU_PCI_CAP_EXPRESS;
    }

    virtio_pci_bus_new(&proxy->bus, sizeof(proxy->bus), proxy);
    if (k->realize) {
        k->realize(proxy, errp);
    }
}
2285
/*
 * PCI-level teardown: releases SR-IOV PF state, the MSI-X BAR, AER (when
 * it was initialized in realize) and the modern config address spaces.
 */
static void virtio_pci_exit(PCIDevice *pci_dev)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
    PCIBus *parent_bus = pci_get_bus(pci_dev);

    pcie_sriov_pf_exit(&proxy->pci_dev);
    msix_uninit_exclusive_bar(pci_dev);

    /* AER is only initialized on express ports with the "aer" flag set. */
    if ((proxy->flags & VIRTIO_PCI_FLAG_AER) &&
        pci_bus_is_express(parent_bus) && !pci_bus_is_root(parent_bus) &&
        pci_is_express(pci_dev)) {
        pcie_aer_exit(pci_dev);
    }

    address_space_destroy(&proxy->modern_cfg_mem_as);
    if (proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY) {
        address_space_destroy(&proxy->modern_cfg_io_as);
    }
}
2304
/*
 * Reset the transport: propagate reset to the virtio bus, release MSI-X
 * vectors and wipe all guest-programmed per-queue proxy state.
 */
static void virtio_pci_reset(DeviceState *qdev)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
    int n;

    virtio_bus_reset(VIRTIO_BUS(&proxy->bus));
    msix_unuse_all_vectors(&proxy->pci_dev);

    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        proxy->vqs[n].enabled = 0;
        proxy->vqs[n].reset = 0;
        proxy->vqs[n].num = 0;
        /* Ring addresses are stored as lo/hi 32-bit halves. */
        proxy->vqs[n].desc[0] = 0;
        proxy->vqs[n].desc[1] = 0;
        proxy->vqs[n].avail[0] = 0;
        proxy->vqs[n].avail[1] = 0;
        proxy->vqs[n].used[0] = 0;
        proxy->vqs[n].used[1] = 0;
    }
}
2323
virtio_pci_no_soft_reset(PCIDevice * dev)2324 static bool virtio_pci_no_soft_reset(PCIDevice *dev)
2325 {
2326 uint16_t pmcsr;
2327
2328 if (!pci_is_express(dev) || !(dev->cap_present & QEMU_PCI_CAP_PM)) {
2329 return false;
2330 }
2331
2332 pmcsr = pci_get_word(dev->config + dev->pm_cap + PCI_PM_CTRL);
2333
2334 /*
2335 * When No_Soft_Reset bit is set and the device
2336 * is in D3hot state, don't reset device
2337 */
2338 return (pmcsr & PCI_PM_CTRL_NO_SOFT_RESET) &&
2339 (pmcsr & PCI_PM_CTRL_STATE_MASK) == 3;
2340 }
2341
/*
 * Resettable "hold" phase for bus-level reset.  Honors PM No_Soft_Reset
 * (device stays untouched in D3hot), otherwise performs the transport
 * reset plus express capability register resets.
 */
static void virtio_pci_bus_reset_hold(Object *obj, ResetType type)
{
    PCIDevice *dev = PCI_DEVICE(obj);

    if (virtio_pci_no_soft_reset(dev)) {
        return;
    }

    virtio_pci_reset(DEVICE(obj));

    if (!pci_is_express(dev)) {
        return;
    }

    pcie_cap_deverr_reset(dev);
    pcie_cap_lnkctl_reset(dev);

    if (VIRTIO_PCI(dev)->flags & VIRTIO_PCI_FLAG_INIT_PM) {
        /* Force the power state back to D0. */
        pci_word_test_and_clear_mask(dev->config + dev->pm_cap + PCI_PM_CTRL,
                                     PCI_PM_CTRL_STATE_MASK);
    }
}
2366
/*
 * Transport-level properties shared by all virtio PCI devices
 * (attached in virtio_pci_class_init()).  They toggle bits in
 * VirtIOPCIProxy::flags.
 */
static const Property virtio_pci_properties[] = {
    DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false),
    DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, false),
    DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
    DEFINE_PROP_BOOL("x-ignore-backend-features", VirtIOPCIProxy,
                     ignore_backend_features, false),
    DEFINE_PROP_BIT("ats", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_ATS_BIT, false),
    DEFINE_PROP_BIT("x-ats-page-aligned", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED_BIT, true),
    DEFINE_PROP_BIT("x-pcie-deverr-init", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_INIT_DEVERR_BIT, true),
    DEFINE_PROP_BIT("x-pcie-lnkctl-init", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT, true),
    DEFINE_PROP_BIT("x-pcie-pm-init", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_INIT_PM_BIT, true),
    DEFINE_PROP_BIT("x-pcie-pm-no-soft-reset", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_PM_NO_SOFT_RESET_BIT, false),
    DEFINE_PROP_BIT("x-pcie-flr-init", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_INIT_FLR_BIT, true),
    DEFINE_PROP_BIT("aer", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_AER_BIT, false),
};
2393
virtio_pci_dc_realize(DeviceState * qdev,Error ** errp)2394 static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp)
2395 {
2396 VirtioPCIClass *vpciklass = VIRTIO_PCI_GET_CLASS(qdev);
2397 VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
2398 PCIDevice *pci_dev = &proxy->pci_dev;
2399
2400 if (virtio_pci_modern(proxy)) {
2401 pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
2402 }
2403
2404 vpciklass->parent_dc_realize(qdev, errp);
2405 }
2406
virtio_pci_sync_config(DeviceState * dev,Error ** errp)2407 static int virtio_pci_sync_config(DeviceState *dev, Error **errp)
2408 {
2409 VirtIOPCIProxy *proxy = VIRTIO_PCI(dev);
2410 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
2411
2412 return qdev_sync_config(DEVICE(vdev), errp);
2413 }
2414
/*
 * Class init for the abstract TYPE_VIRTIO_PCI base: installs the common
 * properties, PCI realize/exit hooks, Red Hat vendor identity and the
 * bus-level reset phase.
 */
static void virtio_pci_class_init(ObjectClass *klass, const void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
    VirtioPCIClass *vpciklass = VIRTIO_PCI_CLASS(klass);
    ResettableClass *rc = RESETTABLE_CLASS(klass);

    device_class_set_props(dc, virtio_pci_properties);
    k->realize = virtio_pci_realize;
    k->exit = virtio_pci_exit;
    k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
    k->revision = VIRTIO_PCI_ABI_VERSION;
    k->class_id = PCI_CLASS_OTHERS;
    /* Interpose virtio_pci_dc_realize ahead of the device realize chain. */
    device_class_set_parent_realize(dc, virtio_pci_dc_realize,
                                    &vpciklass->parent_dc_realize);
    rc->phases.hold = virtio_pci_bus_reset_hold;
    dc->sync_config = virtio_pci_sync_config;
}
2433
/* Abstract base type; concrete devices derive via virtio_pci_types_register(). */
static const TypeInfo virtio_pci_info = {
    .name = TYPE_VIRTIO_PCI,
    .parent = TYPE_PCI_DEVICE,
    .instance_size = sizeof(VirtIOPCIProxy),
    .class_init = virtio_pci_class_init,
    .class_size = sizeof(VirtioPCIClass),
    .abstract = true,
};
2442
/*
 * User-visible mode knobs for generic device types.  Transitional and
 * non-transitional variants instead fix these values in instance_init.
 */
static const Property virtio_pci_generic_properties[] = {
    DEFINE_PROP_ON_OFF_AUTO("disable-legacy", VirtIOPCIProxy, disable_legacy,
                            ON_OFF_AUTO_AUTO),
    DEFINE_PROP_BOOL("disable-modern", VirtIOPCIProxy, disable_modern, false),
};
2448
/*
 * Class init trampoline for registered device families: forwards to the
 * family's own class_init (passed as class_data) when one was supplied.
 */
static void virtio_pci_base_class_init(ObjectClass *klass, const void *data)
{
    const VirtioPCIDeviceTypeInfo *t = data;

    if (!t->class_init) {
        return;
    }
    t->class_init(klass, NULL);
}
2456
/* Attach the disable-legacy/disable-modern knobs to generic device types. */
static void virtio_pci_generic_class_init(ObjectClass *klass, const void *data)
{
    device_class_set_props(DEVICE_CLASS(klass), virtio_pci_generic_properties);
}
2463
/* Transitional variant: both legacy and modern interfaces enabled. */
static void virtio_pci_transitional_instance_init(Object *obj)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(obj);

    proxy->disable_modern = false;
    proxy->disable_legacy = ON_OFF_AUTO_OFF;
}
2471
/* Non-transitional variant: modern-only, legacy interface disabled. */
static void virtio_pci_non_transitional_instance_init(Object *obj)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(obj);

    proxy->disable_modern = false;
    proxy->disable_legacy = ON_OFF_AUTO_ON;
}
2479
/*
 * Register the QOM types for one virtio PCI device family described by
 * @t: an abstract base type plus, as requested, generic, non-transitional
 * and transitional concrete types.  When no base_name is given, a
 * synthetic "<generic>-base-type" is created so the generic properties
 * can still be attached at an intermediate level.
 */
void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t)
{
    char *base_name = NULL;
    TypeInfo base_type_info = {
        .name = t->base_name,
        .parent = t->parent ? t->parent : TYPE_VIRTIO_PCI,
        .instance_size = t->instance_size,
        .instance_init = t->instance_init,
        .instance_finalize = t->instance_finalize,
        .class_size = t->class_size,
        .abstract = true,
        .interfaces = t->interfaces,
    };
    TypeInfo generic_type_info = {
        .name = t->generic_name,
        .parent = base_type_info.name,
        .class_init = virtio_pci_generic_class_init,
        .interfaces = (const InterfaceInfo[]) {
            { INTERFACE_PCIE_DEVICE },
            { INTERFACE_CONVENTIONAL_PCI_DEVICE },
            { }
        },
    };

    if (!base_type_info.name) {
        /* No base type -> register a single generic device type */
        /* use intermediate %s-base-type to add generic device props */
        base_name = g_strdup_printf("%s-base-type", t->generic_name);
        base_type_info.name = base_name;
        base_type_info.class_init = virtio_pci_generic_class_init;

        /* The generic type then carries the family's class_init instead. */
        generic_type_info.parent = base_name;
        generic_type_info.class_init = virtio_pci_base_class_init;
        generic_type_info.class_data = t;

        assert(!t->non_transitional_name);
        assert(!t->transitional_name);
    } else {
        base_type_info.class_init = virtio_pci_base_class_init;
        base_type_info.class_data = t;
    }

    /* Parent types must be registered before their children. */
    type_register_static(&base_type_info);
    if (generic_type_info.name) {
        type_register_static(&generic_type_info);
    }

    if (t->non_transitional_name) {
        const TypeInfo non_transitional_type_info = {
            .name = t->non_transitional_name,
            .parent = base_type_info.name,
            .instance_init = virtio_pci_non_transitional_instance_init,
            .interfaces = (const InterfaceInfo[]) {
                { INTERFACE_PCIE_DEVICE },
                { INTERFACE_CONVENTIONAL_PCI_DEVICE },
                { }
            },
        };
        type_register_static(&non_transitional_type_info);
    }

    if (t->transitional_name) {
        const TypeInfo transitional_type_info = {
            .name = t->transitional_name,
            .parent = base_type_info.name,
            .instance_init = virtio_pci_transitional_instance_init,
            .interfaces = (const InterfaceInfo[]) {
                /*
                 * Transitional virtio devices work only as Conventional PCI
                 * devices because they require PIO ports.
                 */
                { INTERFACE_CONVENTIONAL_PCI_DEVICE },
                { }
            },
        };
        type_register_static(&transitional_type_info);
    }
    g_free(base_name);
}
2559
virtio_pci_optimal_num_queues(unsigned fixed_queues)2560 unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues)
2561 {
2562 /*
2563 * 1:1 vq to vCPU mapping is ideal because the same vCPU that submitted
2564 * virtqueue buffers can handle their completion. When a different vCPU
2565 * handles completion it may need to IPI the vCPU that submitted the
2566 * request and this adds overhead.
2567 *
2568 * Virtqueues consume guest RAM and MSI-X vectors. This is wasteful in
2569 * guests with very many vCPUs and a device that is only used by a few
2570 * vCPUs. Unfortunately optimizing that case requires manual pinning inside
2571 * the guest, so those users might as well manually set the number of
2572 * queues. There is no upper limit that can be applied automatically and
2573 * doing so arbitrarily would result in a sudden performance drop once the
2574 * threshold number of vCPUs is exceeded.
2575 */
2576 unsigned num_queues = current_machine->smp.cpus;
2577
2578 /*
2579 * The maximum number of MSI-X vectors is PCI_MSIX_FLAGS_QSIZE + 1, but the
2580 * config change interrupt and the fixed virtqueues must be taken into
2581 * account too.
2582 */
2583 num_queues = MIN(num_queues, PCI_MSIX_FLAGS_QSIZE - fixed_queues);
2584
2585 /*
2586 * There is a limit to how many virtqueues a device can have.
2587 */
2588 return MIN(num_queues, VIRTIO_QUEUE_MAX - fixed_queues);
2589 }
2590
2591 /* virtio-pci-bus */
2592
/* Create the proxy's child virtio bus (always named "virtio-bus"). */
static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size,
                               VirtIOPCIProxy *dev)
{
    qbus_init(bus, bus_size, TYPE_VIRTIO_PCI_BUS, DEVICE(dev), "virtio-bus");
}
2601
/*
 * Wire every VirtioBusClass callback to its PCI transport implementation.
 * Exactly one virtio device may sit on this bus (max_dev = 1).
 */
static void virtio_pci_bus_class_init(ObjectClass *klass, const void *data)
{
    BusClass *bus_class = BUS_CLASS(klass);
    VirtioBusClass *k = VIRTIO_BUS_CLASS(klass);
    bus_class->max_dev = 1;
    k->notify = virtio_pci_notify;
    /* Migration save/load hooks. */
    k->save_config = virtio_pci_save_config;
    k->load_config = virtio_pci_load_config;
    k->save_queue = virtio_pci_save_queue;
    k->load_queue = virtio_pci_load_queue;
    k->save_extra_state = virtio_pci_save_extra_state;
    k->load_extra_state = virtio_pci_load_extra_state;
    k->has_extra_state = virtio_pci_has_extra_state;
    /* Notifier plumbing (guest interrupts / host ioeventfd). */
    k->query_guest_notifiers = virtio_pci_query_guest_notifiers;
    k->set_guest_notifiers = virtio_pci_set_guest_notifiers;
    k->set_host_notifier_mr = virtio_pci_set_host_notifier_mr;
    k->vmstate_change = virtio_pci_vmstate_change;
    /* Device plug lifecycle. */
    k->pre_plugged = virtio_pci_pre_plugged;
    k->device_plugged = virtio_pci_device_plugged;
    k->device_unplugged = virtio_pci_device_unplugged;
    k->query_nvectors = virtio_pci_query_nvectors;
    k->ioeventfd_enabled = virtio_pci_ioeventfd_enabled;
    k->ioeventfd_assign = virtio_pci_ioeventfd_assign;
    k->get_dma_as = virtio_pci_get_dma_as;
    k->iommu_enabled = virtio_pci_iommu_enabled;
    k->queue_enabled = virtio_pci_queue_enabled;
}
2629
/* QOM type for the PCI-backed virtio bus. */
static const TypeInfo virtio_pci_bus_info = {
    .name = TYPE_VIRTIO_PCI_BUS,
    .parent = TYPE_VIRTIO_BUS,
    .instance_size = sizeof(VirtioPCIBusState),
    .class_size = sizeof(VirtioPCIBusClass),
    .class_init = virtio_pci_bus_class_init,
};
2637
/*
 * Register only the abstract base types here; concrete device families
 * register themselves through virtio_pci_types_register().
 */
static void virtio_pci_register_types(void)
{
    /* Base types: */
    type_register_static(&virtio_pci_bus_info);
    type_register_static(&virtio_pci_info);
}

type_init(virtio_pci_register_types)
2646
2647