xref: /openbmc/qemu/hw/vfio/pci-quirks.c (revision 06d4c71f)
1 /*
2  * device quirks for PCI devices
3  *
4  * Copyright Red Hat, Inc. 2012-2015
5  *
6  * Authors:
7  *  Alex Williamson <alex.williamson@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  */
12 
13 #include "qemu/osdep.h"
14 #include "exec/memop.h"
15 #include "qemu/units.h"
16 #include "qemu/error-report.h"
17 #include "qemu/main-loop.h"
18 #include "qemu/module.h"
19 #include "qemu/range.h"
20 #include "qapi/error.h"
21 #include "qapi/visitor.h"
22 #include <sys/ioctl.h>
23 #include "hw/hw.h"
24 #include "hw/nvram/fw_cfg.h"
25 #include "hw/qdev-properties.h"
26 #include "pci.h"
27 #include "trace.h"
28 
29 /*
30  * List of device ids/vendor ids for which to disable
31  * option rom loading. This avoids the guest hangs during rom
 * execution as noticed with the BCM 57810 card for lack of a
 * better way to handle such issues.
34  * The  user can still override by specifying a romfile or
35  * rombar=1.
36  * Please see https://bugs.launchpad.net/qemu/+bug/1284874
37  * for an analysis of the 57810 card hang. When adding
38  * a new vendor id/device id combination below, please also add
39  * your card/environment details and information that could
40  * help in debugging to the bug tracking this issue
41  */
/* Vendor/device ID pairs consulted by vfio_blacklist_opt_rom(). */
static const struct {
    uint32_t vendor;    /* PCI vendor ID to match */
    uint32_t device;    /* PCI device ID to match */
} romblacklist[] = {
    { 0x14e4, 0x168e }, /* Broadcom BCM 57810 */
};
48 
49 bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev)
50 {
51     int i;
52 
53     for (i = 0 ; i < ARRAY_SIZE(romblacklist); i++) {
54         if (vfio_pci_is(vdev, romblacklist[i].vendor, romblacklist[i].device)) {
55             trace_vfio_quirk_rom_blacklisted(vdev->vbasedev.name,
56                                              romblacklist[i].vendor,
57                                              romblacklist[i].device);
58             return true;
59         }
60     }
61     return false;
62 }
63 
64 /*
65  * Device specific region quirks (mostly backdoors to PCI config space)
66  */
67 
68 /*
69  * The generic window quirks operate on an address and data register,
70  * vfio_generic_window_address_quirk handles the address register and
71  * vfio_generic_window_data_quirk handles the data register.  These ops
72  * pass reads and writes through to hardware until a value matching the
 * stored address match/mask is written.  When this occurs, data register
 * accesses go to emulated PCI config space for the device rather than
 * being passed through to hardware.  This enables devices where PCI
 * config space is accessible behind a window register to maintain the
 * virtualization provided through vfio.
78  */
/*
 * One address pattern recognised by the generic window address quirk.
 * A written value matches when (value & ~mask) == match; the bits selected
 * by mask are then kept as the config space offset.
 */
typedef struct VFIOConfigWindowMatch {
    uint32_t match;     /* expected value of the bits outside mask */
    uint32_t mask;      /* bits of the written value kept as the offset */
} VFIOConfigWindowMatch;
83 
/* State shared by the generic window address and data register handlers. */
typedef struct VFIOConfigWindowQuirk {
    struct VFIOPCIDevice *vdev;     /* device the quirk belongs to */

    /* Config offset latched by the last matching address write */
    uint32_t address_val;

    uint32_t address_offset;        /* address register offset in the BAR */
    uint32_t data_offset;           /* data register offset in the BAR */

    /* True while the last address register write matched an entry */
    bool window_enabled;
    uint8_t bar;                    /* index into vdev->bars[] */

    MemoryRegion *addr_mem;         /* region trapping the address register */
    MemoryRegion *data_mem;         /* region trapping the data register */

    uint32_t nr_matches;            /* number of entries in matches[] */
    VFIOConfigWindowMatch matches[]; /* trailing array, sized at allocation */
} VFIOConfigWindowQuirk;
101 
102 static uint64_t vfio_generic_window_quirk_address_read(void *opaque,
103                                                        hwaddr addr,
104                                                        unsigned size)
105 {
106     VFIOConfigWindowQuirk *window = opaque;
107     VFIOPCIDevice *vdev = window->vdev;
108 
109     return vfio_region_read(&vdev->bars[window->bar].region,
110                             addr + window->address_offset, size);
111 }
112 
113 static void vfio_generic_window_quirk_address_write(void *opaque, hwaddr addr,
114                                                     uint64_t data,
115                                                     unsigned size)
116 {
117     VFIOConfigWindowQuirk *window = opaque;
118     VFIOPCIDevice *vdev = window->vdev;
119     int i;
120 
121     window->window_enabled = false;
122 
123     vfio_region_write(&vdev->bars[window->bar].region,
124                       addr + window->address_offset, data, size);
125 
126     for (i = 0; i < window->nr_matches; i++) {
127         if ((data & ~window->matches[i].mask) == window->matches[i].match) {
128             window->window_enabled = true;
129             window->address_val = data & window->matches[i].mask;
130             trace_vfio_quirk_generic_window_address_write(vdev->vbasedev.name,
131                                     memory_region_name(window->addr_mem), data);
132             break;
133         }
134     }
135 }
136 
/* MemoryRegionOps for the generic window address register (little-endian). */
static const MemoryRegionOps vfio_generic_window_address_quirk = {
    .read = vfio_generic_window_quirk_address_read,
    .write = vfio_generic_window_quirk_address_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
142 
143 static uint64_t vfio_generic_window_quirk_data_read(void *opaque,
144                                                     hwaddr addr, unsigned size)
145 {
146     VFIOConfigWindowQuirk *window = opaque;
147     VFIOPCIDevice *vdev = window->vdev;
148     uint64_t data;
149 
150     /* Always read data reg, discard if window enabled */
151     data = vfio_region_read(&vdev->bars[window->bar].region,
152                             addr + window->data_offset, size);
153 
154     if (window->window_enabled) {
155         data = vfio_pci_read_config(&vdev->pdev, window->address_val, size);
156         trace_vfio_quirk_generic_window_data_read(vdev->vbasedev.name,
157                                     memory_region_name(window->data_mem), data);
158     }
159 
160     return data;
161 }
162 
163 static void vfio_generic_window_quirk_data_write(void *opaque, hwaddr addr,
164                                                  uint64_t data, unsigned size)
165 {
166     VFIOConfigWindowQuirk *window = opaque;
167     VFIOPCIDevice *vdev = window->vdev;
168 
169     if (window->window_enabled) {
170         vfio_pci_write_config(&vdev->pdev, window->address_val, data, size);
171         trace_vfio_quirk_generic_window_data_write(vdev->vbasedev.name,
172                                     memory_region_name(window->data_mem), data);
173         return;
174     }
175 
176     vfio_region_write(&vdev->bars[window->bar].region,
177                       addr + window->data_offset, data, size);
178 }
179 
/* MemoryRegionOps for the generic window data register (little-endian). */
static const MemoryRegionOps vfio_generic_window_data_quirk = {
    .read = vfio_generic_window_quirk_data_read,
    .write = vfio_generic_window_quirk_data_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
185 
186 /*
187  * The generic mirror quirk handles devices which expose PCI config space
188  * through a region within a BAR.  When enabled, reads and writes are
189  * redirected through to emulated PCI config space.  XXX if PCI config space
190  * used memory regions, this could just be an alias.
191  */
/* State for a quirk trapping a BAR-resident mirror of PCI config space. */
typedef struct VFIOConfigMirrorQuirk {
    struct VFIOPCIDevice *vdev;     /* device the quirk belongs to */
    uint32_t offset;                /* offset of the mirror within the BAR */
    uint8_t bar;                    /* index into vdev->bars[] */
    MemoryRegion *mem;              /* region trapping the mirror */
    /*
     * Optional trailing storage; the Nvidia mirror write handler views it
     * as a LastDataSet.  NOTE(review): allocation size is set by the probe
     * code of each user - confirm there.
     */
    uint8_t data[];
} VFIOConfigMirrorQuirk;
199 
200 static uint64_t vfio_generic_quirk_mirror_read(void *opaque,
201                                                hwaddr addr, unsigned size)
202 {
203     VFIOConfigMirrorQuirk *mirror = opaque;
204     VFIOPCIDevice *vdev = mirror->vdev;
205     uint64_t data;
206 
207     /* Read and discard in case the hardware cares */
208     (void)vfio_region_read(&vdev->bars[mirror->bar].region,
209                            addr + mirror->offset, size);
210 
211     data = vfio_pci_read_config(&vdev->pdev, addr, size);
212     trace_vfio_quirk_generic_mirror_read(vdev->vbasedev.name,
213                                          memory_region_name(mirror->mem),
214                                          addr, data);
215     return data;
216 }
217 
218 static void vfio_generic_quirk_mirror_write(void *opaque, hwaddr addr,
219                                             uint64_t data, unsigned size)
220 {
221     VFIOConfigMirrorQuirk *mirror = opaque;
222     VFIOPCIDevice *vdev = mirror->vdev;
223 
224     vfio_pci_write_config(&vdev->pdev, addr, data, size);
225     trace_vfio_quirk_generic_mirror_write(vdev->vbasedev.name,
226                                           memory_region_name(mirror->mem),
227                                           addr, data);
228 }
229 
/* MemoryRegionOps for a generic config space mirror (little-endian). */
static const MemoryRegionOps vfio_generic_mirror_quirk = {
    .read = vfio_generic_quirk_mirror_read,
    .write = vfio_generic_quirk_mirror_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
235 
/*
 * Is range1 fully contained within range2?
 *
 * Each range is described by its first offset and its length.  The test is
 * arranged so that first + len is never computed: that sum can wrap around
 * in uint64_t, which would make an out-of-range request look contained (or
 * a range ending exactly at 2^64 look too small).
 *
 * Returns true when [first1, first1 + len1) lies inside
 * [first2, first2 + len2).
 */
static bool vfio_range_contained(uint64_t first1, uint64_t len1,
                                 uint64_t first2, uint64_t len2) {
    /* Starts before range2, or is longer than it: cannot fit */
    if (first1 < first2 || len1 > len2) {
        return false;
    }

    /* Both subtractions are safe thanks to the checks above */
    return first1 - first2 <= len2 - len1;
}
241 
/* PCI vendor ID used to match ATI/AMD devices in the quirks below */
#define PCI_VENDOR_ID_ATI               0x1002
243 
244 /*
245  * Radeon HD cards (HD5450 & HD7850) report the upper byte of the I/O port BAR
246  * through VGA register 0x3c3.  On newer cards, the I/O port BAR is always
247  * BAR4 (older cards like the X550 used BAR1, but we don't care to support
248  * those).  Note that on bare metal, a read of 0x3c3 doesn't always return the
249  * I/O port BAR address.  Originally this was coded to return the virtual BAR
250  * address only if the physical register read returns the actual BAR address,
251  * but users have reported greater success if we return the virtual address
252  * unconditionally.
253  */
254 static uint64_t vfio_ati_3c3_quirk_read(void *opaque,
255                                         hwaddr addr, unsigned size)
256 {
257     VFIOPCIDevice *vdev = opaque;
258     uint64_t data = vfio_pci_read_config(&vdev->pdev,
259                                          PCI_BASE_ADDRESS_4 + 1, size);
260 
261     trace_vfio_quirk_ati_3c3_read(vdev->vbasedev.name, data);
262 
263     return data;
264 }
265 
/* MemoryRegionOps for the ATI 0x3c3 quirk; only a read handler is set. */
static const MemoryRegionOps vfio_ati_3c3_quirk = {
    .read = vfio_ati_3c3_quirk_read,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
270 
271 VFIOQuirk *vfio_quirk_alloc(int nr_mem)
272 {
273     VFIOQuirk *quirk = g_new0(VFIOQuirk, 1);
274     QLIST_INIT(&quirk->ioeventfds);
275     quirk->mem = g_new0(MemoryRegion, nr_mem);
276     quirk->nr_mem = nr_mem;
277 
278     return quirk;
279 }
280 
281 static void vfio_ioeventfd_exit(VFIOPCIDevice *vdev, VFIOIOEventFD *ioeventfd)
282 {
283     QLIST_REMOVE(ioeventfd, next);
284     memory_region_del_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
285                               true, ioeventfd->data, &ioeventfd->e);
286 
287     if (ioeventfd->vfio) {
288         struct vfio_device_ioeventfd vfio_ioeventfd;
289 
290         vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
291         vfio_ioeventfd.flags = ioeventfd->size;
292         vfio_ioeventfd.data = ioeventfd->data;
293         vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
294                                 ioeventfd->region_addr;
295         vfio_ioeventfd.fd = -1;
296 
297         if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd)) {
298             error_report("Failed to remove vfio ioeventfd for %s+0x%"
299                          HWADDR_PRIx"[%d]:0x%"PRIx64" (%m)",
300                          memory_region_name(ioeventfd->mr), ioeventfd->addr,
301                          ioeventfd->size, ioeventfd->data);
302         }
303     } else {
304         qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
305                             NULL, NULL, NULL);
306     }
307 
308     event_notifier_cleanup(&ioeventfd->e);
309     trace_vfio_ioeventfd_exit(memory_region_name(ioeventfd->mr),
310                               (uint64_t)ioeventfd->addr, ioeventfd->size,
311                               ioeventfd->data);
312     g_free(ioeventfd);
313 }
314 
315 static void vfio_drop_dynamic_eventfds(VFIOPCIDevice *vdev, VFIOQuirk *quirk)
316 {
317     VFIOIOEventFD *ioeventfd, *tmp;
318 
319     QLIST_FOREACH_SAFE(ioeventfd, &quirk->ioeventfds, next, tmp) {
320         if (ioeventfd->dynamic) {
321             vfio_ioeventfd_exit(vdev, ioeventfd);
322         }
323     }
324 }
325 
326 static void vfio_ioeventfd_handler(void *opaque)
327 {
328     VFIOIOEventFD *ioeventfd = opaque;
329 
330     if (event_notifier_test_and_clear(&ioeventfd->e)) {
331         vfio_region_write(ioeventfd->region, ioeventfd->region_addr,
332                           ioeventfd->data, ioeventfd->size);
333         trace_vfio_ioeventfd_handler(memory_region_name(ioeventfd->mr),
334                                      (uint64_t)ioeventfd->addr, ioeventfd->size,
335                                      ioeventfd->data);
336     }
337 }
338 
339 static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev,
340                                           MemoryRegion *mr, hwaddr addr,
341                                           unsigned size, uint64_t data,
342                                           VFIORegion *region,
343                                           hwaddr region_addr, bool dynamic)
344 {
345     VFIOIOEventFD *ioeventfd;
346 
347     if (vdev->no_kvm_ioeventfd) {
348         return NULL;
349     }
350 
351     ioeventfd = g_malloc0(sizeof(*ioeventfd));
352 
353     if (event_notifier_init(&ioeventfd->e, 0)) {
354         g_free(ioeventfd);
355         return NULL;
356     }
357 
358     /*
359      * MemoryRegion and relative offset, plus additional ioeventfd setup
360      * parameters for configuring and later tearing down KVM ioeventfd.
361      */
362     ioeventfd->mr = mr;
363     ioeventfd->addr = addr;
364     ioeventfd->size = size;
365     ioeventfd->data = data;
366     ioeventfd->dynamic = dynamic;
367     /*
368      * VFIORegion and relative offset for implementing the userspace
369      * handler.  data & size fields shared for both uses.
370      */
371     ioeventfd->region = region;
372     ioeventfd->region_addr = region_addr;
373 
374     if (!vdev->no_vfio_ioeventfd) {
375         struct vfio_device_ioeventfd vfio_ioeventfd;
376 
377         vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
378         vfio_ioeventfd.flags = ioeventfd->size;
379         vfio_ioeventfd.data = ioeventfd->data;
380         vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
381                                 ioeventfd->region_addr;
382         vfio_ioeventfd.fd = event_notifier_get_fd(&ioeventfd->e);
383 
384         ioeventfd->vfio = !ioctl(vdev->vbasedev.fd,
385                                  VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd);
386     }
387 
388     if (!ioeventfd->vfio) {
389         qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
390                             vfio_ioeventfd_handler, NULL, ioeventfd);
391     }
392 
393     memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
394                               true, ioeventfd->data, &ioeventfd->e);
395     trace_vfio_ioeventfd_init(memory_region_name(mr), (uint64_t)addr,
396                               size, data, ioeventfd->vfio);
397 
398     return ioeventfd;
399 }
400 
401 static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
402 {
403     VFIOQuirk *quirk;
404 
405     /*
406      * As long as the BAR is >= 256 bytes it will be aligned such that the
407      * lower byte is always zero.  Filter out anything else, if it exists.
408      */
409     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
410         !vdev->bars[4].ioport || vdev->bars[4].region.size < 256) {
411         return;
412     }
413 
414     quirk = vfio_quirk_alloc(1);
415 
416     memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev,
417                           "vfio-ati-3c3-quirk", 1);
418     memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
419                                 3 /* offset 3 bytes from 0x3c0 */, quirk->mem);
420 
421     QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks,
422                       quirk, next);
423 
424     trace_vfio_quirk_ati_3c3_probe(vdev->vbasedev.name);
425 }
426 
427 /*
428  * Newer ATI/AMD devices, including HD5450 and HD7850, have a mirror to PCI
429  * config space through MMIO BAR2 at offset 0x4000.  Nothing seems to access
430  * the MMIO space directly, but a window to this space is provided through
431  * I/O port BAR4.  Offset 0x0 is the address register and offset 0x4 is the
432  * data register.  When the address is programmed to a range of 0x4000-0x4fff
433  * PCI configuration space is available.  Experimentation seems to indicate
434  * that read-only may be provided by hardware.
435  */
436 static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr)
437 {
438     VFIOQuirk *quirk;
439     VFIOConfigWindowQuirk *window;
440 
441     /* This windows doesn't seem to be used except by legacy VGA code */
442     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
443         !vdev->vga || nr != 4) {
444         return;
445     }
446 
447     quirk = vfio_quirk_alloc(2);
448     window = quirk->data = g_malloc0(sizeof(*window) +
449                                      sizeof(VFIOConfigWindowMatch));
450     window->vdev = vdev;
451     window->address_offset = 0;
452     window->data_offset = 4;
453     window->nr_matches = 1;
454     window->matches[0].match = 0x4000;
455     window->matches[0].mask = vdev->config_size - 1;
456     window->bar = nr;
457     window->addr_mem = &quirk->mem[0];
458     window->data_mem = &quirk->mem[1];
459 
460     memory_region_init_io(window->addr_mem, OBJECT(vdev),
461                           &vfio_generic_window_address_quirk, window,
462                           "vfio-ati-bar4-window-address-quirk", 4);
463     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
464                                         window->address_offset,
465                                         window->addr_mem, 1);
466 
467     memory_region_init_io(window->data_mem, OBJECT(vdev),
468                           &vfio_generic_window_data_quirk, window,
469                           "vfio-ati-bar4-window-data-quirk", 4);
470     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
471                                         window->data_offset,
472                                         window->data_mem, 1);
473 
474     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
475 
476     trace_vfio_quirk_ati_bar4_probe(vdev->vbasedev.name);
477 }
478 
479 /*
480  * Trap the BAR2 MMIO mirror to config space as well.
481  */
482 static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice *vdev, int nr)
483 {
484     VFIOQuirk *quirk;
485     VFIOConfigMirrorQuirk *mirror;
486 
487     /* Only enable on newer devices where BAR2 is 64bit */
488     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
489         !vdev->vga || nr != 2 || !vdev->bars[2].mem64) {
490         return;
491     }
492 
493     quirk = vfio_quirk_alloc(1);
494     mirror = quirk->data = g_malloc0(sizeof(*mirror));
495     mirror->mem = quirk->mem;
496     mirror->vdev = vdev;
497     mirror->offset = 0x4000;
498     mirror->bar = nr;
499 
500     memory_region_init_io(mirror->mem, OBJECT(vdev),
501                           &vfio_generic_mirror_quirk, mirror,
502                           "vfio-ati-bar2-4000-quirk", PCI_CONFIG_SPACE_SIZE);
503     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
504                                         mirror->offset, mirror->mem, 1);
505 
506     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
507 
508     trace_vfio_quirk_ati_bar2_probe(vdev->vbasedev.name);
509 }
510 
511 /*
512  * Older ATI/AMD cards like the X550 have a similar window to that above.
513  * I/O port BAR1 provides a window to a mirror of PCI config space located
514  * in BAR2 at offset 0xf00.  We don't care to support such older cards, but
515  * note it for future reference.
516  */
517 
518 /*
519  * Nvidia has several different methods to get to config space, the
 * nouveau project has several of these documented here:
521  * https://github.com/pathscale/envytools/tree/master/hwdocs
522  *
523  * The first quirk is actually not documented in envytools and is found
524  * on 10de:01d1 (NVIDIA Corporation G72 [GeForce 7300 LE]).  This is an
525  * NV46 chipset.  The backdoor uses the legacy VGA I/O ports to access
526  * the mirror of PCI config space found at BAR0 offset 0x1800.  The access
527  * sequence first writes 0x338 to I/O port 0x3d4.  The target offset is
528  * then written to 0x3d0.  Finally 0x538 is written for a read and 0x738
529  * is written for a write to 0x3d4.  The BAR0 offset is then accessible
530  * through 0x3d0.  This quirk doesn't seem to be necessary on newer cards
531  * that use the I/O port BAR5 window but it doesn't hurt to leave it.
532  */
/*
 * States of the 0x3d4/0x3d0 backdoor sequence.  As driven by the handlers
 * below: NONE -> SELECT on a 0x338 write to 0x3d4, SELECT -> WINDOW on a
 * write to 0x3d0, WINDOW -> READ or WRITE on a 0x538 or 0x738 write to
 * 0x3d4.  Any other access returns the state to NONE.
 */
typedef enum {NONE = 0, SELECT, WINDOW, READ, WRITE} VFIONvidia3d0State;
/* State names for tracing, indexed by VFIONvidia3d0State */
static const char *nv3d0_states[] = { "NONE", "SELECT",
                                      "WINDOW", "READ", "WRITE" };
536 
/* State shared by the Nvidia 0x3d4 and 0x3d0 VGA I/O port handlers. */
typedef struct VFIONvidia3d0Quirk {
    VFIOPCIDevice *vdev;        /* device the quirk belongs to */
    VFIONvidia3d0State state;   /* current step of the access sequence */
    uint32_t offset;            /* value written to 0x3d0 in SELECT state */
} VFIONvidia3d0Quirk;
542 
543 static uint64_t vfio_nvidia_3d4_quirk_read(void *opaque,
544                                            hwaddr addr, unsigned size)
545 {
546     VFIONvidia3d0Quirk *quirk = opaque;
547     VFIOPCIDevice *vdev = quirk->vdev;
548 
549     quirk->state = NONE;
550 
551     return vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
552                          addr + 0x14, size);
553 }
554 
555 static void vfio_nvidia_3d4_quirk_write(void *opaque, hwaddr addr,
556                                         uint64_t data, unsigned size)
557 {
558     VFIONvidia3d0Quirk *quirk = opaque;
559     VFIOPCIDevice *vdev = quirk->vdev;
560     VFIONvidia3d0State old_state = quirk->state;
561 
562     quirk->state = NONE;
563 
564     switch (data) {
565     case 0x338:
566         if (old_state == NONE) {
567             quirk->state = SELECT;
568             trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
569                                               nv3d0_states[quirk->state]);
570         }
571         break;
572     case 0x538:
573         if (old_state == WINDOW) {
574             quirk->state = READ;
575             trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
576                                               nv3d0_states[quirk->state]);
577         }
578         break;
579     case 0x738:
580         if (old_state == WINDOW) {
581             quirk->state = WRITE;
582             trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
583                                               nv3d0_states[quirk->state]);
584         }
585         break;
586     }
587 
588     vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
589                    addr + 0x14, data, size);
590 }
591 
/* MemoryRegionOps for the Nvidia 0x3d4 I/O port quirk (little-endian). */
static const MemoryRegionOps vfio_nvidia_3d4_quirk = {
    .read = vfio_nvidia_3d4_quirk_read,
    .write = vfio_nvidia_3d4_quirk_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
597 
598 static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque,
599                                            hwaddr addr, unsigned size)
600 {
601     VFIONvidia3d0Quirk *quirk = opaque;
602     VFIOPCIDevice *vdev = quirk->vdev;
603     VFIONvidia3d0State old_state = quirk->state;
604     uint64_t data = vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
605                                   addr + 0x10, size);
606 
607     quirk->state = NONE;
608 
609     if (old_state == READ &&
610         (quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
611         uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
612 
613         data = vfio_pci_read_config(&vdev->pdev, offset, size);
614         trace_vfio_quirk_nvidia_3d0_read(vdev->vbasedev.name,
615                                          offset, size, data);
616     }
617 
618     return data;
619 }
620 
621 static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr,
622                                         uint64_t data, unsigned size)
623 {
624     VFIONvidia3d0Quirk *quirk = opaque;
625     VFIOPCIDevice *vdev = quirk->vdev;
626     VFIONvidia3d0State old_state = quirk->state;
627 
628     quirk->state = NONE;
629 
630     if (old_state == SELECT) {
631         quirk->offset = (uint32_t)data;
632         quirk->state = WINDOW;
633         trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
634                                           nv3d0_states[quirk->state]);
635     } else if (old_state == WRITE) {
636         if ((quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
637             uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
638 
639             vfio_pci_write_config(&vdev->pdev, offset, data, size);
640             trace_vfio_quirk_nvidia_3d0_write(vdev->vbasedev.name,
641                                               offset, data, size);
642             return;
643         }
644     }
645 
646     vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
647                    addr + 0x10, data, size);
648 }
649 
/* MemoryRegionOps for the Nvidia 0x3d0 I/O port quirk (little-endian). */
static const MemoryRegionOps vfio_nvidia_3d0_quirk = {
    .read = vfio_nvidia_3d0_quirk_read,
    .write = vfio_nvidia_3d0_quirk_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
655 
656 static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
657 {
658     VFIOQuirk *quirk;
659     VFIONvidia3d0Quirk *data;
660 
661     if (vdev->no_geforce_quirks ||
662         !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
663         !vdev->bars[1].region.size) {
664         return;
665     }
666 
667     quirk = vfio_quirk_alloc(2);
668     quirk->data = data = g_malloc0(sizeof(*data));
669     data->vdev = vdev;
670 
671     memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_nvidia_3d4_quirk,
672                           data, "vfio-nvidia-3d4-quirk", 2);
673     memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
674                                 0x14 /* 0x3c0 + 0x14 */, &quirk->mem[0]);
675 
676     memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_nvidia_3d0_quirk,
677                           data, "vfio-nvidia-3d0-quirk", 2);
678     memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
679                                 0x10 /* 0x3c0 + 0x10 */, &quirk->mem[1]);
680 
681     QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks,
682                       quirk, next);
683 
684     trace_vfio_quirk_nvidia_3d0_probe(vdev->vbasedev.name);
685 }
686 
687 /*
688  * The second quirk is documented in envytools.  The I/O port BAR5 is just
689  * a set of address/data ports to the MMIO BARs.  The BAR we care about is
690  * again BAR0.  This backdoor is apparently a bit newer than the one above
 * so we need to trap not only the 256 bytes @0x1800, but also all of PCI
 * config space, including extended space, available in the 4k @0x88000.
693  */
/* State for the Nvidia I/O port BAR5 address/data window quirk. */
typedef struct VFIONvidiaBAR5Quirk {
    uint32_t master;            /* last value written at BAR5 offset 0 */
    uint32_t enable;            /* last value written at BAR5 offset 4 */
    MemoryRegion *addr_mem;     /* window address region, toggled on/off */
    MemoryRegion *data_mem;     /* window data region, toggled on/off */
    bool enabled;               /* current state of the two window regions */
    VFIOConfigWindowQuirk window; /* last for match data */
} VFIONvidiaBAR5Quirk;
702 
703 static void vfio_nvidia_bar5_enable(VFIONvidiaBAR5Quirk *bar5)
704 {
705     VFIOPCIDevice *vdev = bar5->window.vdev;
706 
707     if (((bar5->master & bar5->enable) & 0x1) == bar5->enabled) {
708         return;
709     }
710 
711     bar5->enabled = !bar5->enabled;
712     trace_vfio_quirk_nvidia_bar5_state(vdev->vbasedev.name,
713                                        bar5->enabled ?  "Enable" : "Disable");
714     memory_region_set_enabled(bar5->addr_mem, bar5->enabled);
715     memory_region_set_enabled(bar5->data_mem, bar5->enabled);
716 }
717 
718 static uint64_t vfio_nvidia_bar5_quirk_master_read(void *opaque,
719                                                    hwaddr addr, unsigned size)
720 {
721     VFIONvidiaBAR5Quirk *bar5 = opaque;
722     VFIOPCIDevice *vdev = bar5->window.vdev;
723 
724     return vfio_region_read(&vdev->bars[5].region, addr, size);
725 }
726 
727 static void vfio_nvidia_bar5_quirk_master_write(void *opaque, hwaddr addr,
728                                                 uint64_t data, unsigned size)
729 {
730     VFIONvidiaBAR5Quirk *bar5 = opaque;
731     VFIOPCIDevice *vdev = bar5->window.vdev;
732 
733     vfio_region_write(&vdev->bars[5].region, addr, data, size);
734 
735     bar5->master = data;
736     vfio_nvidia_bar5_enable(bar5);
737 }
738 
/* MemoryRegionOps for the Nvidia BAR5 master register (little-endian). */
static const MemoryRegionOps vfio_nvidia_bar5_quirk_master = {
    .read = vfio_nvidia_bar5_quirk_master_read,
    .write = vfio_nvidia_bar5_quirk_master_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
744 
745 static uint64_t vfio_nvidia_bar5_quirk_enable_read(void *opaque,
746                                                    hwaddr addr, unsigned size)
747 {
748     VFIONvidiaBAR5Quirk *bar5 = opaque;
749     VFIOPCIDevice *vdev = bar5->window.vdev;
750 
751     return vfio_region_read(&vdev->bars[5].region, addr + 4, size);
752 }
753 
754 static void vfio_nvidia_bar5_quirk_enable_write(void *opaque, hwaddr addr,
755                                                 uint64_t data, unsigned size)
756 {
757     VFIONvidiaBAR5Quirk *bar5 = opaque;
758     VFIOPCIDevice *vdev = bar5->window.vdev;
759 
760     vfio_region_write(&vdev->bars[5].region, addr + 4, data, size);
761 
762     bar5->enable = data;
763     vfio_nvidia_bar5_enable(bar5);
764 }
765 
/* MemoryRegionOps for the Nvidia BAR5 enable register (little-endian). */
static const MemoryRegionOps vfio_nvidia_bar5_quirk_enable = {
    .read = vfio_nvidia_bar5_quirk_enable_read,
    .write = vfio_nvidia_bar5_quirk_enable_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
771 
772 static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
773 {
774     VFIOQuirk *quirk;
775     VFIONvidiaBAR5Quirk *bar5;
776     VFIOConfigWindowQuirk *window;
777 
778     if (vdev->no_geforce_quirks ||
779         !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
780         !vdev->vga || nr != 5 || !vdev->bars[5].ioport) {
781         return;
782     }
783 
784     quirk = vfio_quirk_alloc(4);
785     bar5 = quirk->data = g_malloc0(sizeof(*bar5) +
786                                    (sizeof(VFIOConfigWindowMatch) * 2));
787     window = &bar5->window;
788 
789     window->vdev = vdev;
790     window->address_offset = 0x8;
791     window->data_offset = 0xc;
792     window->nr_matches = 2;
793     window->matches[0].match = 0x1800;
794     window->matches[0].mask = PCI_CONFIG_SPACE_SIZE - 1;
795     window->matches[1].match = 0x88000;
796     window->matches[1].mask = vdev->config_size - 1;
797     window->bar = nr;
798     window->addr_mem = bar5->addr_mem = &quirk->mem[0];
799     window->data_mem = bar5->data_mem = &quirk->mem[1];
800 
801     memory_region_init_io(window->addr_mem, OBJECT(vdev),
802                           &vfio_generic_window_address_quirk, window,
803                           "vfio-nvidia-bar5-window-address-quirk", 4);
804     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
805                                         window->address_offset,
806                                         window->addr_mem, 1);
807     memory_region_set_enabled(window->addr_mem, false);
808 
809     memory_region_init_io(window->data_mem, OBJECT(vdev),
810                           &vfio_generic_window_data_quirk, window,
811                           "vfio-nvidia-bar5-window-data-quirk", 4);
812     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
813                                         window->data_offset,
814                                         window->data_mem, 1);
815     memory_region_set_enabled(window->data_mem, false);
816 
817     memory_region_init_io(&quirk->mem[2], OBJECT(vdev),
818                           &vfio_nvidia_bar5_quirk_master, bar5,
819                           "vfio-nvidia-bar5-master-quirk", 4);
820     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
821                                         0, &quirk->mem[2], 1);
822 
823     memory_region_init_io(&quirk->mem[3], OBJECT(vdev),
824                           &vfio_nvidia_bar5_quirk_enable, bar5,
825                           "vfio-nvidia-bar5-enable-quirk", 4);
826     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
827                                         4, &quirk->mem[3], 1);
828 
829     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
830 
831     trace_vfio_quirk_nvidia_bar5_probe(vdev->vbasedev.name);
832 }
833 
834 typedef struct LastDataSet {
835     VFIOQuirk *quirk;
836     hwaddr addr;
837     uint64_t data;
838     unsigned size;
839     int hits;
840     int added;
841 } LastDataSet;
842 
843 #define MAX_DYN_IOEVENTFD 10
844 #define HITS_FOR_IOEVENTFD 10
845 
846 /*
847  * Finally, BAR0 itself.  We want to redirect any accesses to either
848  * 0x1800 or 0x88000 through the PCI config space access functions.
849  */
850 static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr,
851                                            uint64_t data, unsigned size)
852 {
853     VFIOConfigMirrorQuirk *mirror = opaque;
854     VFIOPCIDevice *vdev = mirror->vdev;
855     PCIDevice *pdev = &vdev->pdev;
856     LastDataSet *last = (LastDataSet *)&mirror->data;
857 
858     vfio_generic_quirk_mirror_write(opaque, addr, data, size);
859 
860     /*
861      * Nvidia seems to acknowledge MSI interrupts by writing 0xff to the
862      * MSI capability ID register.  Both the ID and next register are
863      * read-only, so we allow writes covering either of those to real hw.
864      */
865     if ((pdev->cap_present & QEMU_PCI_CAP_MSI) &&
866         vfio_range_contained(addr, size, pdev->msi_cap, PCI_MSI_FLAGS)) {
867         vfio_region_write(&vdev->bars[mirror->bar].region,
868                           addr + mirror->offset, data, size);
869         trace_vfio_quirk_nvidia_bar0_msi_ack(vdev->vbasedev.name);
870     }
871 
872     /*
873      * Automatically add an ioeventfd to handle any repeated write with the
874      * same data and size above the standard PCI config space header.  This is
875      * primarily expected to accelerate the MSI-ACK behavior, such as noted
876      * above.  Current hardware/drivers should trigger an ioeventfd at config
877      * offset 0x704 (region offset 0x88704), with data 0x0, size 4.
878      *
879      * The criteria of 10 successive hits is arbitrary but reliably adds the
880      * MSI-ACK region.  Note that as some writes are bypassed via the ioeventfd,
881      * the remaining ones have a greater chance of being seen successively.
882      * To avoid the pathological case of burning up all of QEMU's open file
883      * handles, arbitrarily limit this algorithm from adding no more than 10
884      * ioeventfds, print an error if we would have added an 11th, and then
885      * stop counting.
886      */
887     if (!vdev->no_kvm_ioeventfd &&
888         addr >= PCI_STD_HEADER_SIZEOF && last->added <= MAX_DYN_IOEVENTFD) {
889         if (addr != last->addr || data != last->data || size != last->size) {
890             last->addr = addr;
891             last->data = data;
892             last->size = size;
893             last->hits = 1;
894         } else if (++last->hits >= HITS_FOR_IOEVENTFD) {
895             if (last->added < MAX_DYN_IOEVENTFD) {
896                 VFIOIOEventFD *ioeventfd;
897                 ioeventfd = vfio_ioeventfd_init(vdev, mirror->mem, addr, size,
898                                         data, &vdev->bars[mirror->bar].region,
899                                         mirror->offset + addr, true);
900                 if (ioeventfd) {
901                     VFIOQuirk *quirk = last->quirk;
902 
903                     QLIST_INSERT_HEAD(&quirk->ioeventfds, ioeventfd, next);
904                     last->added++;
905                 }
906             } else {
907                 last->added++;
908                 warn_report("NVIDIA ioeventfd queue full for %s, unable to "
909                             "accelerate 0x%"HWADDR_PRIx", data 0x%"PRIx64", "
910                             "size %u", vdev->vbasedev.name, addr, data, size);
911             }
912         }
913     }
914 }
915 
916 static const MemoryRegionOps vfio_nvidia_mirror_quirk = {
917     .read = vfio_generic_quirk_mirror_read,
918     .write = vfio_nvidia_quirk_mirror_write,
919     .endianness = DEVICE_LITTLE_ENDIAN,
920 };
921 
922 static void vfio_nvidia_bar0_quirk_reset(VFIOPCIDevice *vdev, VFIOQuirk *quirk)
923 {
924     VFIOConfigMirrorQuirk *mirror = quirk->data;
925     LastDataSet *last = (LastDataSet *)&mirror->data;
926 
927     last->addr = last->data = last->size = last->hits = last->added = 0;
928 
929     vfio_drop_dynamic_eventfds(vdev, quirk);
930 }
931 
932 static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
933 {
934     VFIOQuirk *quirk;
935     VFIOConfigMirrorQuirk *mirror;
936     LastDataSet *last;
937 
938     if (vdev->no_geforce_quirks ||
939         !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
940         !vfio_is_vga(vdev) || nr != 0) {
941         return;
942     }
943 
944     quirk = vfio_quirk_alloc(1);
945     quirk->reset = vfio_nvidia_bar0_quirk_reset;
946     mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
947     mirror->mem = quirk->mem;
948     mirror->vdev = vdev;
949     mirror->offset = 0x88000;
950     mirror->bar = nr;
951     last = (LastDataSet *)&mirror->data;
952     last->quirk = quirk;
953 
954     memory_region_init_io(mirror->mem, OBJECT(vdev),
955                           &vfio_nvidia_mirror_quirk, mirror,
956                           "vfio-nvidia-bar0-88000-mirror-quirk",
957                           vdev->config_size);
958     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
959                                         mirror->offset, mirror->mem, 1);
960 
961     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
962 
963     /* The 0x1800 offset mirror only seems to get used by legacy VGA */
964     if (vdev->vga) {
965         quirk = vfio_quirk_alloc(1);
966         quirk->reset = vfio_nvidia_bar0_quirk_reset;
967         mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
968         mirror->mem = quirk->mem;
969         mirror->vdev = vdev;
970         mirror->offset = 0x1800;
971         mirror->bar = nr;
972         last = (LastDataSet *)&mirror->data;
973         last->quirk = quirk;
974 
975         memory_region_init_io(mirror->mem, OBJECT(vdev),
976                               &vfio_nvidia_mirror_quirk, mirror,
977                               "vfio-nvidia-bar0-1800-mirror-quirk",
978                               PCI_CONFIG_SPACE_SIZE);
979         memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
980                                             mirror->offset, mirror->mem, 1);
981 
982         QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
983     }
984 
985     trace_vfio_quirk_nvidia_bar0_probe(vdev->vbasedev.name);
986 }
987 
988 /*
989  * TODO - Some Nvidia devices provide config access to their companion HDA
990  * device and even to their parent bridge via these config space mirrors.
991  * Add quirks for those regions.
992  */
993 
994 #define PCI_VENDOR_ID_REALTEK 0x10ec
995 
996 /*
997  * RTL8168 devices have a backdoor that can access the MSI-X table.  At BAR2
998  * offset 0x70 there is a dword data register, offset 0x74 is a dword address
999  * register.  According to the Linux r8169 driver, the MSI-X table is addressed
1000  * when the "type" portion of the address register is set to 0x1.  This appears
1001  * to be bits 16:30.  Bit 31 is both a write indicator and some sort of
1002  * "address latched" indicator.  Bits 12:15 are a mask field, which we can
1003  * ignore because the MSI-X table should always be accessed as a dword (full
1004  * mask).  Bits 0:11 is offset within the type.
1005  *
1006  * Example trace:
1007  *
1008  * Read from MSI-X table offset 0
1009  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x1f000, 4) // store read addr
1010  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x8001f000 // latch
1011  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x70, 4) = 0xfee00398 // read data
1012  *
1013  * Write 0xfee00000 to MSI-X table offset 0
1014  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x70, 0xfee00000, 4) // write data
1015  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x8001f000, 4) // do write
1016  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x1f000 // complete
1017  */
1018 typedef struct VFIOrtl8168Quirk {
1019     VFIOPCIDevice *vdev;
1020     uint32_t addr;
1021     uint32_t data;
1022     bool enabled;
1023 } VFIOrtl8168Quirk;
1024 
1025 static uint64_t vfio_rtl8168_quirk_address_read(void *opaque,
1026                                                 hwaddr addr, unsigned size)
1027 {
1028     VFIOrtl8168Quirk *rtl = opaque;
1029     VFIOPCIDevice *vdev = rtl->vdev;
1030     uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x74, size);
1031 
1032     if (rtl->enabled) {
1033         data = rtl->addr ^ 0x80000000U; /* latch/complete */
1034         trace_vfio_quirk_rtl8168_fake_latch(vdev->vbasedev.name, data);
1035     }
1036 
1037     return data;
1038 }
1039 
1040 static void vfio_rtl8168_quirk_address_write(void *opaque, hwaddr addr,
1041                                              uint64_t data, unsigned size)
1042 {
1043     VFIOrtl8168Quirk *rtl = opaque;
1044     VFIOPCIDevice *vdev = rtl->vdev;
1045 
1046     rtl->enabled = false;
1047 
1048     if ((data & 0x7fff0000) == 0x10000) { /* MSI-X table */
1049         rtl->enabled = true;
1050         rtl->addr = (uint32_t)data;
1051 
1052         if (data & 0x80000000U) { /* Do write */
1053             if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) {
1054                 hwaddr offset = data & 0xfff;
1055                 uint64_t val = rtl->data;
1056 
1057                 trace_vfio_quirk_rtl8168_msix_write(vdev->vbasedev.name,
1058                                                     (uint16_t)offset, val);
1059 
1060                 /* Write to the proper guest MSI-X table instead */
1061                 memory_region_dispatch_write(&vdev->pdev.msix_table_mmio,
1062                                              offset, val,
1063                                              size_memop(size) | MO_LE,
1064                                              MEMTXATTRS_UNSPECIFIED);
1065             }
1066             return; /* Do not write guest MSI-X data to hardware */
1067         }
1068     }
1069 
1070     vfio_region_write(&vdev->bars[2].region, addr + 0x74, data, size);
1071 }
1072 
1073 static const MemoryRegionOps vfio_rtl_address_quirk = {
1074     .read = vfio_rtl8168_quirk_address_read,
1075     .write = vfio_rtl8168_quirk_address_write,
1076     .valid = {
1077         .min_access_size = 4,
1078         .max_access_size = 4,
1079         .unaligned = false,
1080     },
1081     .endianness = DEVICE_LITTLE_ENDIAN,
1082 };
1083 
1084 static uint64_t vfio_rtl8168_quirk_data_read(void *opaque,
1085                                              hwaddr addr, unsigned size)
1086 {
1087     VFIOrtl8168Quirk *rtl = opaque;
1088     VFIOPCIDevice *vdev = rtl->vdev;
1089     uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x70, size);
1090 
1091     if (rtl->enabled && (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) {
1092         hwaddr offset = rtl->addr & 0xfff;
1093         memory_region_dispatch_read(&vdev->pdev.msix_table_mmio, offset,
1094                                     &data, size_memop(size) | MO_LE,
1095                                     MEMTXATTRS_UNSPECIFIED);
1096         trace_vfio_quirk_rtl8168_msix_read(vdev->vbasedev.name, offset, data);
1097     }
1098 
1099     return data;
1100 }
1101 
1102 static void vfio_rtl8168_quirk_data_write(void *opaque, hwaddr addr,
1103                                           uint64_t data, unsigned size)
1104 {
1105     VFIOrtl8168Quirk *rtl = opaque;
1106     VFIOPCIDevice *vdev = rtl->vdev;
1107 
1108     rtl->data = (uint32_t)data;
1109 
1110     vfio_region_write(&vdev->bars[2].region, addr + 0x70, data, size);
1111 }
1112 
1113 static const MemoryRegionOps vfio_rtl_data_quirk = {
1114     .read = vfio_rtl8168_quirk_data_read,
1115     .write = vfio_rtl8168_quirk_data_write,
1116     .valid = {
1117         .min_access_size = 4,
1118         .max_access_size = 4,
1119         .unaligned = false,
1120     },
1121     .endianness = DEVICE_LITTLE_ENDIAN,
1122 };
1123 
1124 static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
1125 {
1126     VFIOQuirk *quirk;
1127     VFIOrtl8168Quirk *rtl;
1128 
1129     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_REALTEK, 0x8168) || nr != 2) {
1130         return;
1131     }
1132 
1133     quirk = vfio_quirk_alloc(2);
1134     quirk->data = rtl = g_malloc0(sizeof(*rtl));
1135     rtl->vdev = vdev;
1136 
1137     memory_region_init_io(&quirk->mem[0], OBJECT(vdev),
1138                           &vfio_rtl_address_quirk, rtl,
1139                           "vfio-rtl8168-window-address-quirk", 4);
1140     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
1141                                         0x74, &quirk->mem[0], 1);
1142 
1143     memory_region_init_io(&quirk->mem[1], OBJECT(vdev),
1144                           &vfio_rtl_data_quirk, rtl,
1145                           "vfio-rtl8168-window-data-quirk", 4);
1146     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
1147                                         0x70, &quirk->mem[1], 1);
1148 
1149     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
1150 
1151     trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name);
1152 }
1153 
1154 #define IGD_ASLS 0xfc /* ASL Storage Register */
1155 
1156 /*
1157  * The OpRegion includes the Video BIOS Table, which seems important for
1158  * telling the driver what sort of outputs it has.  Without this, the device
1159  * may work in the guest, but we may not get output.  This also requires BIOS
1160  * support to reserve and populate a section of guest memory sufficient for
1161  * the table and to write the base address of that memory to the ASLS register
1162  * of the IGD device.
1163  */
1164 int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
1165                                struct vfio_region_info *info, Error **errp)
1166 {
1167     int ret;
1168 
1169     vdev->igd_opregion = g_malloc0(info->size);
1170     ret = pread(vdev->vbasedev.fd, vdev->igd_opregion,
1171                 info->size, info->offset);
1172     if (ret != info->size) {
1173         error_setg(errp, "failed to read IGD OpRegion");
1174         g_free(vdev->igd_opregion);
1175         vdev->igd_opregion = NULL;
1176         return -EINVAL;
1177     }
1178 
1179     /*
1180      * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to
1181      * allocate 32bit reserved memory for, copy these contents into, and write
1182      * the reserved memory base address to the device ASLS register at 0xFC.
1183      * Alignment of this reserved region seems flexible, but using a 4k page
1184      * alignment seems to work well.  This interface assumes a single IGD
1185      * device, which may be at VM address 00:02.0 in legacy mode or another
1186      * address in UPT mode.
1187      *
1188      * NB, there may be future use cases discovered where the VM should have
1189      * direct interaction with the host OpRegion, in which case the write to
1190      * the ASLS register would trigger MemoryRegion setup to enable that.
1191      */
1192     fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion",
1193                     vdev->igd_opregion, info->size);
1194 
1195     trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name);
1196 
1197     pci_set_long(vdev->pdev.config + IGD_ASLS, 0);
1198     pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0);
1199     pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0);
1200 
1201     return 0;
1202 }
1203 
1204 /*
1205  * Common quirk probe entry points.
1206  */
/*
 * Run the VGA quirk probes (ATI 0x3c3 and NVIDIA 0x3d0) against @vdev.
 * Both probes are called unconditionally; presumably each decides for
 * itself whether it applies to the device - TODO confirm against the
 * probe implementations.
 */
void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
{
    vfio_vga_probe_ati_3c3_quirk(vdev);
    vfio_vga_probe_nvidia_3d0_quirk(vdev);
}
1212 
1213 void vfio_vga_quirk_exit(VFIOPCIDevice *vdev)
1214 {
1215     VFIOQuirk *quirk;
1216     int i, j;
1217 
1218     for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
1219         QLIST_FOREACH(quirk, &vdev->vga->region[i].quirks, next) {
1220             for (j = 0; j < quirk->nr_mem; j++) {
1221                 memory_region_del_subregion(&vdev->vga->region[i].mem,
1222                                             &quirk->mem[j]);
1223             }
1224         }
1225     }
1226 }
1227 
1228 void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev)
1229 {
1230     int i, j;
1231 
1232     for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
1233         while (!QLIST_EMPTY(&vdev->vga->region[i].quirks)) {
1234             VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga->region[i].quirks);
1235             QLIST_REMOVE(quirk, next);
1236             for (j = 0; j < quirk->nr_mem; j++) {
1237                 object_unparent(OBJECT(&quirk->mem[j]));
1238             }
1239             g_free(quirk->mem);
1240             g_free(quirk->data);
1241             g_free(quirk);
1242         }
1243     }
1244 }
1245 
/*
 * Run every BAR quirk probe against BAR @nr of @vdev.  Each probe is called
 * unconditionally; the NVIDIA and RTL8168 probes defined in this file
 * return early when the device or BAR number does not match, and the others
 * presumably do the same - TODO confirm.  The IGD BAR4 probe is only built
 * when CONFIG_VFIO_IGD is defined.
 */
void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
{
    vfio_probe_ati_bar4_quirk(vdev, nr);
    vfio_probe_ati_bar2_quirk(vdev, nr);
    vfio_probe_nvidia_bar5_quirk(vdev, nr);
    vfio_probe_nvidia_bar0_quirk(vdev, nr);
    vfio_probe_rtl8168_bar2_quirk(vdev, nr);
#ifdef CONFIG_VFIO_IGD
    vfio_probe_igd_bar4_quirk(vdev, nr);
#endif
}
1257 
1258 void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr)
1259 {
1260     VFIOBAR *bar = &vdev->bars[nr];
1261     VFIOQuirk *quirk;
1262     int i;
1263 
1264     QLIST_FOREACH(quirk, &bar->quirks, next) {
1265         while (!QLIST_EMPTY(&quirk->ioeventfds)) {
1266             vfio_ioeventfd_exit(vdev, QLIST_FIRST(&quirk->ioeventfds));
1267         }
1268 
1269         for (i = 0; i < quirk->nr_mem; i++) {
1270             memory_region_del_subregion(bar->region.mem, &quirk->mem[i]);
1271         }
1272     }
1273 }
1274 
1275 void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr)
1276 {
1277     VFIOBAR *bar = &vdev->bars[nr];
1278     int i;
1279 
1280     while (!QLIST_EMPTY(&bar->quirks)) {
1281         VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
1282         QLIST_REMOVE(quirk, next);
1283         for (i = 0; i < quirk->nr_mem; i++) {
1284             object_unparent(OBJECT(&quirk->mem[i]));
1285         }
1286         g_free(quirk->mem);
1287         g_free(quirk->data);
1288         g_free(quirk);
1289     }
1290 }
1291 
1292 /*
1293  * Reset quirks
1294  */
1295 void vfio_quirk_reset(VFIOPCIDevice *vdev)
1296 {
1297     int i;
1298 
1299     for (i = 0; i < PCI_ROM_SLOT; i++) {
1300         VFIOQuirk *quirk;
1301         VFIOBAR *bar = &vdev->bars[i];
1302 
1303         QLIST_FOREACH(quirk, &bar->quirks, next) {
1304             if (quirk->reset) {
1305                 quirk->reset(vdev, quirk);
1306             }
1307         }
1308     }
1309 }
1310 
1311 /*
1312  * AMD Radeon PCI config reset, based on Linux:
1313  *   drivers/gpu/drm/radeon/ci_smc.c:ci_is_smc_running()
1314  *   drivers/gpu/drm/radeon/radeon_device.c:radeon_pci_config_reset
1315  *   drivers/gpu/drm/radeon/ci_smc.c:ci_reset_smc()
1316  *   drivers/gpu/drm/radeon/ci_smc.c:ci_stop_smc_clock()
1317  * IDs: include/drm/drm_pciids.h
1318  * Registers: http://cgit.freedesktop.org/~agd5f/linux/commit/?id=4e2aa447f6f0
1319  *
1320  * Bonaire and Hawaii GPUs do not respond to a bus reset.  This is a bug in the
1321  * hardware that should be fixed on future ASICs.  The symptom of this is that
 * once the accelerated driver loads, Windows guests will bsod on subsequent
 * attempts to load the driver, such as after VM reset or shutdown/restart.  To
1324  * work around this, we do an AMD specific PCI config reset, followed by an SMC
1325  * reset.  The PCI config reset only works if SMC firmware is running, so we
1326  * have a dependency on the state of the device as to whether this reset will
1327  * be effective.  There are still cases where we won't be able to kick the
1328  * device into working, but this greatly improves the usability overall.  The
1329  * config reset magic is relatively common on AMD GPUs, but the setup and SMC
1330  * poking is largely ASIC specific.
1331  */
1332 static bool vfio_radeon_smc_is_running(VFIOPCIDevice *vdev)
1333 {
1334     uint32_t clk, pc_c;
1335 
1336     /*
1337      * Registers 200h and 204h are index and data registers for accessing
1338      * indirect configuration registers within the device.
1339      */
1340     vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
1341     clk = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1342     vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000370, 4);
1343     pc_c = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1344 
1345     return (!(clk & 1) && (0x20100 <= pc_c));
1346 }
1347 
1348 /*
1349  * The scope of a config reset is controlled by a mode bit in the misc register
1350  * and a fuse, exposed as a bit in another register.  The fuse is the default
1351  * (0 = GFX, 1 = whole GPU), the misc bit is a toggle, with the forumula
1352  * scope = !(misc ^ fuse), where the resulting scope is defined the same as
1353  * the fuse.  A truth table therefore tells us that if misc == fuse, we need
1354  * to flip the value of the bit in the misc register.
1355  */
1356 static void vfio_radeon_set_gfx_only_reset(VFIOPCIDevice *vdev)
1357 {
1358     uint32_t misc, fuse;
1359     bool a, b;
1360 
1361     vfio_region_write(&vdev->bars[5].region, 0x200, 0xc00c0000, 4);
1362     fuse = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1363     b = fuse & 64;
1364 
1365     vfio_region_write(&vdev->bars[5].region, 0x200, 0xc0000010, 4);
1366     misc = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1367     a = misc & 2;
1368 
1369     if (a == b) {
1370         vfio_region_write(&vdev->bars[5].region, 0x204, misc ^ 2, 4);
1371         vfio_region_read(&vdev->bars[5].region, 0x204, 4); /* flush */
1372     }
1373 }
1374 
1375 static int vfio_radeon_reset(VFIOPCIDevice *vdev)
1376 {
1377     PCIDevice *pdev = &vdev->pdev;
1378     int i, ret = 0;
1379     uint32_t data;
1380 
1381     /* Defer to a kernel implemented reset */
1382     if (vdev->vbasedev.reset_works) {
1383         trace_vfio_quirk_ati_bonaire_reset_skipped(vdev->vbasedev.name);
1384         return -ENODEV;
1385     }
1386 
1387     /* Enable only memory BAR access */
1388     vfio_pci_write_config(pdev, PCI_COMMAND, PCI_COMMAND_MEMORY, 2);
1389 
1390     /* Reset only works if SMC firmware is loaded and running */
1391     if (!vfio_radeon_smc_is_running(vdev)) {
1392         ret = -EINVAL;
1393         trace_vfio_quirk_ati_bonaire_reset_no_smc(vdev->vbasedev.name);
1394         goto out;
1395     }
1396 
1397     /* Make sure only the GFX function is reset */
1398     vfio_radeon_set_gfx_only_reset(vdev);
1399 
1400     /* AMD PCI config reset */
1401     vfio_pci_write_config(pdev, 0x7c, 0x39d5e86b, 4);
1402     usleep(100);
1403 
1404     /* Read back the memory size to make sure we're out of reset */
1405     for (i = 0; i < 100000; i++) {
1406         if (vfio_region_read(&vdev->bars[5].region, 0x5428, 4) != 0xffffffff) {
1407             goto reset_smc;
1408         }
1409         usleep(1);
1410     }
1411 
1412     trace_vfio_quirk_ati_bonaire_reset_timeout(vdev->vbasedev.name);
1413 
1414 reset_smc:
1415     /* Reset SMC */
1416     vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000000, 4);
1417     data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1418     data |= 1;
1419     vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);
1420 
1421     /* Disable SMC clock */
1422     vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
1423     data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1424     data |= 1;
1425     vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);
1426 
1427     trace_vfio_quirk_ati_bonaire_reset_done(vdev->vbasedev.name);
1428 
1429 out:
1430     /* Restore PCI command register */
1431     vfio_pci_write_config(pdev, PCI_COMMAND, 0, 2);
1432 
1433     return ret;
1434 }
1435 
1436 void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev)
1437 {
1438     switch (vdev->vendor_id) {
1439     case 0x1002:
1440         switch (vdev->device_id) {
1441         /* Bonaire */
1442         case 0x6649: /* Bonaire [FirePro W5100] */
1443         case 0x6650:
1444         case 0x6651:
1445         case 0x6658: /* Bonaire XTX [Radeon R7 260X] */
1446         case 0x665c: /* Bonaire XT [Radeon HD 7790/8770 / R9 260 OEM] */
1447         case 0x665d: /* Bonaire [Radeon R7 200 Series] */
1448         /* Hawaii */
1449         case 0x67A0: /* Hawaii XT GL [FirePro W9100] */
1450         case 0x67A1: /* Hawaii PRO GL [FirePro W8100] */
1451         case 0x67A2:
1452         case 0x67A8:
1453         case 0x67A9:
1454         case 0x67AA:
1455         case 0x67B0: /* Hawaii XT [Radeon R9 290X] */
1456         case 0x67B1: /* Hawaii PRO [Radeon R9 290] */
1457         case 0x67B8:
1458         case 0x67B9:
1459         case 0x67BA:
1460         case 0x67BE:
1461             vdev->resetfn = vfio_radeon_reset;
1462             trace_vfio_quirk_ati_bonaire_reset(vdev->vbasedev.name);
1463             break;
1464         }
1465         break;
1466     }
1467 }
1468 
1469 /*
1470  * The NVIDIA GPUDirect P2P Vendor capability allows the user to specify
1471  * devices as a member of a clique.  Devices within the same clique ID
1472  * are capable of direct P2P.  It's the user's responsibility that this
1473  * is correct.  The spec says that this may reside at any unused config
1474  * offset, but reserves and recommends hypervisors place this at C8h.
1475  * The spec also states that the hypervisor should place this capability
1476  * at the end of the capability list, thus next is defined as 0h.
1477  *
1478  * +----------------+----------------+----------------+----------------+
1479  * | sig 7:0 ('P')  |  vndr len (8h) |    next (0h)   |   cap id (9h)  |
1480  * +----------------+----------------+----------------+----------------+
1481  * | rsvd 15:7(0h),id 6:3,ver 2:0(0h)|          sig 23:8 ('P2')        |
1482  * +---------------------------------+---------------------------------+
1483  *
1484  * https://lists.gnu.org/archive/html/qemu-devel/2017-08/pdfUda5iEpgOS.pdf
1485  */
1486 static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v,
1487                                        const char *name, void *opaque,
1488                                        Error **errp)
1489 {
1490     DeviceState *dev = DEVICE(obj);
1491     Property *prop = opaque;
1492     uint8_t *ptr = qdev_get_prop_ptr(dev, prop);
1493 
1494     visit_type_uint8(v, name, ptr, errp);
1495 }
1496 
1497 static void set_nv_gpudirect_clique_id(Object *obj, Visitor *v,
1498                                        const char *name, void *opaque,
1499                                        Error **errp)
1500 {
1501     DeviceState *dev = DEVICE(obj);
1502     Property *prop = opaque;
1503     uint8_t value, *ptr = qdev_get_prop_ptr(dev, prop);
1504     Error *local_err = NULL;
1505 
1506     if (dev->realized) {
1507         qdev_prop_set_after_realize(dev, name, errp);
1508         return;
1509     }
1510 
1511     visit_type_uint8(v, name, &value, &local_err);
1512     if (local_err) {
1513         error_propagate(errp, local_err);
1514         return;
1515     }
1516 
1517     if (value & ~0xF) {
1518         error_setg(errp, "Property %s: valid range 0-15", name);
1519         return;
1520     }
1521 
1522     *ptr = value;
1523 }
1524 
1525 const PropertyInfo qdev_prop_nv_gpudirect_clique = {
1526     .name = "uint4",
1527     .description = "NVIDIA GPUDirect Clique ID (0 - 15)",
1528     .get = get_nv_gpudirect_clique_id,
1529     .set = set_nv_gpudirect_clique_id,
1530 };
1531 
1532 static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
1533 {
1534     PCIDevice *pdev = &vdev->pdev;
1535     int ret, pos = 0xC8;
1536 
1537     if (vdev->nv_gpudirect_clique == 0xFF) {
1538         return 0;
1539     }
1540 
1541     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID)) {
1542         error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid device vendor");
1543         return -EINVAL;
1544     }
1545 
1546     if (pci_get_byte(pdev->config + PCI_CLASS_DEVICE + 1) !=
1547         PCI_BASE_CLASS_DISPLAY) {
1548         error_setg(errp, "NVIDIA GPUDirect Clique ID: unsupported PCI class");
1549         return -EINVAL;
1550     }
1551 
1552     ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp);
1553     if (ret < 0) {
1554         error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: ");
1555         return ret;
1556     }
1557 
1558     memset(vdev->emulated_config_bits + pos, 0xFF, 8);
1559     pos += PCI_CAP_FLAGS;
1560     pci_set_byte(pdev->config + pos++, 8);
1561     pci_set_byte(pdev->config + pos++, 'P');
1562     pci_set_byte(pdev->config + pos++, '2');
1563     pci_set_byte(pdev->config + pos++, 'P');
1564     pci_set_byte(pdev->config + pos++, vdev->nv_gpudirect_clique << 3);
1565     pci_set_byte(pdev->config + pos, 0);
1566 
1567     return 0;
1568 }
1569 
1570 int vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp)
1571 {
1572     int ret;
1573 
1574     ret = vfio_add_nv_gpudirect_cap(vdev, errp);
1575     if (ret) {
1576         return ret;
1577     }
1578 
1579     return 0;
1580 }
1581 
1582 static void vfio_pci_nvlink2_get_tgt(Object *obj, Visitor *v,
1583                                      const char *name,
1584                                      void *opaque, Error **errp)
1585 {
1586     uint64_t tgt = (uintptr_t) opaque;
1587     visit_type_uint64(v, name, &tgt, errp);
1588 }
1589 
1590 static void vfio_pci_nvlink2_get_link_speed(Object *obj, Visitor *v,
1591                                                  const char *name,
1592                                                  void *opaque, Error **errp)
1593 {
1594     uint32_t link_speed = (uint32_t)(uintptr_t) opaque;
1595     visit_type_uint32(v, name, &link_speed, errp);
1596 }
1597 
1598 int vfio_pci_nvidia_v100_ram_init(VFIOPCIDevice *vdev, Error **errp)
1599 {
1600     int ret;
1601     void *p;
1602     struct vfio_region_info *nv2reg = NULL;
1603     struct vfio_info_cap_header *hdr;
1604     struct vfio_region_info_cap_nvlink2_ssatgt *cap;
1605     VFIOQuirk *quirk;
1606 
1607     ret = vfio_get_dev_region_info(&vdev->vbasedev,
1608                                    VFIO_REGION_TYPE_PCI_VENDOR_TYPE |
1609                                    PCI_VENDOR_ID_NVIDIA,
1610                                    VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM,
1611                                    &nv2reg);
1612     if (ret) {
1613         return ret;
1614     }
1615 
1616     hdr = vfio_get_region_info_cap(nv2reg, VFIO_REGION_INFO_CAP_NVLINK2_SSATGT);
1617     if (!hdr) {
1618         ret = -ENODEV;
1619         goto free_exit;
1620     }
1621     cap = (void *) hdr;
1622 
1623     p = mmap(NULL, nv2reg->size, PROT_READ | PROT_WRITE,
1624              MAP_SHARED, vdev->vbasedev.fd, nv2reg->offset);
1625     if (p == MAP_FAILED) {
1626         ret = -errno;
1627         goto free_exit;
1628     }
1629 
1630     quirk = vfio_quirk_alloc(1);
1631     memory_region_init_ram_ptr(&quirk->mem[0], OBJECT(vdev), "nvlink2-mr",
1632                                nv2reg->size, p);
1633     QLIST_INSERT_HEAD(&vdev->bars[0].quirks, quirk, next);
1634 
1635     object_property_add(OBJECT(vdev), "nvlink2-tgt", "uint64",
1636                         vfio_pci_nvlink2_get_tgt, NULL, NULL,
1637                         (void *) (uintptr_t) cap->tgt);
1638     trace_vfio_pci_nvidia_gpu_setup_quirk(vdev->vbasedev.name, cap->tgt,
1639                                           nv2reg->size);
1640 free_exit:
1641     g_free(nv2reg);
1642 
1643     return ret;
1644 }
1645 
1646 int vfio_pci_nvlink2_init(VFIOPCIDevice *vdev, Error **errp)
1647 {
1648     int ret;
1649     void *p;
1650     struct vfio_region_info *atsdreg = NULL;
1651     struct vfio_info_cap_header *hdr;
1652     struct vfio_region_info_cap_nvlink2_ssatgt *captgt;
1653     struct vfio_region_info_cap_nvlink2_lnkspd *capspeed;
1654     VFIOQuirk *quirk;
1655 
1656     ret = vfio_get_dev_region_info(&vdev->vbasedev,
1657                                    VFIO_REGION_TYPE_PCI_VENDOR_TYPE |
1658                                    PCI_VENDOR_ID_IBM,
1659                                    VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD,
1660                                    &atsdreg);
1661     if (ret) {
1662         return ret;
1663     }
1664 
1665     hdr = vfio_get_region_info_cap(atsdreg,
1666                                    VFIO_REGION_INFO_CAP_NVLINK2_SSATGT);
1667     if (!hdr) {
1668         ret = -ENODEV;
1669         goto free_exit;
1670     }
1671     captgt = (void *) hdr;
1672 
1673     hdr = vfio_get_region_info_cap(atsdreg,
1674                                    VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD);
1675     if (!hdr) {
1676         ret = -ENODEV;
1677         goto free_exit;
1678     }
1679     capspeed = (void *) hdr;
1680 
1681     /* Some NVLink bridges may not have assigned ATSD */
1682     if (atsdreg->size) {
1683         p = mmap(NULL, atsdreg->size, PROT_READ | PROT_WRITE,
1684                  MAP_SHARED, vdev->vbasedev.fd, atsdreg->offset);
1685         if (p == MAP_FAILED) {
1686             ret = -errno;
1687             goto free_exit;
1688         }
1689 
1690         quirk = vfio_quirk_alloc(1);
1691         memory_region_init_ram_device_ptr(&quirk->mem[0], OBJECT(vdev),
1692                                           "nvlink2-atsd-mr", atsdreg->size, p);
1693         QLIST_INSERT_HEAD(&vdev->bars[0].quirks, quirk, next);
1694     }
1695 
1696     object_property_add(OBJECT(vdev), "nvlink2-tgt", "uint64",
1697                         vfio_pci_nvlink2_get_tgt, NULL, NULL,
1698                         (void *) (uintptr_t) captgt->tgt);
1699     trace_vfio_pci_nvlink2_setup_quirk_ssatgt(vdev->vbasedev.name, captgt->tgt,
1700                                               atsdreg->size);
1701 
1702     object_property_add(OBJECT(vdev), "nvlink2-link-speed", "uint32",
1703                         vfio_pci_nvlink2_get_link_speed, NULL, NULL,
1704                         (void *) (uintptr_t) capspeed->link_speed);
1705     trace_vfio_pci_nvlink2_setup_quirk_lnkspd(vdev->vbasedev.name,
1706                                               capspeed->link_speed);
1707 free_exit:
1708     g_free(atsdreg);
1709 
1710     return ret;
1711 }
1712