/*
 * virtio-iommu device
 *
 * Copyright (c) 2020 Red Hat, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
1922c37a10SEric Auger
2022c37a10SEric Auger #include "qemu/osdep.h"
21fe2cacaeSEric Auger #include "qemu/log.h"
2222c37a10SEric Auger #include "qemu/iov.h"
2330d40e39SEric Auger #include "qemu/range.h"
245c476ba3SEric Auger #include "qemu/reserved-region.h"
25e414ed2cSPhilippe Mathieu-Daudé #include "exec/target_page.h"
2622c37a10SEric Auger #include "hw/qdev-properties.h"
2722c37a10SEric Auger #include "hw/virtio/virtio.h"
2822c37a10SEric Auger #include "sysemu/kvm.h"
29448179e3SJean-Philippe Brucker #include "sysemu/reset.h"
3094df5b21SEric Auger #include "sysemu/sysemu.h"
31908cae0dSEric Auger #include "qemu/reserved-region.h"
32294ac5feSEric Auger #include "qemu/units.h"
33cfb42188SEric Auger #include "qapi/error.h"
34cfb42188SEric Auger #include "qemu/error-report.h"
3522c37a10SEric Auger #include "trace.h"
3622c37a10SEric Auger
3722c37a10SEric Auger #include "standard-headers/linux/virtio_ids.h"
3822c37a10SEric Auger
3922c37a10SEric Auger #include "hw/virtio/virtio-bus.h"
4022c37a10SEric Auger #include "hw/virtio/virtio-iommu.h"
41cfb42188SEric Auger #include "hw/pci/pci_bus.h"
42cfb42188SEric Auger #include "hw/pci/pci.h"
4322c37a10SEric Auger
/* Size limits used by the device model (queue size and probe size). */
#define VIOMMU_DEFAULT_QUEUE_SIZE   256
#define VIOMMU_PROBE_SIZE           512
4722c37a10SEric Auger
48cfb42188SEric Auger typedef struct VirtIOIOMMUDomain {
49cfb42188SEric Auger uint32_t id;
50d9c96f24SJean-Philippe Brucker bool bypass;
51cfb42188SEric Auger GTree *mappings;
52cfb42188SEric Auger QLIST_HEAD(, VirtIOIOMMUEndpoint) endpoint_list;
53cfb42188SEric Auger } VirtIOIOMMUDomain;
54cfb42188SEric Auger
55cfb42188SEric Auger typedef struct VirtIOIOMMUEndpoint {
56cfb42188SEric Auger uint32_t id;
57cfb42188SEric Auger VirtIOIOMMUDomain *domain;
5831aa323fSJean-Philippe Brucker IOMMUMemoryRegion *iommu_mr;
59cfb42188SEric Auger QLIST_ENTRY(VirtIOIOMMUEndpoint) next;
60cfb42188SEric Auger } VirtIOIOMMUEndpoint;
61cfb42188SEric Auger
/*
 * IOVA range used as key of a domain's mappings tree. Both bounds are
 * inclusive: interval_cmp() treats two intervals sharing a bound as
 * overlapping.
 */
typedef struct VirtIOIOMMUInterval {
    uint64_t low;   /* first address of the range */
    uint64_t high;  /* last address of the range */
} VirtIOIOMMUInterval;
66cfb42188SEric Auger
/* Value stored in a domain's mappings tree for one VirtIOIOMMUInterval. */
typedef struct VirtIOIOMMUMapping {
    /* Address the start of the interval translates to */
    uint64_t phys_addr;
    /* VIRTIO_IOMMU_MAP_F_* flags the mapping was created with */
    uint32_t flags;
} VirtIOIOMMUMapping;
71fe2cacaeSEric Auger
72817ef10dSEric Auger struct hiod_key {
73817ef10dSEric Auger PCIBus *bus;
74817ef10dSEric Auger uint8_t devfn;
75817ef10dSEric Auger };
76817ef10dSEric Auger
/* Combine the bus number and devfn of @dev into a PCI BDF value. */
static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev)
{
    int bus_num = pci_bus_num(dev->bus);

    return PCI_BUILD_BDF(bus_num, dev->devfn);
}
81cfb42188SEric Auger
/*
 * Tell whether DMA from @sdev currently bypasses translation.
 *
 * An endpoint attached to a domain follows that domain's bypass setting.
 * In every other case (no endpoints tree yet, unknown endpoint, or endpoint
 * without a domain) the device-wide config.bypass value applies.
 *
 * The lookup is done with the viommu mutex held.
 */
static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev)
{
    VirtIOIOMMU *viommu = sdev->viommu;
    uint32_t ep_id = virtio_iommu_get_bdf(sdev);
    VirtIOIOMMUEndpoint *endpoint = NULL;
    bool result;

    qemu_rec_mutex_lock(&viommu->mutex);

    /* need to check bypass before system reset: the tree may not exist yet */
    if (viommu->endpoints) {
        endpoint = g_tree_lookup(viommu->endpoints, GUINT_TO_POINTER(ep_id));
    }

    if (endpoint && endpoint->domain) {
        result = endpoint->domain->bypass;
    } else {
        result = viommu->config.bypass;
    }

    qemu_rec_mutex_unlock(&viommu->mutex);
    return result;
}
10990519b90SZhenzhong Duan
11090519b90SZhenzhong Duan /* Return whether the device is using IOMMU translation. */
virtio_iommu_switch_address_space(IOMMUDevice * sdev)11190519b90SZhenzhong Duan static bool virtio_iommu_switch_address_space(IOMMUDevice *sdev)
11290519b90SZhenzhong Duan {
11390519b90SZhenzhong Duan bool use_remapping;
11490519b90SZhenzhong Duan
11590519b90SZhenzhong Duan assert(sdev);
11690519b90SZhenzhong Duan
11790519b90SZhenzhong Duan use_remapping = !virtio_iommu_device_bypassed(sdev);
11890519b90SZhenzhong Duan
11990519b90SZhenzhong Duan trace_virtio_iommu_switch_address_space(pci_bus_num(sdev->bus),
12090519b90SZhenzhong Duan PCI_SLOT(sdev->devfn),
12190519b90SZhenzhong Duan PCI_FUNC(sdev->devfn),
12290519b90SZhenzhong Duan use_remapping);
12390519b90SZhenzhong Duan
12490519b90SZhenzhong Duan /* Turn off first then on the other */
12590519b90SZhenzhong Duan if (use_remapping) {
12690519b90SZhenzhong Duan memory_region_set_enabled(&sdev->bypass_mr, false);
12790519b90SZhenzhong Duan memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), true);
12890519b90SZhenzhong Duan } else {
12990519b90SZhenzhong Duan memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), false);
13090519b90SZhenzhong Duan memory_region_set_enabled(&sdev->bypass_mr, true);
13190519b90SZhenzhong Duan }
13290519b90SZhenzhong Duan
13390519b90SZhenzhong Duan return use_remapping;
13490519b90SZhenzhong Duan }
13590519b90SZhenzhong Duan
/*
 * Re-evaluate the bypass/translate choice for every IOMMUDevice registered
 * on every bus known to @s.
 */
static void virtio_iommu_switch_address_space_all(VirtIOIOMMU *s)
{
    GHashTableIter bus_iter;
    IOMMUPciBus *pci_bus;

    g_hash_table_iter_init(&bus_iter, s->as_by_busptr);
    while (g_hash_table_iter_next(&bus_iter, NULL, (void **)&pci_bus)) {
        for (int devfn = 0; devfn < PCI_DEVFN_MAX; devfn++) {
            IOMMUDevice *sdev = pci_bus->pbdev[devfn];

            /* Slots without a registered device are left alone */
            if (sdev) {
                virtio_iommu_switch_address_space(sdev);
            }
        }
    }
}
15290519b90SZhenzhong Duan
153cfb42188SEric Auger /**
154cfb42188SEric Auger * The bus number is used for lookup when SID based operations occur.
155cfb42188SEric Auger * In that case we lazily populate the IOMMUPciBus array from the bus hash
156cfb42188SEric Auger * table. At the time the IOMMUPciBus is created (iommu_find_add_as), the bus
157cfb42188SEric Auger * numbers may not be always initialized yet.
158cfb42188SEric Auger */
iommu_find_iommu_pcibus(VirtIOIOMMU * s,uint8_t bus_num)159cfb42188SEric Auger static IOMMUPciBus *iommu_find_iommu_pcibus(VirtIOIOMMU *s, uint8_t bus_num)
160cfb42188SEric Auger {
161cfb42188SEric Auger IOMMUPciBus *iommu_pci_bus = s->iommu_pcibus_by_bus_num[bus_num];
162cfb42188SEric Auger
163cfb42188SEric Auger if (!iommu_pci_bus) {
164cfb42188SEric Auger GHashTableIter iter;
165cfb42188SEric Auger
166cfb42188SEric Auger g_hash_table_iter_init(&iter, s->as_by_busptr);
167cfb42188SEric Auger while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) {
168cfb42188SEric Auger if (pci_bus_num(iommu_pci_bus->bus) == bus_num) {
169cfb42188SEric Auger s->iommu_pcibus_by_bus_num[bus_num] = iommu_pci_bus;
170cfb42188SEric Auger return iommu_pci_bus;
171cfb42188SEric Auger }
172cfb42188SEric Auger }
173cfb42188SEric Auger return NULL;
174cfb42188SEric Auger }
175cfb42188SEric Auger return iommu_pci_bus;
176cfb42188SEric Auger }
177cfb42188SEric Auger
virtio_iommu_mr(VirtIOIOMMU * s,uint32_t sid)178cfb42188SEric Auger static IOMMUMemoryRegion *virtio_iommu_mr(VirtIOIOMMU *s, uint32_t sid)
179cfb42188SEric Auger {
180cfb42188SEric Auger uint8_t bus_n, devfn;
181cfb42188SEric Auger IOMMUPciBus *iommu_pci_bus;
182cfb42188SEric Auger IOMMUDevice *dev;
183cfb42188SEric Auger
184cfb42188SEric Auger bus_n = PCI_BUS_NUM(sid);
185cfb42188SEric Auger iommu_pci_bus = iommu_find_iommu_pcibus(s, bus_n);
186cfb42188SEric Auger if (iommu_pci_bus) {
187bfe7a961SJean-Philippe Brucker devfn = sid & (PCI_DEVFN_MAX - 1);
188cfb42188SEric Auger dev = iommu_pci_bus->pbdev[devfn];
189cfb42188SEric Auger if (dev) {
190cfb42188SEric Auger return &dev->iommu_mr;
191cfb42188SEric Auger }
192cfb42188SEric Auger }
193cfb42188SEric Auger return NULL;
194cfb42188SEric Auger }
195cfb42188SEric Auger
interval_cmp(gconstpointer a,gconstpointer b,gpointer user_data)196cfb42188SEric Auger static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
197cfb42188SEric Auger {
198cfb42188SEric Auger VirtIOIOMMUInterval *inta = (VirtIOIOMMUInterval *)a;
199cfb42188SEric Auger VirtIOIOMMUInterval *intb = (VirtIOIOMMUInterval *)b;
200cfb42188SEric Auger
201cfb42188SEric Auger if (inta->high < intb->low) {
202cfb42188SEric Auger return -1;
203cfb42188SEric Auger } else if (intb->high < inta->low) {
204cfb42188SEric Auger return 1;
205cfb42188SEric Auger } else {
206cfb42188SEric Auger return 0;
207cfb42188SEric Auger }
208cfb42188SEric Auger }
209cfb42188SEric Auger
/*
 * Send @event to the notifiers of @mr for the inclusive range
 * [@virt_start, @virt_end].
 *
 * A range covering the whole 64-bit space is sent as a single event.
 * Any other range is split into chunks whose mask is given by
 * dma_aligned_pow2_mask(); for events carrying a permission (i.e. maps),
 * translated_addr advances along with the IOVA.
 *
 * The caller fills in type, target_as, perm and translated_addr; iova and
 * addr_mask are set here.
 */
static void virtio_iommu_notify_map_unmap(IOMMUMemoryRegion *mr,
                                          IOMMUTLBEvent *event,
                                          hwaddr virt_start, hwaddr virt_end)
{
    const uint64_t span = virt_end - virt_start;
    const bool advance_target = event->entry.perm != IOMMU_NONE;
    hwaddr iova = virt_start;

    event->entry.iova = iova;
    event->entry.addr_mask = span;

    if (span == UINT64_MAX) {
        /*
         * Full address space: one event does it. The loop below is then
         * skipped because virt_end + 1 wraps around to virt_start.
         */
        memory_region_notify_iommu(mr, 0, *event);
    }

    while (iova != virt_end + 1) {
        uint64_t chunk_mask = dma_aligned_pow2_mask(iova, virt_end, 64);
        uint64_t chunk_len = chunk_mask + 1;

        event->entry.addr_mask = chunk_mask;
        event->entry.iova = iova;
        memory_region_notify_iommu(mr, 0, *event);

        iova += chunk_len;
        if (advance_target) {
            event->entry.translated_addr += chunk_len;
        }
    }
}
2350522be9aSJean-Philippe Brucker
/*
 * Notify the MAP notifiers of @mr that [@virt_start, @virt_end] now
 * translates to @paddr with the access rights encoded in @flags.
 *
 * Nothing is sent when @mr has no MAP notifier, when the mapping is flagged
 * VIRTIO_IOMMU_MAP_F_MMIO, or when it grants neither read nor write access.
 */
static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start,
                                    hwaddr virt_end, hwaddr paddr,
                                    uint32_t flags)
{
    IOMMUAccessFlags perm = IOMMU_ACCESS_FLAG(flags & VIRTIO_IOMMU_MAP_F_READ,
                                              flags & VIRTIO_IOMMU_MAP_F_WRITE);
    IOMMUTLBEvent event;

    if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_MAP)) {
        return;
    }
    if (flags & VIRTIO_IOMMU_MAP_F_MMIO) {
        return;
    }
    if (!perm) {
        return;
    }

    trace_virtio_iommu_notify_map(mr->parent_obj.name, virt_start, virt_end,
                                  paddr, perm);

    event.type = IOMMU_NOTIFIER_MAP;
    event.entry.translated_addr = paddr;
    event.entry.perm = perm;
    event.entry.target_as = &address_space_memory;

    /* iova and addr_mask are filled in by the helper */
    virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end);
}
25915e4c8f0SBharat Bhushan
/*
 * Notify the UNMAP notifiers of @mr that [@virt_start, @virt_end] is no
 * longer mapped. Does nothing when @mr has no UNMAP notifier.
 */
static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start,
                                      hwaddr virt_end)
{
    IOMMUTLBEvent event;

    if ((mr->iommu_notify_flags & IOMMU_NOTIFIER_UNMAP) == 0) {
        return;
    }

    trace_virtio_iommu_notify_unmap(mr->parent_obj.name, virt_start, virt_end);

    event.type = IOMMU_NOTIFIER_UNMAP;
    event.entry.translated_addr = 0;
    event.entry.perm = IOMMU_NONE;
    event.entry.target_as = &address_space_memory;

    /* iova and addr_mask are filled in by the helper */
    virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end);
}
278dde3f08bSEric Auger
virtio_iommu_notify_unmap_cb(gpointer key,gpointer value,gpointer data)2792f6eeb5fSBharat Bhushan static gboolean virtio_iommu_notify_unmap_cb(gpointer key, gpointer value,
2802f6eeb5fSBharat Bhushan gpointer data)
2812f6eeb5fSBharat Bhushan {
2822f6eeb5fSBharat Bhushan VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
2832f6eeb5fSBharat Bhushan IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;
2842f6eeb5fSBharat Bhushan
2852f6eeb5fSBharat Bhushan virtio_iommu_notify_unmap(mr, interval->low, interval->high);
2862f6eeb5fSBharat Bhushan
2872f6eeb5fSBharat Bhushan return false;
2882f6eeb5fSBharat Bhushan }
2892f6eeb5fSBharat Bhushan
virtio_iommu_notify_map_cb(gpointer key,gpointer value,gpointer data)2902f6eeb5fSBharat Bhushan static gboolean virtio_iommu_notify_map_cb(gpointer key, gpointer value,
2912f6eeb5fSBharat Bhushan gpointer data)
2922f6eeb5fSBharat Bhushan {
2932f6eeb5fSBharat Bhushan VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
2942f6eeb5fSBharat Bhushan VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
2952f6eeb5fSBharat Bhushan IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;
2962f6eeb5fSBharat Bhushan
2972f6eeb5fSBharat Bhushan virtio_iommu_notify_map(mr, interval->low, interval->high,
2982f6eeb5fSBharat Bhushan mapping->phys_addr, mapping->flags);
2992f6eeb5fSBharat Bhushan
3002f6eeb5fSBharat Bhushan return false;
3012f6eeb5fSBharat Bhushan }
3022f6eeb5fSBharat Bhushan
/*
 * Detach @ep from its domain, if any: unmap notifications are sent for
 * every mapping of the domain, the endpoint is unlinked from the domain's
 * endpoint list, and the device's address space is re-evaluated.
 */
static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
{
    VirtIOIOMMUDomain *old_domain = ep->domain;
    IOMMUDevice *sdev;

    if (!old_domain) {
        return;
    }

    trace_virtio_iommu_detach_endpoint_from_domain(old_domain->id, ep->id);

    g_tree_foreach(old_domain->mappings, virtio_iommu_notify_unmap_cb,
                   ep->iommu_mr);
    QLIST_REMOVE(ep, next);
    ep->domain = NULL;

    /* ep->iommu_mr points at the iommu_mr member of the owning IOMMUDevice */
    sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr);
    virtio_iommu_switch_address_space(sdev);
}
318cfb42188SEric Auger
virtio_iommu_get_endpoint(VirtIOIOMMU * s,uint32_t ep_id)319cfb42188SEric Auger static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
320cfb42188SEric Auger uint32_t ep_id)
321cfb42188SEric Auger {
322cfb42188SEric Auger VirtIOIOMMUEndpoint *ep;
32331aa323fSJean-Philippe Brucker IOMMUMemoryRegion *mr;
324cfb42188SEric Auger
325cfb42188SEric Auger ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
326cfb42188SEric Auger if (ep) {
327cfb42188SEric Auger return ep;
328cfb42188SEric Auger }
32931aa323fSJean-Philippe Brucker mr = virtio_iommu_mr(s, ep_id);
33031aa323fSJean-Philippe Brucker if (!mr) {
331cfb42188SEric Auger return NULL;
332cfb42188SEric Auger }
333cfb42188SEric Auger ep = g_malloc0(sizeof(*ep));
334cfb42188SEric Auger ep->id = ep_id;
33531aa323fSJean-Philippe Brucker ep->iommu_mr = mr;
336cfb42188SEric Auger trace_virtio_iommu_get_endpoint(ep_id);
337cfb42188SEric Auger g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep);
338cfb42188SEric Auger return ep;
339cfb42188SEric Auger }
340cfb42188SEric Auger
/*
 * Release the VirtIOIOMMUEndpoint passed as @data, detaching it from its
 * domain first when it is attached to one.
 */
static void virtio_iommu_put_endpoint(gpointer data)
{
    VirtIOIOMMUEndpoint *endpoint = data;

    if (endpoint->domain != NULL) {
        virtio_iommu_detach_endpoint_from_domain(endpoint);
    }

    trace_virtio_iommu_put_endpoint(endpoint->id);
    g_free(endpoint);
}
352cfb42188SEric Auger
virtio_iommu_get_domain(VirtIOIOMMU * s,uint32_t domain_id,bool bypass)353cfb42188SEric Auger static VirtIOIOMMUDomain *virtio_iommu_get_domain(VirtIOIOMMU *s,
354d9c96f24SJean-Philippe Brucker uint32_t domain_id,
355d9c96f24SJean-Philippe Brucker bool bypass)
356cfb42188SEric Auger {
357cfb42188SEric Auger VirtIOIOMMUDomain *domain;
358cfb42188SEric Auger
359cfb42188SEric Auger domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
360cfb42188SEric Auger if (domain) {
361d9c96f24SJean-Philippe Brucker if (domain->bypass != bypass) {
362d9c96f24SJean-Philippe Brucker return NULL;
363d9c96f24SJean-Philippe Brucker }
364cfb42188SEric Auger return domain;
365cfb42188SEric Auger }
366cfb42188SEric Auger domain = g_malloc0(sizeof(*domain));
367cfb42188SEric Auger domain->id = domain_id;
368cfb42188SEric Auger domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
369cfb42188SEric Auger NULL, (GDestroyNotify)g_free,
370cfb42188SEric Auger (GDestroyNotify)g_free);
371d9c96f24SJean-Philippe Brucker domain->bypass = bypass;
372cfb42188SEric Auger g_tree_insert(s->domains, GUINT_TO_POINTER(domain_id), domain);
373cfb42188SEric Auger QLIST_INIT(&domain->endpoint_list);
374cfb42188SEric Auger trace_virtio_iommu_get_domain(domain_id);
375cfb42188SEric Auger return domain;
376cfb42188SEric Auger }
377cfb42188SEric Auger
/*
 * Release the VirtIOIOMMUDomain passed as @data: every endpoint still
 * attached is detached, then the mappings tree and the domain are freed.
 */
static void virtio_iommu_put_domain(gpointer data)
{
    VirtIOIOMMUDomain *domain = data;
    VirtIOIOMMUEndpoint *endpoint, *next_endpoint;

    /* _SAFE variant: detaching unlinks the current element from the list */
    QLIST_FOREACH_SAFE(endpoint, &domain->endpoint_list, next, next_endpoint) {
        virtio_iommu_detach_endpoint_from_domain(endpoint);
    }

    g_tree_destroy(domain->mappings);
    trace_virtio_iommu_put_domain(domain->id);
    g_free(domain);
}
390cfb42188SEric Auger
/*
 * Insert a heap-allocated copy of each reserved region configured on the
 * viommu (prop_resv_regions) into the reserved region list of @sdev.
 */
static void add_prop_resv_regions(IOMMUDevice *sdev)
{
    VirtIOIOMMU *viommu = sdev->viommu;
    int idx = 0;

    while (idx < viommu->nr_prop_resv_regions) {
        ReservedRegion *copy = g_new0(ReservedRegion, 1);

        *copy = viommu->prop_resv_regions[idx];
        sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, copy);
        idx++;
    }
}
403908cae0dSEric Auger
virtio_iommu_find_add_as(PCIBus * bus,void * opaque,int devfn)404cfb42188SEric Auger static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
405cfb42188SEric Auger int devfn)
406cfb42188SEric Auger {
407cfb42188SEric Auger VirtIOIOMMU *s = opaque;
408cfb42188SEric Auger IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
409cfb42188SEric Auger static uint32_t mr_index;
410cfb42188SEric Auger IOMMUDevice *sdev;
411cfb42188SEric Auger
412cfb42188SEric Auger if (!sbus) {
413cfb42188SEric Auger sbus = g_malloc0(sizeof(IOMMUPciBus) +
414cfb42188SEric Auger sizeof(IOMMUDevice *) * PCI_DEVFN_MAX);
415cfb42188SEric Auger sbus->bus = bus;
416cfb42188SEric Auger g_hash_table_insert(s->as_by_busptr, bus, sbus);
417cfb42188SEric Auger }
418cfb42188SEric Auger
419cfb42188SEric Auger sdev = sbus->pbdev[devfn];
420cfb42188SEric Auger if (!sdev) {
421cfb42188SEric Auger char *name = g_strdup_printf("%s-%d-%d",
422cfb42188SEric Auger TYPE_VIRTIO_IOMMU_MEMORY_REGION,
423cfb42188SEric Auger mr_index++, devfn);
424b21e2380SMarkus Armbruster sdev = sbus->pbdev[devfn] = g_new0(IOMMUDevice, 1);
425cfb42188SEric Auger
426cfb42188SEric Auger sdev->viommu = s;
427cfb42188SEric Auger sdev->bus = bus;
428cfb42188SEric Auger sdev->devfn = devfn;
429cfb42188SEric Auger
430cfb42188SEric Auger trace_virtio_iommu_init_iommu_mr(name);
431cfb42188SEric Auger
43290519b90SZhenzhong Duan memory_region_init(&sdev->root, OBJECT(s), name, UINT64_MAX);
43390519b90SZhenzhong Duan address_space_init(&sdev->as, &sdev->root, TYPE_VIRTIO_IOMMU);
434908cae0dSEric Auger add_prop_resv_regions(sdev);
43590519b90SZhenzhong Duan
43690519b90SZhenzhong Duan /*
43790519b90SZhenzhong Duan * Build the IOMMU disabled container with aliases to the
43890519b90SZhenzhong Duan * shared MRs. Note that aliasing to a shared memory region
43990519b90SZhenzhong Duan * could help the memory API to detect same FlatViews so we
44090519b90SZhenzhong Duan * can have devices to share the same FlatView when in bypass
44190519b90SZhenzhong Duan * mode. (either by not configuring virtio-iommu driver or with
44290519b90SZhenzhong Duan * "iommu=pt"). It will greatly reduce the total number of
44390519b90SZhenzhong Duan * FlatViews of the system hence VM runs faster.
44490519b90SZhenzhong Duan */
44590519b90SZhenzhong Duan memory_region_init_alias(&sdev->bypass_mr, OBJECT(s),
44690519b90SZhenzhong Duan "system", get_system_memory(), 0,
44790519b90SZhenzhong Duan memory_region_size(get_system_memory()));
44890519b90SZhenzhong Duan
449cfb42188SEric Auger memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr),
450cfb42188SEric Auger TYPE_VIRTIO_IOMMU_MEMORY_REGION,
451cfb42188SEric Auger OBJECT(s), name,
452cfb42188SEric Auger UINT64_MAX);
45390519b90SZhenzhong Duan
45490519b90SZhenzhong Duan /*
45590519b90SZhenzhong Duan * Hook both the containers under the root container, we
45690519b90SZhenzhong Duan * switch between iommu & bypass MRs by enable/disable
45790519b90SZhenzhong Duan * corresponding sub-containers
45890519b90SZhenzhong Duan */
45990519b90SZhenzhong Duan memory_region_add_subregion_overlap(&sdev->root, 0,
46090519b90SZhenzhong Duan MEMORY_REGION(&sdev->iommu_mr),
46190519b90SZhenzhong Duan 0);
46290519b90SZhenzhong Duan memory_region_add_subregion_overlap(&sdev->root, 0,
46390519b90SZhenzhong Duan &sdev->bypass_mr, 0);
46490519b90SZhenzhong Duan
46590519b90SZhenzhong Duan virtio_iommu_switch_address_space(sdev);
466cfb42188SEric Auger g_free(name);
467cfb42188SEric Auger }
468cfb42188SEric Auger return &sdev->as;
469cfb42188SEric Auger }
470cfb42188SEric Auger
hiod_equal(gconstpointer v1,gconstpointer v2)471817ef10dSEric Auger static gboolean hiod_equal(gconstpointer v1, gconstpointer v2)
472817ef10dSEric Auger {
473817ef10dSEric Auger const struct hiod_key *key1 = v1;
474817ef10dSEric Auger const struct hiod_key *key2 = v2;
475817ef10dSEric Auger
476817ef10dSEric Auger return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
477817ef10dSEric Auger }
478817ef10dSEric Auger
hiod_hash(gconstpointer v)479817ef10dSEric Auger static guint hiod_hash(gconstpointer v)
480817ef10dSEric Auger {
481817ef10dSEric Auger const struct hiod_key *key = v;
482817ef10dSEric Auger guint value = (guint)(uintptr_t)key->bus;
483817ef10dSEric Auger
484817ef10dSEric Auger return (guint)(value << 8 | key->devfn);
485817ef10dSEric Auger }
486817ef10dSEric Auger
/* Destroy callback: drop one reference on the object @v. */
static void hiod_destroy(gpointer v)
{
    gpointer obj = v;

    object_unref(obj);
}
491817ef10dSEric Auger
492817ef10dSEric Auger static HostIOMMUDevice *
get_host_iommu_device(VirtIOIOMMU * viommu,PCIBus * bus,int devfn)493817ef10dSEric Auger get_host_iommu_device(VirtIOIOMMU *viommu, PCIBus *bus, int devfn) {
494817ef10dSEric Auger struct hiod_key key = {
495817ef10dSEric Auger .bus = bus,
496817ef10dSEric Auger .devfn = devfn,
497817ef10dSEric Auger };
498817ef10dSEric Auger
499817ef10dSEric Auger return g_hash_table_lookup(viommu->host_iommu_devices, &key);
500817ef10dSEric Auger }
501817ef10dSEric Auger
502cf2647a7SEric Auger /**
503cf2647a7SEric Auger * rebuild_resv_regions: rebuild resv regions with both the
504cf2647a7SEric Auger * info of host resv ranges and property set resv ranges
505cf2647a7SEric Auger */
rebuild_resv_regions(IOMMUDevice * sdev)506cf2647a7SEric Auger static int rebuild_resv_regions(IOMMUDevice *sdev)
507cf2647a7SEric Auger {
508cf2647a7SEric Auger GList *l;
509cf2647a7SEric Auger int i = 0;
510cf2647a7SEric Auger
511cf2647a7SEric Auger /* free the existing list and rebuild it from scratch */
512cf2647a7SEric Auger g_list_free_full(sdev->resv_regions, g_free);
513cf2647a7SEric Auger sdev->resv_regions = NULL;
514cf2647a7SEric Auger
515cf2647a7SEric Auger /* First add host reserved regions if any, all tagged as RESERVED */
516cf2647a7SEric Auger for (l = sdev->host_resv_ranges; l; l = l->next) {
517cf2647a7SEric Auger ReservedRegion *reg = g_new0(ReservedRegion, 1);
518cf2647a7SEric Auger Range *r = (Range *)l->data;
519cf2647a7SEric Auger
520cf2647a7SEric Auger reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED;
521cf2647a7SEric Auger range_set_bounds(®->range, range_lob(r), range_upb(r));
522cf2647a7SEric Auger sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg);
523cf2647a7SEric Auger trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i,
524cf2647a7SEric Auger range_lob(®->range),
525cf2647a7SEric Auger range_upb(®->range));
526cf2647a7SEric Auger i++;
527cf2647a7SEric Auger }
528cf2647a7SEric Auger /*
529cf2647a7SEric Auger * then add higher priority reserved regions set by the machine
530cf2647a7SEric Auger * through properties
531cf2647a7SEric Auger */
532cf2647a7SEric Auger add_prop_resv_regions(sdev);
533cf2647a7SEric Auger return 0;
534cf2647a7SEric Auger }
535cf2647a7SEric Auger
virtio_iommu_set_host_iova_ranges(VirtIOIOMMU * s,PCIBus * bus,int devfn,GList * iova_ranges,Error ** errp)536cf2647a7SEric Auger static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus,
537cf2647a7SEric Auger int devfn, GList *iova_ranges,
538cf2647a7SEric Auger Error **errp)
539cf2647a7SEric Auger {
540cf2647a7SEric Auger IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
541cf2647a7SEric Auger IOMMUDevice *sdev;
542cf2647a7SEric Auger int ret = -EINVAL;
543cf2647a7SEric Auger
544cf2647a7SEric Auger if (!sbus) {
54537baedf8SEric Auger error_setg(errp, "%s: no IOMMUPciBus found!", __func__);
54637baedf8SEric Auger return ret;
547cf2647a7SEric Auger }
548cf2647a7SEric Auger
549cf2647a7SEric Auger sdev = sbus->pbdev[devfn];
55037baedf8SEric Auger if (!sdev) {
55137baedf8SEric Auger error_setg(errp, "%s: no IOMMUDevice found!", __func__);
55237baedf8SEric Auger return ret;
55337baedf8SEric Auger }
554cf2647a7SEric Auger
555cf2647a7SEric Auger if (sdev->host_resv_ranges) {
55662ac01d1SEric Auger error_setg(errp, "%s virtio-iommu does not support aliased BDF",
55762ac01d1SEric Auger __func__);
55862ac01d1SEric Auger return ret;
559cf2647a7SEric Auger }
560cf2647a7SEric Auger
561cf2647a7SEric Auger range_inverse_array(iova_ranges,
562cf2647a7SEric Auger &sdev->host_resv_ranges,
563cf2647a7SEric Auger 0, UINT64_MAX);
564cf2647a7SEric Auger rebuild_resv_regions(sdev);
565cf2647a7SEric Auger
566cf2647a7SEric Auger return 0;
567cf2647a7SEric Auger }
568cf2647a7SEric Auger
/*
 * Drop the host reserved ranges of the device at (@bus, @devfn) and reset
 * its reserved regions to the property-defined ones only. Does nothing when
 * the bus or the device is unknown.
 */
static void virtio_iommu_unset_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus,
                                                int devfn)
{
    IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
    IOMMUDevice *sdev = sbus ? sbus->pbdev[devfn] : NULL;

    if (!sdev) {
        return;
    }

    /* g_steal_pointer() leaves both list heads NULL after the free */
    g_list_free_full(g_steal_pointer(&sdev->host_resv_ranges), g_free);
    g_list_free_full(g_steal_pointer(&sdev->resv_regions), g_free);
    add_prop_resv_regions(sdev);
}
59062ac01d1SEric Auger
59162ac01d1SEric Auger
check_page_size_mask(VirtIOIOMMU * viommu,uint64_t new_mask,Error ** errp)592d7c8c95fSEric Auger static bool check_page_size_mask(VirtIOIOMMU *viommu, uint64_t new_mask,
593d7c8c95fSEric Auger Error **errp)
594d7c8c95fSEric Auger {
595d7c8c95fSEric Auger uint64_t cur_mask = viommu->config.page_size_mask;
596d7c8c95fSEric Auger
597d7c8c95fSEric Auger if ((cur_mask & new_mask) == 0) {
598d7c8c95fSEric Auger error_setg(errp, "virtio-iommu reports a page size mask 0x%"PRIx64
599d7c8c95fSEric Auger " incompatible with currently supported mask 0x%"PRIx64,
600d7c8c95fSEric Auger new_mask, cur_mask);
601d7c8c95fSEric Auger return false;
602d7c8c95fSEric Auger }
603d7c8c95fSEric Auger /*
604d7c8c95fSEric Auger * Once the granule is frozen we can't change the mask anymore. If by
605d7c8c95fSEric Auger * chance the hotplugged device supports the same granule, we can still
606d7c8c95fSEric Auger * accept it.
607d7c8c95fSEric Auger */
608d7c8c95fSEric Auger if (viommu->granule_frozen) {
609d7c8c95fSEric Auger int cur_granule = ctz64(cur_mask);
610d7c8c95fSEric Auger
611d7c8c95fSEric Auger if (!(BIT_ULL(cur_granule) & new_mask)) {
612d7c8c95fSEric Auger error_setg(errp,
613d7c8c95fSEric Auger "virtio-iommu does not support frozen granule 0x%llx",
614d7c8c95fSEric Auger BIT_ULL(cur_granule));
615d7c8c95fSEric Auger return false;
616d7c8c95fSEric Auger }
617d7c8c95fSEric Auger }
618d7c8c95fSEric Auger return true;
619d7c8c95fSEric Auger }
620d7c8c95fSEric Auger
virtio_iommu_set_iommu_device(PCIBus * bus,void * opaque,int devfn,HostIOMMUDevice * hiod,Error ** errp)621817ef10dSEric Auger static bool virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
622817ef10dSEric Auger HostIOMMUDevice *hiod, Error **errp)
623817ef10dSEric Auger {
624d7c8c95fSEric Auger ERRP_GUARD();
625817ef10dSEric Auger VirtIOIOMMU *viommu = opaque;
626cf2647a7SEric Auger HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
627817ef10dSEric Auger struct hiod_key *new_key;
628cf2647a7SEric Auger GList *host_iova_ranges = NULL;
629817ef10dSEric Auger
630817ef10dSEric Auger assert(hiod);
631817ef10dSEric Auger
632817ef10dSEric Auger if (get_host_iommu_device(viommu, bus, devfn)) {
633817ef10dSEric Auger error_setg(errp, "Host IOMMU device already exists");
634817ef10dSEric Auger return false;
635817ef10dSEric Auger }
636817ef10dSEric Auger
637cf2647a7SEric Auger if (hiodc->get_iova_ranges) {
638cf2647a7SEric Auger int ret;
639d59ca1caSEric Auger host_iova_ranges = hiodc->get_iova_ranges(hiod);
640cf2647a7SEric Auger if (!host_iova_ranges) {
641cf2647a7SEric Auger return true; /* some old kernels may not support that capability */
642cf2647a7SEric Auger }
643cf2647a7SEric Auger ret = virtio_iommu_set_host_iova_ranges(viommu, hiod->aliased_bus,
644cf2647a7SEric Auger hiod->aliased_devfn,
645cf2647a7SEric Auger host_iova_ranges, errp);
646cf2647a7SEric Auger if (ret) {
647d7c8c95fSEric Auger goto error;
648d7c8c95fSEric Auger }
649d7c8c95fSEric Auger }
650d7c8c95fSEric Auger if (hiodc->get_page_size_mask) {
651d7c8c95fSEric Auger uint64_t new_mask = hiodc->get_page_size_mask(hiod);
652d7c8c95fSEric Auger
653d7c8c95fSEric Auger if (check_page_size_mask(viommu, new_mask, errp)) {
654d7c8c95fSEric Auger /*
655d7c8c95fSEric Auger * The default mask depends on the "granule" property. For example,
656d7c8c95fSEric Auger * with 4k granule, it is -(4 * KiB). When an assigned device has
657d7c8c95fSEric Auger * page size restrictions due to the hardware IOMMU configuration,
658d7c8c95fSEric Auger * apply this restriction to the mask.
659d7c8c95fSEric Auger */
660d7c8c95fSEric Auger trace_virtio_iommu_update_page_size_mask(hiod->name,
661d7c8c95fSEric Auger viommu->config.page_size_mask,
662d7c8c95fSEric Auger new_mask);
663d7c8c95fSEric Auger if (!viommu->granule_frozen) {
664d7c8c95fSEric Auger viommu->config.page_size_mask &= new_mask;
665d7c8c95fSEric Auger }
666d7c8c95fSEric Auger } else {
667d7c8c95fSEric Auger error_prepend(errp, "%s: ", hiod->name);
668d7c8c95fSEric Auger goto error;
669cf2647a7SEric Auger }
670cf2647a7SEric Auger }
671cf2647a7SEric Auger
672817ef10dSEric Auger new_key = g_malloc(sizeof(*new_key));
673817ef10dSEric Auger new_key->bus = bus;
674817ef10dSEric Auger new_key->devfn = devfn;
675817ef10dSEric Auger
676817ef10dSEric Auger object_ref(hiod);
677817ef10dSEric Auger g_hash_table_insert(viommu->host_iommu_devices, new_key, hiod);
678cf2647a7SEric Auger g_list_free_full(host_iova_ranges, g_free);
679817ef10dSEric Auger
680817ef10dSEric Auger return true;
681d7c8c95fSEric Auger error:
682d7c8c95fSEric Auger g_list_free_full(host_iova_ranges, g_free);
683d7c8c95fSEric Auger return false;
684817ef10dSEric Auger }
685817ef10dSEric Auger
686817ef10dSEric Auger static void
virtio_iommu_unset_iommu_device(PCIBus * bus,void * opaque,int devfn)687817ef10dSEric Auger virtio_iommu_unset_iommu_device(PCIBus *bus, void *opaque, int devfn)
688817ef10dSEric Auger {
689817ef10dSEric Auger VirtIOIOMMU *viommu = opaque;
690817ef10dSEric Auger HostIOMMUDevice *hiod;
691817ef10dSEric Auger struct hiod_key key = {
692817ef10dSEric Auger .bus = bus,
693817ef10dSEric Auger .devfn = devfn,
694817ef10dSEric Auger };
695817ef10dSEric Auger
696817ef10dSEric Auger hiod = g_hash_table_lookup(viommu->host_iommu_devices, &key);
697817ef10dSEric Auger if (!hiod) {
698817ef10dSEric Auger return;
699817ef10dSEric Auger }
70062ac01d1SEric Auger virtio_iommu_unset_host_iova_ranges(viommu, hiod->aliased_bus,
70162ac01d1SEric Auger hiod->aliased_devfn);
702817ef10dSEric Auger
703817ef10dSEric Auger g_hash_table_remove(viommu->host_iommu_devices, &key);
704817ef10dSEric Auger }
705817ef10dSEric Auger
706ba7d12ebSYi Liu static const PCIIOMMUOps virtio_iommu_ops = {
707ba7d12ebSYi Liu .get_address_space = virtio_iommu_find_add_as,
708817ef10dSEric Auger .set_iommu_device = virtio_iommu_set_iommu_device,
709817ef10dSEric Auger .unset_iommu_device = virtio_iommu_unset_iommu_device,
710ba7d12ebSYi Liu };
711ba7d12ebSYi Liu
/*
 * virtio_iommu_attach - handle an ATTACH request
 * @s: the virtio-iommu device
 * @req: request as read from the guest (little-endian fields)
 *
 * Attaches the endpoint to the requested domain. If the endpoint is already
 * attached, it is detached first and the previous domain is removed from
 * the domain tree once it has no endpoint left. The domain mappings are
 * then replayed on the endpoint memory region.
 *
 * Returns VIRTIO_IOMMU_S_OK on success, VIRTIO_IOMMU_S_INVAL for unknown
 * flags or when virtio_iommu_get_domain() returns no domain, and
 * VIRTIO_IOMMU_S_NOENT when the endpoint cannot be obtained.
 */
static int virtio_iommu_attach(VirtIOIOMMU *s,
                               struct virtio_iommu_req_attach *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    uint32_t flags = le32_to_cpu(req->flags);
    VirtIOIOMMUDomain *domain, *old_domain;
    VirtIOIOMMUEndpoint *ep;
    IOMMUDevice *sdev;

    trace_virtio_iommu_attach(domain_id, ep_id);

    if (flags & ~VIRTIO_IOMMU_ATTACH_F_BYPASS) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    ep = virtio_iommu_get_endpoint(s, ep_id);
    if (!ep) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    old_domain = ep->domain;
    if (old_domain) {
        /* The device is already attached to a domain: detach it first. */
        virtio_iommu_detach_endpoint_from_domain(ep);
        if (QLIST_EMPTY(&old_domain->endpoint_list)) {
            g_tree_remove(s->domains, GUINT_TO_POINTER(old_domain->id));
        }
    }

    domain = virtio_iommu_get_domain(s, domain_id,
                                     flags & VIRTIO_IOMMU_ATTACH_F_BYPASS);
    if (!domain) {
        /* Incompatible bypass flag */
        return VIRTIO_IOMMU_S_INVAL;
    }

    QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next);
    ep->domain = domain;

    sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr);
    virtio_iommu_switch_address_space(sdev);

    /* Replay domain mappings on the associated memory region */
    g_tree_foreach(domain->mappings, virtio_iommu_notify_map_cb,
                   ep->iommu_mr);

    return VIRTIO_IOMMU_S_OK;
}
7635442b854SEric Auger
/*
 * virtio_iommu_detach - handle a DETACH request
 * @s: the virtio-iommu device
 * @req: request as read from the guest (little-endian fields)
 *
 * Detaches the endpoint from its domain, removes the domain from the domain
 * tree when it has no endpoint left, then removes the endpoint from the
 * endpoint tree.
 *
 * Returns VIRTIO_IOMMU_S_OK on success, VIRTIO_IOMMU_S_NOENT when the
 * endpoint is unknown, and VIRTIO_IOMMU_S_INVAL when the endpoint is not
 * attached to the domain named in the request.
 */
static int virtio_iommu_detach(VirtIOIOMMU *s,
                               struct virtio_iommu_req_detach *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    VirtIOIOMMUEndpoint *ep;
    VirtIOIOMMUDomain *domain;

    trace_virtio_iommu_detach(domain_id, ep_id);

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
    if (!ep) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    domain = ep->domain;
    if (!(domain && domain->id == domain_id)) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    virtio_iommu_detach_endpoint_from_domain(ep);

    /* Drop the domain once its last endpoint is gone. */
    if (QLIST_EMPTY(&domain->endpoint_list)) {
        g_tree_remove(s->domains, GUINT_TO_POINTER(domain->id));
    }
    g_tree_remove(s->endpoints, GUINT_TO_POINTER(ep_id));

    return VIRTIO_IOMMU_S_OK;
}
7935442b854SEric Auger
/*
 * virtio_iommu_map - handle a MAP request
 * @s: the virtio-iommu device
 * @req: request as read from the guest (little-endian fields)
 *
 * Inserts the [virt_start, virt_end] -> phys_start mapping in the domain
 * mapping tree and notifies every endpoint attached to the domain.
 *
 * Returns VIRTIO_IOMMU_S_OK on success, VIRTIO_IOMMU_S_INVAL for unknown
 * flags, a bypass domain or an interval already found in the tree, and
 * VIRTIO_IOMMU_S_NOENT when the domain does not exist.
 */
static int virtio_iommu_map(VirtIOIOMMU *s,
                            struct virtio_iommu_req_map *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint64_t phys_start = le64_to_cpu(req->phys_start);
    uint64_t virt_start = le64_to_cpu(req->virt_start);
    uint64_t virt_end = le64_to_cpu(req->virt_end);
    uint32_t flags = le32_to_cpu(req->flags);
    VirtIOIOMMUInterval *interval;
    VirtIOIOMMUMapping *mapping;
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;

    if (flags & ~VIRTIO_IOMMU_MAP_F_MASK) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (!domain) {
        return VIRTIO_IOMMU_S_NOENT;
    }
    if (domain->bypass) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    interval = g_malloc0(sizeof(*interval));
    interval->low = virt_start;
    interval->high = virt_end;

    /*
     * Reject the request when the tree already holds a matching key.
     * NOTE(review): presumably the tree comparator treats overlapping
     * intervals as equal - confirm against the comparator definition.
     */
    if (g_tree_lookup(domain->mappings, (gpointer)interval)) {
        g_free(interval);
        return VIRTIO_IOMMU_S_INVAL;
    }

    trace_virtio_iommu_map(domain_id, virt_start, virt_end, phys_start, flags);

    mapping = g_malloc0(sizeof(*mapping));
    mapping->phys_addr = phys_start;
    mapping->flags = flags;

    g_tree_insert(domain->mappings, interval, mapping);

    QLIST_FOREACH(ep, &domain->endpoint_list, next) {
        virtio_iommu_notify_map(ep->iommu_mr, virt_start, virt_end, phys_start,
                                flags);
    }

    return VIRTIO_IOMMU_S_OK;
}
8465442b854SEric Auger
/*
 * virtio_iommu_unmap - handle an UNMAP request
 * @s: the virtio-iommu device
 * @req: request as read from the guest (little-endian fields)
 *
 * Repeatedly looks up a mapping matching [virt_start, virt_end] in the
 * domain mapping tree. Each mapping fully contained in the requested range
 * is notified to all endpoints of the domain and removed. The loop stops at
 * the first mapping that is not fully contained.
 *
 * Returns VIRTIO_IOMMU_S_OK, VIRTIO_IOMMU_S_NOENT when the domain does not
 * exist, VIRTIO_IOMMU_S_INVAL for a bypass domain, or VIRTIO_IOMMU_S_RANGE
 * when a matching mapping extends beyond the requested range.
 */
static int virtio_iommu_unmap(VirtIOIOMMU *s,
                              struct virtio_iommu_req_unmap *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint64_t virt_start = le64_to_cpu(req->virt_start);
    uint64_t virt_end = le64_to_cpu(req->virt_end);
    VirtIOIOMMUInterval interval, *iter_key;
    VirtIOIOMMUMapping *iter_val;
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;

    trace_virtio_iommu_unmap(domain_id, virt_start, virt_end);

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (!domain) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (domain->bypass) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    interval.low = virt_start;
    interval.high = virt_end;

    for (;;) {
        uint64_t current_low, current_high;

        if (!g_tree_lookup_extended(domain->mappings, &interval,
                                    (void **)&iter_key, (void **)&iter_val)) {
            return VIRTIO_IOMMU_S_OK;
        }

        /* Copy the bounds: they are still needed after the key is removed. */
        current_low = iter_key->low;
        current_high = iter_key->high;

        if (interval.low > current_low || interval.high < current_high) {
            return VIRTIO_IOMMU_S_RANGE;
        }

        QLIST_FOREACH(ep, &domain->endpoint_list, next) {
            virtio_iommu_notify_unmap(ep->iommu_mr, current_low,
                                      current_high);
        }
        g_tree_remove(domain->mappings, iter_key);
        trace_virtio_iommu_unmap_done(domain_id, current_low, current_high);
    }
}
89222c37a10SEric Auger
/*
 * virtio_iommu_fill_resv_mem_prop - serialize the reserved regions of a
 * device as RESV_MEM probe properties
 * @sdev: device whose resv_regions list is serialized
 * @ep: endpoint ID, only used for tracing
 * @buf: output buffer
 * @free: number of bytes available in @buf
 *
 * One struct virtio_iommu_probe_resv_mem is written per reserved region,
 * back to back, with multi-byte fields in little-endian.
 *
 * Returns the number of bytes written, or -ENOSPC when the properties do not
 * fit in @free bytes (nothing is written in that case).
 */
static ssize_t virtio_iommu_fill_resv_mem_prop(IOMMUDevice *sdev, uint32_t ep,
                                               uint8_t *buf, size_t free)
{
    struct virtio_iommu_probe_resv_mem prop = {};
    size_t size = sizeof(prop), length = size - sizeof(prop.head), total;
    GList *l;

    total = size * g_list_length(sdev->resv_regions);
    if (total > free) {
        return -ENOSPC;
    }

    for (l = sdev->resv_regions; l; l = l->next) {
        ReservedRegion *reg = l->data;
        unsigned subtype = reg->type;
        Range *range = &reg->range;
        uint64_t start = range_lob(range);
        uint64_t end = range_upb(range);

        assert(subtype == VIRTIO_IOMMU_RESV_MEM_T_RESERVED ||
               subtype == VIRTIO_IOMMU_RESV_MEM_T_MSI);
        prop.head.type = cpu_to_le16(VIRTIO_IOMMU_PROBE_T_RESV_MEM);
        prop.head.length = cpu_to_le16(length);
        prop.subtype = subtype;
        prop.start = cpu_to_le64(start);
        prop.end = cpu_to_le64(end);

        memcpy(buf, &prop, size);

        /* Trace host-endian values, not the little-endian wire fields. */
        trace_virtio_iommu_fill_resv_property(ep, prop.subtype, start, end);
        buf += size;
    }
    return total;
}
9261733eebbSEric Auger
9271733eebbSEric Auger /**
9281733eebbSEric Auger * virtio_iommu_probe - Fill the probe request buffer with
9291733eebbSEric Auger * the properties the device is able to return
9301733eebbSEric Auger */
virtio_iommu_probe(VirtIOIOMMU * s,struct virtio_iommu_req_probe * req,uint8_t * buf)9311733eebbSEric Auger static int virtio_iommu_probe(VirtIOIOMMU *s,
9321733eebbSEric Auger struct virtio_iommu_req_probe *req,
9331733eebbSEric Auger uint8_t *buf)
9341733eebbSEric Auger {
9351733eebbSEric Auger uint32_t ep_id = le32_to_cpu(req->endpoint);
93609b4c3d6SEric Auger IOMMUMemoryRegion *iommu_mr = virtio_iommu_mr(s, ep_id);
9371733eebbSEric Auger size_t free = VIOMMU_PROBE_SIZE;
93809b4c3d6SEric Auger IOMMUDevice *sdev;
9391733eebbSEric Auger ssize_t count;
9401733eebbSEric Auger
94109b4c3d6SEric Auger if (!iommu_mr) {
9421733eebbSEric Auger return VIRTIO_IOMMU_S_NOENT;
9431733eebbSEric Auger }
9441733eebbSEric Auger
94509b4c3d6SEric Auger sdev = container_of(iommu_mr, IOMMUDevice, iommu_mr);
94609b4c3d6SEric Auger
94709b4c3d6SEric Auger count = virtio_iommu_fill_resv_mem_prop(sdev, ep_id, buf, free);
9481733eebbSEric Auger if (count < 0) {
9491733eebbSEric Auger return VIRTIO_IOMMU_S_INVAL;
9501733eebbSEric Auger }
9511733eebbSEric Auger buf += count;
9521733eebbSEric Auger free -= count;
9531733eebbSEric Auger
9541733eebbSEric Auger return VIRTIO_IOMMU_S_OK;
9551733eebbSEric Auger }
9561733eebbSEric Auger
virtio_iommu_iov_to_req(struct iovec * iov,unsigned int iov_cnt,void * req,size_t payload_sz)9575442b854SEric Auger static int virtio_iommu_iov_to_req(struct iovec *iov,
9585442b854SEric Auger unsigned int iov_cnt,
95945461aacSZhenzhong Duan void *req, size_t payload_sz)
9605442b854SEric Auger {
96145461aacSZhenzhong Duan size_t sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz);
9625442b854SEric Auger
9635442b854SEric Auger if (unlikely(sz != payload_sz)) {
9645442b854SEric Auger return VIRTIO_IOMMU_S_INVAL;
9655442b854SEric Auger }
9665442b854SEric Auger return 0;
9675442b854SEric Auger }
9685442b854SEric Auger
9695442b854SEric Auger #define virtio_iommu_handle_req(__req) \
9705442b854SEric Auger static int virtio_iommu_handle_ ## __req(VirtIOIOMMU *s, \
9715442b854SEric Auger struct iovec *iov, \
9725442b854SEric Auger unsigned int iov_cnt) \
9735442b854SEric Auger { \
9745442b854SEric Auger struct virtio_iommu_req_ ## __req req; \
97545461aacSZhenzhong Duan int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, \
97645461aacSZhenzhong Duan sizeof(req) - sizeof(struct virtio_iommu_req_tail));\
9775442b854SEric Auger \
9785442b854SEric Auger return ret ? ret : virtio_iommu_ ## __req(s, &req); \
9795442b854SEric Auger }
9805442b854SEric Auger
9815442b854SEric Auger virtio_iommu_handle_req(attach)
virtio_iommu_handle_req(detach)9825442b854SEric Auger virtio_iommu_handle_req(detach)
9835442b854SEric Auger virtio_iommu_handle_req(map)
9845442b854SEric Auger virtio_iommu_handle_req(unmap)
9855442b854SEric Auger
9861733eebbSEric Auger static int virtio_iommu_handle_probe(VirtIOIOMMU *s,
9871733eebbSEric Auger struct iovec *iov,
9881733eebbSEric Auger unsigned int iov_cnt,
9891733eebbSEric Auger uint8_t *buf)
9901733eebbSEric Auger {
9911733eebbSEric Auger struct virtio_iommu_req_probe req;
9921733eebbSEric Auger int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req));
9931733eebbSEric Auger
9941733eebbSEric Auger return ret ? ret : virtio_iommu_probe(s, &req, buf);
9951733eebbSEric Auger }
9961733eebbSEric Auger
/*
 * virtio_iommu_handle_command - process the requests pending on a virtqueue
 * @vdev: the virtio device
 * @vq: the virtqueue to drain
 *
 * Pops elements until the queue is empty. For each element the request head
 * is read, the request is dispatched on head.type with s->mutex held, and
 * the response is written back and pushed to the guest. The response is a
 * struct virtio_iommu_req_tail, preceded by s->config.probe_size bytes of
 * properties for PROBE requests.
 *
 * An element too small for the head or the tail marks the device broken via
 * virtio_error() and stops processing.
 */
static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_req_head head;
    struct virtio_iommu_req_tail tail = {};
    VirtQueueElement *elem;
    unsigned int iov_cnt;
    struct iovec *iov;
    void *buf = NULL;
    size_t sz;

    for (;;) {
        size_t output_size = sizeof(tail);

        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            return;
        }

        if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(head)) {
            virtio_error(vdev, "virtio-iommu bad head/tail size");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov = elem->out_sg;
        sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head));
        if (unlikely(sz != sizeof(head))) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: read %zu bytes from command head "
                          "but expected %zu\n", __func__, sz, sizeof(head));
            tail.status = VIRTIO_IOMMU_S_DEVERR;
            goto out;
        }
        qemu_rec_mutex_lock(&s->mutex);
        switch (head.type) {
        case VIRTIO_IOMMU_T_ATTACH:
            tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_DETACH:
            tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_MAP:
            tail.status = virtio_iommu_handle_map(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_UNMAP:
            tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_PROBE:
        {
            struct virtio_iommu_req_tail *ptail;

            /* The probe response is the property area followed by the tail. */
            output_size = s->config.probe_size + sizeof(tail);
            buf = g_malloc0(output_size);

            ptail = buf + s->config.probe_size;
            ptail->status = virtio_iommu_handle_probe(s, iov, iov_cnt, buf);
            break;
        }
        default:
            tail.status = VIRTIO_IOMMU_S_UNSUPP;
        }
        qemu_rec_mutex_unlock(&s->mutex);

out:
        sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
                          buf ? buf : &tail, output_size);
        if (unlikely(sz != output_size)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: wrote %zu bytes to command response "
                          "but response size is %zu\n",
                          __func__, sz, output_size);
            tail.status = VIRTIO_IOMMU_S_DEVERR;
            /*
             * We checked that sizeof(tail) can fit to elem->in_sg at the
             * beginning of the loop
             */
            output_size = sizeof(tail);
            g_free(buf);
            buf = NULL;
            sz = iov_from_buf(elem->in_sg,
                              elem->in_num,
                              0,
                              &tail,
                              output_size);
        }
        assert(sz == output_size);

        virtqueue_push(vq, elem, sz);
        virtio_notify(vdev, vq);
        g_free(elem);
        g_free(buf);
        /* Reset so the next iteration falls back to the plain tail. */
        buf = NULL;
    }
}
109522c37a10SEric Auger
/*
 * virtio_iommu_report_fault - report a fault on the event virtqueue
 * @viommu: the virtio-iommu device
 * @reason: fault reason
 * @flags: fault flags
 * @endpoint: ID of the faulting endpoint
 * @address: faulting address
 *
 * Pops one buffer from the event queue, fills it with a struct
 * virtio_iommu_fault (multi-byte fields in little-endian) and pushes it back
 * to the guest. When no buffer is available the event is dropped after an
 * error report. A buffer too small for the fault structure marks the device
 * broken via virtio_error().
 */
static void virtio_iommu_report_fault(VirtIOIOMMU *viommu, uint8_t reason,
                                      int flags, uint32_t endpoint,
                                      uint64_t address)
{
    VirtIODevice *vdev = &viommu->parent_obj;
    VirtQueue *vq = viommu->event_vq;
    struct virtio_iommu_fault fault;
    VirtQueueElement *elem;
    size_t sz;

    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
    if (!elem) {
        error_report_once(
            "no buffer available in event queue to report event");
        return;
    }

    if (iov_size(elem->in_sg, elem->in_num) < sizeof(fault)) {
        virtio_error(vdev, "error buffer of wrong size");
        virtqueue_detach_element(vq, elem, 0);
        g_free(elem);
        return;
    }

    /* Zero the whole structure so that unset fields are reported as 0. */
    memset(&fault, 0, sizeof(fault));
    fault.reason = reason;
    fault.flags = cpu_to_le32(flags);
    fault.endpoint = cpu_to_le32(endpoint);
    fault.address = cpu_to_le64(address);

    sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
                      &fault, sizeof(fault));
    assert(sz == sizeof(fault));

    trace_virtio_iommu_report_fault(reason, flags, endpoint, address);
    virtqueue_push(vq, elem, sz);
    virtio_notify(vdev, vq);
    g_free(elem);
}
1137a7c1da8aSEric Auger
virtio_iommu_translate(IOMMUMemoryRegion * mr,hwaddr addr,IOMMUAccessFlags flag,int iommu_idx)1138cfb42188SEric Auger static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
1139cfb42188SEric Auger IOMMUAccessFlags flag,
1140cfb42188SEric Auger int iommu_idx)
1141cfb42188SEric Auger {
1142cfb42188SEric Auger IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
1143ed8449b3SEric Auger VirtIOIOMMUInterval interval, *mapping_key;
1144ed8449b3SEric Auger VirtIOIOMMUMapping *mapping_value;
1145ed8449b3SEric Auger VirtIOIOMMU *s = sdev->viommu;
1146a7c1da8aSEric Auger bool read_fault, write_fault;
1147ed8449b3SEric Auger VirtIOIOMMUEndpoint *ep;
1148a7c1da8aSEric Auger uint32_t sid, flags;
1149ed8449b3SEric Auger bool bypass_allowed;
11501084feddSEric Auger int granule;
1151ed8449b3SEric Auger bool found;
1152908cae0dSEric Auger GList *l;
1153ed8449b3SEric Auger
1154ed8449b3SEric Auger interval.low = addr;
1155ed8449b3SEric Auger interval.high = addr + 1;
11561084feddSEric Auger granule = ctz64(s->config.page_size_mask);
1157cfb42188SEric Auger
1158cfb42188SEric Auger IOMMUTLBEntry entry = {
1159cfb42188SEric Auger .target_as = &address_space_memory,
1160cfb42188SEric Auger .iova = addr,
1161cfb42188SEric Auger .translated_addr = addr,
11621084feddSEric Auger .addr_mask = BIT_ULL(granule) - 1,
1163cfb42188SEric Auger .perm = IOMMU_NONE,
1164cfb42188SEric Auger };
1165cfb42188SEric Auger
1166448179e3SJean-Philippe Brucker bypass_allowed = s->config.bypass;
1167ed8449b3SEric Auger
1168cfb42188SEric Auger sid = virtio_iommu_get_bdf(sdev);
1169cfb42188SEric Auger
1170cfb42188SEric Auger trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
117108f2030aSZhenzhong Duan qemu_rec_mutex_lock(&s->mutex);
1172ed8449b3SEric Auger
1173ed8449b3SEric Auger ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
117423b5f0ffSZhenzhong Duan
117523b5f0ffSZhenzhong Duan if (bypass_allowed)
117623b5f0ffSZhenzhong Duan assert(ep && ep->domain && !ep->domain->bypass);
117723b5f0ffSZhenzhong Duan
1178ed8449b3SEric Auger if (!ep) {
1179ed8449b3SEric Auger if (!bypass_allowed) {
1180ed8449b3SEric Auger error_report_once("%s sid=%d is not known!!", __func__, sid);
1181a7c1da8aSEric Auger virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_UNKNOWN,
1182a7c1da8aSEric Auger VIRTIO_IOMMU_FAULT_F_ADDRESS,
1183a7c1da8aSEric Auger sid, addr);
1184ed8449b3SEric Auger } else {
1185ed8449b3SEric Auger entry.perm = flag;
1186ed8449b3SEric Auger }
1187ed8449b3SEric Auger goto unlock;
1188ed8449b3SEric Auger }
1189ed8449b3SEric Auger
1190908cae0dSEric Auger for (l = sdev->resv_regions; l; l = l->next) {
1191908cae0dSEric Auger ReservedRegion *reg = l->data;
11920f5a3092SEric Auger
1193e8f433f8SEric Auger if (range_contains(®->range, addr)) {
11940f5a3092SEric Auger switch (reg->type) {
11950f5a3092SEric Auger case VIRTIO_IOMMU_RESV_MEM_T_MSI:
11960f5a3092SEric Auger entry.perm = flag;
11970f5a3092SEric Auger break;
11980f5a3092SEric Auger case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
11990f5a3092SEric Auger default:
12000f5a3092SEric Auger virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
12010f5a3092SEric Auger VIRTIO_IOMMU_FAULT_F_ADDRESS,
12020f5a3092SEric Auger sid, addr);
12030f5a3092SEric Auger break;
12040f5a3092SEric Auger }
12050f5a3092SEric Auger goto unlock;
12060f5a3092SEric Auger }
12070f5a3092SEric Auger }
12080f5a3092SEric Auger
1209ed8449b3SEric Auger if (!ep->domain) {
1210ed8449b3SEric Auger if (!bypass_allowed) {
1211ed8449b3SEric Auger error_report_once("%s %02x:%02x.%01x not attached to any domain",
1212ed8449b3SEric Auger __func__, PCI_BUS_NUM(sid),
1213ed8449b3SEric Auger PCI_SLOT(sid), PCI_FUNC(sid));
1214a7c1da8aSEric Auger virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_DOMAIN,
1215a7c1da8aSEric Auger VIRTIO_IOMMU_FAULT_F_ADDRESS,
1216a7c1da8aSEric Auger sid, addr);
1217ed8449b3SEric Auger } else {
1218ed8449b3SEric Auger entry.perm = flag;
1219ed8449b3SEric Auger }
1220ed8449b3SEric Auger goto unlock;
1221d9c96f24SJean-Philippe Brucker } else if (ep->domain->bypass) {
1222d9c96f24SJean-Philippe Brucker entry.perm = flag;
1223d9c96f24SJean-Philippe Brucker goto unlock;
1224ed8449b3SEric Auger }
1225ed8449b3SEric Auger
1226ed8449b3SEric Auger found = g_tree_lookup_extended(ep->domain->mappings, (gpointer)(&interval),
1227ed8449b3SEric Auger (void **)&mapping_key,
1228ed8449b3SEric Auger (void **)&mapping_value);
1229ed8449b3SEric Auger if (!found) {
1230ed8449b3SEric Auger error_report_once("%s no mapping for 0x%"PRIx64" for sid=%d",
1231ed8449b3SEric Auger __func__, addr, sid);
1232a7c1da8aSEric Auger virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
1233a7c1da8aSEric Auger VIRTIO_IOMMU_FAULT_F_ADDRESS,
1234a7c1da8aSEric Auger sid, addr);
1235ed8449b3SEric Auger goto unlock;
1236ed8449b3SEric Auger }
1237ed8449b3SEric Auger
1238a7c1da8aSEric Auger read_fault = (flag & IOMMU_RO) &&
1239a7c1da8aSEric Auger !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_READ);
1240a7c1da8aSEric Auger write_fault = (flag & IOMMU_WO) &&
1241a7c1da8aSEric Auger !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_WRITE);
1242a7c1da8aSEric Auger
1243a7c1da8aSEric Auger flags = read_fault ? VIRTIO_IOMMU_FAULT_F_READ : 0;
1244a7c1da8aSEric Auger flags |= write_fault ? VIRTIO_IOMMU_FAULT_F_WRITE : 0;
1245a7c1da8aSEric Auger if (flags) {
1246ed8449b3SEric Auger error_report_once("%s permission error on 0x%"PRIx64"(%d): allowed=%d",
1247ed8449b3SEric Auger __func__, addr, flag, mapping_value->flags);
1248a7c1da8aSEric Auger flags |= VIRTIO_IOMMU_FAULT_F_ADDRESS;
1249a7c1da8aSEric Auger virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
1250a7c1da8aSEric Auger flags | VIRTIO_IOMMU_FAULT_F_ADDRESS,
1251a7c1da8aSEric Auger sid, addr);
1252ed8449b3SEric Auger goto unlock;
1253ed8449b3SEric Auger }
1254ed8449b3SEric Auger entry.translated_addr = addr - mapping_key->low + mapping_value->phys_addr;
1255ed8449b3SEric Auger entry.perm = flag;
1256ed8449b3SEric Auger trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid);
1257ed8449b3SEric Auger
1258ed8449b3SEric Auger unlock:
125908f2030aSZhenzhong Duan qemu_rec_mutex_unlock(&s->mutex);
1260cfb42188SEric Auger return entry;
1261cfb42188SEric Auger }
1262cfb42188SEric Auger
/*
 * virtio_iommu_get_config: fill @config_data with the device configuration.
 *
 * The multi-byte fields of the VirtIOIOMMU config are converted to
 * little-endian on the way out; the single-byte bypass field is copied
 * unchanged. The values reported to the trace point are the unconverted
 * ones held by the device.
 */
static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t *config_data)
{
    VirtIOIOMMU *viommu = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_config *cfg = &viommu->config;
    struct virtio_iommu_config *out = (void *)config_data;

    /* 64-bit fields */
    out->page_size_mask = cpu_to_le64(cfg->page_size_mask);
    out->input_range.start = cpu_to_le64(cfg->input_range.start);
    out->input_range.end = cpu_to_le64(cfg->input_range.end);

    /* 32-bit fields */
    out->domain_range.start = cpu_to_le32(cfg->domain_range.start);
    out->domain_range.end = cpu_to_le32(cfg->domain_range.end);
    out->probe_size = cpu_to_le32(cfg->probe_size);

    /* single byte, no byte swapping */
    out->bypass = cfg->bypass;

    trace_virtio_iommu_get_config(cfg->page_size_mask,
                                  cfg->input_range.start,
                                  cfg->input_range.end,
                                  cfg->domain_range.start,
                                  cfg->domain_range.end,
                                  cfg->probe_size,
                                  cfg->bypass);
}
1285448179e3SJean-Philippe Brucker
/*
 * virtio_iommu_set_config: handle a write to the device configuration.
 *
 * Only the bypass field is examined. A change of that field is rejected
 * with virtio_error() when VIRTIO_IOMMU_F_BYPASS_CONFIG is not set on the
 * device or when the new value is neither 0 nor 1; otherwise the new value
 * is stored and all address spaces are re-evaluated.
 */
static void virtio_iommu_set_config(VirtIODevice *vdev,
                                    const uint8_t *config_data)
{
    VirtIOIOMMU *viommu = VIRTIO_IOMMU(vdev);
    const struct virtio_iommu_config *in_config = (void *)config_data;
    uint8_t requested = in_config->bypass;

    if (requested != viommu->config.bypass) {
        if (!virtio_vdev_has_feature(vdev, VIRTIO_IOMMU_F_BYPASS_CONFIG)) {
            virtio_error(vdev, "cannot set config.bypass");
            return;
        }
        if (requested != 0 && requested != 1) {
            virtio_error(vdev, "invalid config.bypass value '%u'",
                         requested);
            return;
        }

        viommu->config.bypass = requested;
        virtio_iommu_switch_address_space_all(viommu);
    }

    /* Not reached when the write was rejected above */
    trace_virtio_iommu_set_config(requested);
}
130822c37a10SEric Auger
virtio_iommu_get_features(VirtIODevice * vdev,uint64_t f,Error ** errp)130922c37a10SEric Auger static uint64_t virtio_iommu_get_features(VirtIODevice *vdev, uint64_t f,
131022c37a10SEric Auger Error **errp)
131122c37a10SEric Auger {
131222c37a10SEric Auger VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
131322c37a10SEric Auger
131422c37a10SEric Auger f |= dev->features;
131522c37a10SEric Auger trace_virtio_iommu_get_features(f);
131622c37a10SEric Auger return f;
131722c37a10SEric Auger }
131822c37a10SEric Auger
int_cmp(gconstpointer a,gconstpointer b,gpointer user_data)1319cfb42188SEric Auger static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
1320cfb42188SEric Auger {
1321cfb42188SEric Auger guint ua = GPOINTER_TO_UINT(a);
1322cfb42188SEric Auger guint ub = GPOINTER_TO_UINT(b);
1323cfb42188SEric Auger return (ua > ub) - (ua < ub);
1324cfb42188SEric Auger }
1325cfb42188SEric Auger
virtio_iommu_remap(gpointer key,gpointer value,gpointer data)1326308e5e1bSBharat Bhushan static gboolean virtio_iommu_remap(gpointer key, gpointer value, gpointer data)
1327308e5e1bSBharat Bhushan {
1328308e5e1bSBharat Bhushan VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
1329308e5e1bSBharat Bhushan VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
1330308e5e1bSBharat Bhushan IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;
1331308e5e1bSBharat Bhushan
1332308e5e1bSBharat Bhushan trace_virtio_iommu_remap(mr->parent_obj.name, interval->low, interval->high,
1333308e5e1bSBharat Bhushan mapping->phys_addr);
1334308e5e1bSBharat Bhushan virtio_iommu_notify_map(mr, interval->low, interval->high,
1335308e5e1bSBharat Bhushan mapping->phys_addr, mapping->flags);
1336308e5e1bSBharat Bhushan return false;
1337308e5e1bSBharat Bhushan }
1338308e5e1bSBharat Bhushan
/*
 * virtio_iommu_replay: re-notify every mapping of the domain the endpoint
 * behind @mr is attached to.
 *
 * Nothing is done when the endpoint tree does not exist yet, when the
 * endpoint is unknown, or when it is not attached to a domain. The whole
 * lookup and traversal run with s->mutex held. @n is not used here.
 */
static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMU *viommu = sdev->viommu;
    uint32_t sid = virtio_iommu_get_bdf(sdev);

    qemu_rec_mutex_lock(&viommu->mutex);

    if (viommu->endpoints) {
        VirtIOIOMMUEndpoint *ep = g_tree_lookup(viommu->endpoints,
                                                GUINT_TO_POINTER(sid));

        if (ep && ep->domain) {
            g_tree_foreach(ep->domain->mappings, virtio_iommu_remap, mr);
        }
    }

    qemu_rec_mutex_unlock(&viommu->mutex);
}
1364308e5e1bSBharat Bhushan
virtio_iommu_notify_flag_changed(IOMMUMemoryRegion * iommu_mr,IOMMUNotifierFlag old,IOMMUNotifierFlag new,Error ** errp)13656978bfaaSBharat Bhushan static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr,
13666978bfaaSBharat Bhushan IOMMUNotifierFlag old,
13676978bfaaSBharat Bhushan IOMMUNotifierFlag new,
13686978bfaaSBharat Bhushan Error **errp)
13696978bfaaSBharat Bhushan {
1370958ec334SPeter Xu if (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) {
1371958ec334SPeter Xu error_setg(errp, "Virtio-iommu does not support dev-iotlb yet");
1372958ec334SPeter Xu return -EINVAL;
1373958ec334SPeter Xu }
1374958ec334SPeter Xu
13756978bfaaSBharat Bhushan if (old == IOMMU_NOTIFIER_NONE) {
13766978bfaaSBharat Bhushan trace_virtio_iommu_notify_flag_add(iommu_mr->parent_obj.name);
13776978bfaaSBharat Bhushan } else if (new == IOMMU_NOTIFIER_NONE) {
13786978bfaaSBharat Bhushan trace_virtio_iommu_notify_flag_del(iommu_mr->parent_obj.name);
13796978bfaaSBharat Bhushan }
13806978bfaaSBharat Bhushan return 0;
13816978bfaaSBharat Bhushan }
13826978bfaaSBharat Bhushan
virtio_iommu_system_reset(void * opaque)1383448179e3SJean-Philippe Brucker static void virtio_iommu_system_reset(void *opaque)
1384448179e3SJean-Philippe Brucker {
1385448179e3SJean-Philippe Brucker VirtIOIOMMU *s = opaque;
1386448179e3SJean-Philippe Brucker
1387448179e3SJean-Philippe Brucker trace_virtio_iommu_system_reset();
1388448179e3SJean-Philippe Brucker
13899a457383SZhenzhong Duan memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num));
13909a457383SZhenzhong Duan
1391448179e3SJean-Philippe Brucker /*
1392448179e3SJean-Philippe Brucker * config.bypass is sticky across device reset, but should be restored on
1393448179e3SJean-Philippe Brucker * system reset
1394448179e3SJean-Philippe Brucker */
1395448179e3SJean-Philippe Brucker s->config.bypass = s->boot_bypass;
139690519b90SZhenzhong Duan virtio_iommu_switch_address_space_all(s);
139790519b90SZhenzhong Duan
1398448179e3SJean-Philippe Brucker }
1399448179e3SJean-Philippe Brucker
/*
 * virtio_iommu_freeze_granule: notifier callback embedded in the
 * VirtIOIOMMU as machine_done.
 *
 * Marks the granule as frozen and traces the granule size, i.e. the
 * lowest bit set in config.page_size_mask. @data is not used.
 */
static void virtio_iommu_freeze_granule(Notifier *notifier, void *data)
{
    VirtIOIOMMU *viommu = container_of(notifier, VirtIOIOMMU, machine_done);
    int granule_shift;

    viommu->granule_frozen = true;

    granule_shift = ctz64(viommu->config.page_size_mask);
    trace_virtio_iommu_freeze_granule(BIT_ULL(granule_shift));
}
140994df5b21SEric Auger
virtio_iommu_device_realize(DeviceState * dev,Error ** errp)141022c37a10SEric Auger static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
141122c37a10SEric Auger {
141222c37a10SEric Auger VirtIODevice *vdev = VIRTIO_DEVICE(dev);
141322c37a10SEric Auger VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
141422c37a10SEric Auger
14153857cd5cSJonah Palmer virtio_init(vdev, VIRTIO_ID_IOMMU, sizeof(struct virtio_iommu_config));
141622c37a10SEric Auger
141722c37a10SEric Auger s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE,
141822c37a10SEric Auger virtio_iommu_handle_command);
141922c37a10SEric Auger s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);
142022c37a10SEric Auger
142190519b90SZhenzhong Duan /*
142290519b90SZhenzhong Duan * config.bypass is needed to get initial address space early, such as
142390519b90SZhenzhong Duan * in vfio realize
142490519b90SZhenzhong Duan */
142590519b90SZhenzhong Duan s->config.bypass = s->boot_bypass;
142601e7e492SEric Auger if (s->aw_bits < 32 || s->aw_bits > 64) {
142701e7e492SEric Auger error_setg(errp, "aw-bits must be within [32,64]");
142801e7e492SEric Auger return;
142901e7e492SEric Auger }
143001e7e492SEric Auger s->config.input_range.end =
143101e7e492SEric Auger s->aw_bits == 64 ? UINT64_MAX : BIT_ULL(s->aw_bits) - 1;
1432294ac5feSEric Auger
1433294ac5feSEric Auger switch (s->granule_mode) {
1434294ac5feSEric Auger case GRANULE_MODE_4K:
1435294ac5feSEric Auger s->config.page_size_mask = -(4 * KiB);
1436294ac5feSEric Auger break;
1437294ac5feSEric Auger case GRANULE_MODE_8K:
1438294ac5feSEric Auger s->config.page_size_mask = -(8 * KiB);
1439294ac5feSEric Auger break;
1440294ac5feSEric Auger case GRANULE_MODE_16K:
1441294ac5feSEric Auger s->config.page_size_mask = -(16 * KiB);
1442294ac5feSEric Auger break;
1443294ac5feSEric Auger case GRANULE_MODE_64K:
1444294ac5feSEric Auger s->config.page_size_mask = -(64 * KiB);
1445294ac5feSEric Auger break;
1446294ac5feSEric Auger case GRANULE_MODE_HOST:
1447294ac5feSEric Auger s->config.page_size_mask = qemu_real_host_page_mask();
1448294ac5feSEric Auger break;
1449294ac5feSEric Auger default:
1450294ac5feSEric Auger error_setg(errp, "Unsupported granule mode");
1451294ac5feSEric Auger }
14526b77ae05SEric Auger s->config.domain_range.end = UINT32_MAX;
14531733eebbSEric Auger s->config.probe_size = VIOMMU_PROBE_SIZE;
145422c37a10SEric Auger
145522c37a10SEric Auger virtio_add_feature(&s->features, VIRTIO_RING_F_EVENT_IDX);
145622c37a10SEric Auger virtio_add_feature(&s->features, VIRTIO_RING_F_INDIRECT_DESC);
145722c37a10SEric Auger virtio_add_feature(&s->features, VIRTIO_F_VERSION_1);
145822c37a10SEric Auger virtio_add_feature(&s->features, VIRTIO_IOMMU_F_INPUT_RANGE);
145922c37a10SEric Auger virtio_add_feature(&s->features, VIRTIO_IOMMU_F_DOMAIN_RANGE);
146022c37a10SEric Auger virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MAP_UNMAP);
146122c37a10SEric Auger virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO);
14621733eebbSEric Auger virtio_add_feature(&s->features, VIRTIO_IOMMU_F_PROBE);
1463448179e3SJean-Philippe Brucker virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS_CONFIG);
146422c37a10SEric Auger
146508f2030aSZhenzhong Duan qemu_rec_mutex_init(&s->mutex);
1466cfb42188SEric Auger
1467cfb42188SEric Auger s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free);
1468cfb42188SEric Auger
1469817ef10dSEric Auger s->host_iommu_devices = g_hash_table_new_full(hiod_hash, hiod_equal,
1470817ef10dSEric Auger g_free, hiod_destroy);
1471817ef10dSEric Auger
1472cfb42188SEric Auger if (s->primary_bus) {
1473ba7d12ebSYi Liu pci_setup_iommu(s->primary_bus, &virtio_iommu_ops, s);
1474cfb42188SEric Auger } else {
1475cfb42188SEric Auger error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!");
1476cfb42188SEric Auger }
1477448179e3SJean-Philippe Brucker
147894df5b21SEric Auger s->machine_done.notify = virtio_iommu_freeze_granule;
147994df5b21SEric Auger qemu_add_machine_init_done_notifier(&s->machine_done);
148094df5b21SEric Auger
1481448179e3SJean-Philippe Brucker qemu_register_reset(virtio_iommu_system_reset, s);
148222c37a10SEric Auger }
148322c37a10SEric Auger
/*
 * virtio_iommu_device_unrealize: undo virtio_iommu_device_realize().
 *
 * Unregisters the reset handler and the machine-done notifier, releases
 * both hash tables created at realize time as well as the domain and
 * endpoint trees (when they exist), then destroys the mutex, the two
 * virtqueues and the virtio device state.
 */
static void virtio_iommu_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);

    qemu_unregister_reset(virtio_iommu_system_reset, s);
    qemu_remove_machine_init_done_notifier(&s->machine_done);

    g_hash_table_destroy(s->as_by_busptr);
    /* Created in realize; entries are released by the table destructors */
    g_hash_table_destroy(s->host_iommu_devices);
    if (s->domains) {
        g_tree_destroy(s->domains);
    }
    if (s->endpoints) {
        g_tree_destroy(s->endpoints);
    }

    qemu_rec_mutex_destroy(&s->mutex);

    virtio_delete_queue(s->req_vq);
    virtio_delete_queue(s->event_vq);
    virtio_cleanup(vdev);
}
150622c37a10SEric Auger
/*
 * virtio_iommu_device_reset: drop all domains and endpoints.
 *
 * Existing trees are destroyed (domains first, then endpoints) and fresh
 * empty ones are created, keyed by integer id and releasing their values
 * through virtio_iommu_put_domain() / virtio_iommu_put_endpoint().
 */
static void virtio_iommu_device_reset(VirtIODevice *vdev)
{
    VirtIOIOMMU *viommu = VIRTIO_IOMMU(vdev);
    GCompareDataFunc by_id = (GCompareDataFunc)int_cmp;

    trace_virtio_iommu_device_reset();

    /* Tear down whatever a previous reset left behind */
    if (viommu->domains) {
        g_tree_destroy(viommu->domains);
    }
    if (viommu->endpoints) {
        g_tree_destroy(viommu->endpoints);
    }

    /* Keys are plain integers, hence no key destructor */
    viommu->domains = g_tree_new_full(by_id, NULL, NULL,
                                      virtio_iommu_put_domain);
    viommu->endpoints = g_tree_new_full(by_id, NULL, NULL,
                                        virtio_iommu_put_endpoint);
}
152422c37a10SEric Auger
/*
 * virtio_iommu_set_status: device status write hook.
 *
 * The new @status is only traced; no device state is changed here.
 */
static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status)
{
    trace_virtio_iommu_device_status(status);
}
152922c37a10SEric Auger
/* virtio_iommu_instance_init: intentionally empty instance initializer. */
static void virtio_iommu_instance_init(Object *obj)
{
    /* nothing to do */
}
153322c37a10SEric Auger
/*
 * VMStateDescription initializer for a VirtIOIOMMUInterval: the two
 * 64-bit bounds, low then high.
 */
#define VMSTATE_INTERVAL                                   \
{                                                          \
    .name = "interval",                                    \
    .minimum_version_id = 1,                               \
    .version_id = 1,                                       \
    .fields = (const VMStateField[]) {                     \
        VMSTATE_UINT64(low, VirtIOIOMMUInterval),          \
        VMSTATE_UINT64(high, VirtIOIOMMUInterval),         \
        VMSTATE_END_OF_LIST()                              \
    }                                                      \
}
1545bd0ab870SEric Auger
/*
 * VMStateDescription initializer for a VirtIOIOMMUMapping: the 64-bit
 * physical address followed by the 32-bit flags.
 */
#define VMSTATE_MAPPING                                    \
{                                                          \
    .name = "mapping",                                     \
    .minimum_version_id = 1,                               \
    .version_id = 1,                                       \
    .fields = (const VMStateField[]) {                     \
        VMSTATE_UINT64(phys_addr, VirtIOIOMMUMapping),     \
        VMSTATE_UINT32(flags, VirtIOIOMMUMapping),         \
        VMSTATE_END_OF_LIST()                              \
    },                                                     \
}
1557bd0ab870SEric Auger
1558bd0ab870SEric Auger static const VMStateDescription vmstate_interval_mapping[2] = {
1559bd0ab870SEric Auger VMSTATE_MAPPING, /* value */
1560bd0ab870SEric Auger VMSTATE_INTERVAL /* key */
1561bd0ab870SEric Auger };
1562bd0ab870SEric Auger
domain_preload(void * opaque)1563bd0ab870SEric Auger static int domain_preload(void *opaque)
1564bd0ab870SEric Auger {
1565bd0ab870SEric Auger VirtIOIOMMUDomain *domain = opaque;
1566bd0ab870SEric Auger
1567bd0ab870SEric Auger domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
1568bd0ab870SEric Auger NULL, g_free, g_free);
1569bd0ab870SEric Auger return 0;
1570bd0ab870SEric Auger }
1571bd0ab870SEric Auger
1572bd0ab870SEric Auger static const VMStateDescription vmstate_endpoint = {
1573bd0ab870SEric Auger .name = "endpoint",
1574bd0ab870SEric Auger .version_id = 1,
1575bd0ab870SEric Auger .minimum_version_id = 1,
1576ca02a170SRichard Henderson .fields = (const VMStateField[]) {
1577bd0ab870SEric Auger VMSTATE_UINT32(id, VirtIOIOMMUEndpoint),
1578bd0ab870SEric Auger VMSTATE_END_OF_LIST()
1579bd0ab870SEric Auger }
1580bd0ab870SEric Auger };
1581bd0ab870SEric Auger
1582bd0ab870SEric Auger static const VMStateDescription vmstate_domain = {
1583bd0ab870SEric Auger .name = "domain",
1584d9c96f24SJean-Philippe Brucker .version_id = 2,
1585d9c96f24SJean-Philippe Brucker .minimum_version_id = 2,
1586bd0ab870SEric Auger .pre_load = domain_preload,
1587ca02a170SRichard Henderson .fields = (const VMStateField[]) {
1588bd0ab870SEric Auger VMSTATE_UINT32(id, VirtIOIOMMUDomain),
1589bd0ab870SEric Auger VMSTATE_GTREE_V(mappings, VirtIOIOMMUDomain, 1,
1590bd0ab870SEric Auger vmstate_interval_mapping,
1591bd0ab870SEric Auger VirtIOIOMMUInterval, VirtIOIOMMUMapping),
1592bd0ab870SEric Auger VMSTATE_QLIST_V(endpoint_list, VirtIOIOMMUDomain, 1,
1593bd0ab870SEric Auger vmstate_endpoint, VirtIOIOMMUEndpoint, next),
1594d9c96f24SJean-Philippe Brucker VMSTATE_BOOL_V(bypass, VirtIOIOMMUDomain, 2),
1595bd0ab870SEric Auger VMSTATE_END_OF_LIST()
1596bd0ab870SEric Auger }
1597bd0ab870SEric Auger };
1598bd0ab870SEric Auger
reconstruct_endpoints(gpointer key,gpointer value,gpointer data)1599bd0ab870SEric Auger static gboolean reconstruct_endpoints(gpointer key, gpointer value,
1600bd0ab870SEric Auger gpointer data)
1601bd0ab870SEric Auger {
1602bd0ab870SEric Auger VirtIOIOMMU *s = (VirtIOIOMMU *)data;
1603bd0ab870SEric Auger VirtIOIOMMUDomain *d = (VirtIOIOMMUDomain *)value;
1604bd0ab870SEric Auger VirtIOIOMMUEndpoint *iter;
160531aa323fSJean-Philippe Brucker IOMMUMemoryRegion *mr;
1606bd0ab870SEric Auger
1607bd0ab870SEric Auger QLIST_FOREACH(iter, &d->endpoint_list, next) {
160831aa323fSJean-Philippe Brucker mr = virtio_iommu_mr(s, iter->id);
160931aa323fSJean-Philippe Brucker assert(mr);
161031aa323fSJean-Philippe Brucker
1611bd0ab870SEric Auger iter->domain = d;
161231aa323fSJean-Philippe Brucker iter->iommu_mr = mr;
1613bd0ab870SEric Auger g_tree_insert(s->endpoints, GUINT_TO_POINTER(iter->id), iter);
1614bd0ab870SEric Auger }
1615bd0ab870SEric Auger return false; /* continue the domain traversal */
1616bd0ab870SEric Auger }
1617bd0ab870SEric Auger
iommu_post_load(void * opaque,int version_id)1618bd0ab870SEric Auger static int iommu_post_load(void *opaque, int version_id)
1619bd0ab870SEric Auger {
1620bd0ab870SEric Auger VirtIOIOMMU *s = opaque;
1621bd0ab870SEric Auger
1622bd0ab870SEric Auger g_tree_foreach(s->domains, reconstruct_endpoints, s);
1623d355566bSZhenzhong Duan
1624d355566bSZhenzhong Duan /*
1625d355566bSZhenzhong Duan * Memory regions are dynamically turned on/off depending on
1626d355566bSZhenzhong Duan * 'config.bypass' and attached domain type if there is. After
1627d355566bSZhenzhong Duan * migration, we need to make sure the memory regions are
1628d355566bSZhenzhong Duan * still correct.
1629d355566bSZhenzhong Duan */
1630d355566bSZhenzhong Duan virtio_iommu_switch_address_space_all(s);
1631bd0ab870SEric Auger return 0;
1632bd0ab870SEric Auger }
1633bd0ab870SEric Auger
1634bd0ab870SEric Auger static const VMStateDescription vmstate_virtio_iommu_device = {
1635bd0ab870SEric Auger .name = "virtio-iommu-device",
1636448179e3SJean-Philippe Brucker .minimum_version_id = 2,
1637448179e3SJean-Philippe Brucker .version_id = 2,
1638bd0ab870SEric Auger .post_load = iommu_post_load,
1639ca02a170SRichard Henderson .fields = (const VMStateField[]) {
1640d9c96f24SJean-Philippe Brucker VMSTATE_GTREE_DIRECT_KEY_V(domains, VirtIOIOMMU, 2,
1641bd0ab870SEric Auger &vmstate_domain, VirtIOIOMMUDomain),
1642448179e3SJean-Philippe Brucker VMSTATE_UINT8_V(config.bypass, VirtIOIOMMU, 2),
1643bd0ab870SEric Auger VMSTATE_END_OF_LIST()
1644bd0ab870SEric Auger },
1645bd0ab870SEric Auger };
1646bd0ab870SEric Auger
164722c37a10SEric Auger static const VMStateDescription vmstate_virtio_iommu = {
164822c37a10SEric Auger .name = "virtio-iommu",
1649448179e3SJean-Philippe Brucker .minimum_version_id = 2,
1650bd0ab870SEric Auger .priority = MIG_PRI_IOMMU,
1651448179e3SJean-Philippe Brucker .version_id = 2,
1652ca02a170SRichard Henderson .fields = (const VMStateField[]) {
165322c37a10SEric Auger VMSTATE_VIRTIO_DEVICE,
165422c37a10SEric Auger VMSTATE_END_OF_LIST()
165522c37a10SEric Auger },
165622c37a10SEric Auger };
165722c37a10SEric Auger
165822c37a10SEric Auger static Property virtio_iommu_properties[] = {
1659c45e7619SPhilippe Mathieu-Daudé DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus,
1660c45e7619SPhilippe Mathieu-Daudé TYPE_PCI_BUS, PCIBus *),
1661448179e3SJean-Philippe Brucker DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true),
1662294ac5feSEric Auger DEFINE_PROP_GRANULE_MODE("granule", VirtIOIOMMU, granule_mode,
16639dd5e808SEric Auger GRANULE_MODE_HOST),
166401e7e492SEric Auger DEFINE_PROP_UINT8("aw-bits", VirtIOIOMMU, aw_bits, 64),
166522c37a10SEric Auger DEFINE_PROP_END_OF_LIST(),
166622c37a10SEric Auger };
166722c37a10SEric Auger
/*
 * virtio_iommu_class_init: class initializer of the virtio-iommu device.
 *
 * Installs the properties, the migration descriptions, the device
 * category and the VirtioDeviceClass callbacks. @data is not used.
 */
static void virtio_iommu_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    /* Generic device class */
    device_class_set_props(dc, virtio_iommu_properties);
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    dc->vmsd = &vmstate_virtio_iommu;

    /* Virtio device class: life cycle */
    vdc->realize = virtio_iommu_device_realize;
    vdc->unrealize = virtio_iommu_device_unrealize;
    vdc->reset = virtio_iommu_device_reset;

    /* Virtio device class: guest-visible state */
    vdc->get_features = virtio_iommu_get_features;
    vdc->get_config = virtio_iommu_get_config;
    vdc->set_config = virtio_iommu_set_config;
    vdc->set_status = virtio_iommu_set_status;

    /* Virtio device class: migration */
    vdc->vmsd = &vmstate_virtio_iommu_device;
}
168622c37a10SEric Auger
/*
 * virtio_iommu_memory_region_class_init: class initializer of the
 * virtio-iommu memory region type.
 *
 * Installs the translate, replay and notify_flag_changed callbacks.
 * @data is not used.
 */
static void virtio_iommu_memory_region_class_init(ObjectClass *klass,
                                                  void *data)
{
    IOMMUMemoryRegionClass *mr_class = IOMMU_MEMORY_REGION_CLASS(klass);

    mr_class->translate = virtio_iommu_translate;
    mr_class->replay = virtio_iommu_replay;
    mr_class->notify_flag_changed = virtio_iommu_notify_flag_changed;
}
1696cfb42188SEric Auger
169722c37a10SEric Auger static const TypeInfo virtio_iommu_info = {
169822c37a10SEric Auger .name = TYPE_VIRTIO_IOMMU,
169922c37a10SEric Auger .parent = TYPE_VIRTIO_DEVICE,
170022c37a10SEric Auger .instance_size = sizeof(VirtIOIOMMU),
170122c37a10SEric Auger .instance_init = virtio_iommu_instance_init,
170222c37a10SEric Auger .class_init = virtio_iommu_class_init,
170322c37a10SEric Auger };
170422c37a10SEric Auger
1705cfb42188SEric Auger static const TypeInfo virtio_iommu_memory_region_info = {
1706cfb42188SEric Auger .parent = TYPE_IOMMU_MEMORY_REGION,
1707cfb42188SEric Auger .name = TYPE_VIRTIO_IOMMU_MEMORY_REGION,
1708cfb42188SEric Auger .class_init = virtio_iommu_memory_region_class_init,
1709cfb42188SEric Auger };
1710cfb42188SEric Auger
virtio_register_types(void)171122c37a10SEric Auger static void virtio_register_types(void)
171222c37a10SEric Auger {
171322c37a10SEric Auger type_register_static(&virtio_iommu_info);
1714cfb42188SEric Auger type_register_static(&virtio_iommu_memory_region_info);
171522c37a10SEric Auger }
171622c37a10SEric Auger
171722c37a10SEric Auger type_init(virtio_register_types)
1718