/*
 * VFIO BASE CONTAINER
 *
 * Copyright (C) 2023 Intel Corporation.
 * Copyright Red Hat, Inc. 2023
 *
 * Authors: Yi Liu <yi.l.liu@intel.com>
 *          Eric Auger <eric.auger@redhat.com>
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include <sys/ioctl.h>
#include <linux/vfio.h>

#include "qemu/osdep.h"
#include "system/tcg.h"
#include "system/ram_addr.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "hw/vfio/vfio-container-base.h"
#include "hw/vfio/vfio-device.h" /* vfio_device_reset_handler */
#include "system/reset.h"
#include "vfio-helpers.h"

#include "trace.h"

static QLIST_HEAD(, VFIOAddressSpace) vfio_address_spaces =
    QLIST_HEAD_INITIALIZER(vfio_address_spaces);

VFIOAddressSpace *vfio_address_space_get(AddressSpace *as)
{
    VFIOAddressSpace *space;

    QLIST_FOREACH(space, &vfio_address_spaces, list) {
        if (space->as == as) {
            return space;
        }
    }

    /* No suitable VFIOAddressSpace, create a new one */
    space = g_malloc0(sizeof(*space));
    space->as = as;
    QLIST_INIT(&space->containers);

    if (QLIST_EMPTY(&vfio_address_spaces)) {
        qemu_register_reset(vfio_device_reset_handler, NULL);
    }

    QLIST_INSERT_HEAD(&vfio_address_spaces, space, list);

    return space;
}

void vfio_address_space_put(VFIOAddressSpace *space)
{
    if (!QLIST_EMPTY(&space->containers)) {
        return;
    }

    QLIST_REMOVE(space, list);
    g_free(space);

    if (QLIST_EMPTY(&vfio_address_spaces)) {
        qemu_unregister_reset(vfio_device_reset_handler, NULL);
    }
}

void vfio_address_space_insert(VFIOAddressSpace *space,
                               VFIOContainerBase *bcontainer)
{
    QLIST_INSERT_HEAD(&space->containers, bcontainer, next);
    bcontainer->space = space;
}

int vfio_container_dma_map(VFIOContainerBase *bcontainer,
                           hwaddr iova, ram_addr_t size,
                           void *vaddr, bool readonly, MemoryRegion *mr)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
    RAMBlock *rb = mr->ram_block;
    int mfd = rb ? qemu_ram_get_fd(rb) : -1;

    if (mfd >= 0 && vioc->dma_map_file) {
        unsigned long start = vaddr - qemu_ram_get_host_addr(rb);
        unsigned long offset = qemu_ram_get_fd_offset(rb);

        return vioc->dma_map_file(bcontainer, iova, size, mfd, start + offset,
                                  readonly);
    }
    g_assert(vioc->dma_map);
    return vioc->dma_map(bcontainer, iova, size, vaddr, readonly, mr);
}

int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
                             hwaddr iova, ram_addr_t size,
                             IOMMUTLBEntry *iotlb, bool unmap_all)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);

    g_assert(vioc->dma_unmap);
    return vioc->dma_unmap(bcontainer, iova, size, iotlb, unmap_all);
}

bool vfio_container_add_section_window(VFIOContainerBase *bcontainer,
                                       MemoryRegionSection *section,
                                       Error **errp)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);

    if (!vioc->add_window) {
        return true;
    }

    return vioc->add_window(bcontainer, section, errp);
}

void vfio_container_del_section_window(VFIOContainerBase *bcontainer,
                                       MemoryRegionSection *section)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);

    if (!vioc->del_window) {
        return;
    }

    return vioc->del_window(bcontainer, section);
}

int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
                                           bool start, Error **errp)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
    int ret;

    if (!bcontainer->dirty_pages_supported) {
        return 0;
    }

    g_assert(vioc->set_dirty_page_tracking);
    if (bcontainer->dirty_pages_started == start) {
        return 0;
    }

    ret = vioc->set_dirty_page_tracking(bcontainer, start, errp);
    if (!ret) {
        bcontainer->dirty_pages_started = start;
    }

    return ret;
}

static bool vfio_container_devices_dirty_tracking_is_started(
    const VFIOContainerBase *bcontainer)
{
    VFIODevice *vbasedev;

    QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
        if (!vbasedev->dirty_tracking) {
            return false;
        }
    }

    return true;
}

bool vfio_container_dirty_tracking_is_started(
    const VFIOContainerBase *bcontainer)
{
    return vfio_container_devices_dirty_tracking_is_started(bcontainer) ||
           bcontainer->dirty_pages_started;
}

bool vfio_container_devices_dirty_tracking_is_supported(
    const VFIOContainerBase *bcontainer)
{
    VFIODevice *vbasedev;

    QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
        if (vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) {
            return false;
        }
        if (!vbasedev->dirty_pages_supported) {
            return false;
        }
    }

    return true;
}

static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova,
                                          hwaddr size, void *bitmap)
{
    uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
                        sizeof(struct vfio_device_feature_dma_logging_report),
                        sizeof(uint64_t))] = {};
    struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
    struct vfio_device_feature_dma_logging_report *report =
        (struct vfio_device_feature_dma_logging_report *)feature->data;

    report->iova = iova;
    report->length = size;
    report->page_size = qemu_real_host_page_size();
    report->bitmap = (uintptr_t)bitmap;

    feature->argsz = sizeof(buf);
    feature->flags = VFIO_DEVICE_FEATURE_GET |
                     VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;

    return vbasedev->io_ops->device_feature(vbasedev, feature);
}

static int vfio_container_iommu_query_dirty_bitmap(
    const VFIOContainerBase *bcontainer, VFIOBitmap *vbmap,
    hwaddr iova, hwaddr size, Error **errp)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);

    g_assert(vioc->query_dirty_bitmap);
    return vioc->query_dirty_bitmap(bcontainer, vbmap, iova, size,
                                    errp);
}

static int vfio_container_devices_query_dirty_bitmap(
    const VFIOContainerBase *bcontainer, VFIOBitmap *vbmap,
    hwaddr iova, hwaddr size, Error **errp)
{
    VFIODevice *vbasedev;
    int ret;

    QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
        ret = vfio_device_dma_logging_report(vbasedev, iova, size,
                                             vbmap->bitmap);
        if (ret) {
            error_setg_errno(errp, -ret,
                             "%s: Failed to get DMA logging report, iova: "
                             "0x%" HWADDR_PRIx ", size: 0x%" HWADDR_PRIx,
                             vbasedev->name, iova, size);

            return ret;
        }
    }

    return 0;
}

int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
                                      uint64_t iova, uint64_t size,
                                      ram_addr_t ram_addr, Error **errp)
{
    bool all_device_dirty_tracking =
        vfio_container_devices_dirty_tracking_is_supported(bcontainer);
    uint64_t dirty_pages;
    VFIOBitmap vbmap;
    int ret;

    if (!bcontainer->dirty_pages_supported && !all_device_dirty_tracking) {
        cpu_physical_memory_set_dirty_range(ram_addr, size,
                                            tcg_enabled() ? DIRTY_CLIENTS_ALL :
                                            DIRTY_CLIENTS_NOCODE);
        return 0;
    }

    ret = vfio_bitmap_alloc(&vbmap, size);
    if (ret) {
        error_setg_errno(errp, -ret,
                         "Failed to allocate dirty tracking bitmap");
        return ret;
    }

    if (all_device_dirty_tracking) {
        ret = vfio_container_devices_query_dirty_bitmap(bcontainer, &vbmap,
                                                        iova, size, errp);
    } else {
        ret = vfio_container_iommu_query_dirty_bitmap(bcontainer, &vbmap,
                                                      iova, size, errp);
    }

    if (ret) {
        goto out;
    }

    dirty_pages = cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, ram_addr,
                                                         vbmap.pages);

    trace_vfio_container_query_dirty_bitmap(iova, size, vbmap.size, ram_addr,
                                            dirty_pages);
out:
    g_free(vbmap.bitmap);

    return ret;
}

static gpointer copy_iova_range(gconstpointer src, gpointer data)
{
    Range *source = (Range *)src;
    Range *dest = g_new(Range, 1);

    range_set_bounds(dest, range_lob(source), range_upb(source));
    return dest;
}

GList *vfio_container_get_iova_ranges(const VFIOContainerBase *bcontainer)
{
    assert(bcontainer);
    return g_list_copy_deep(bcontainer->iova_ranges, copy_iova_range, NULL);
}

static void vfio_container_instance_finalize(Object *obj)
{
    VFIOContainerBase *bcontainer = VFIO_IOMMU(obj);
    VFIOGuestIOMMU *giommu, *tmp;

    QLIST_SAFE_REMOVE(bcontainer, next);

    QLIST_FOREACH_SAFE(giommu, &bcontainer->giommu_list, giommu_next, tmp) {
        memory_region_unregister_iommu_notifier(
                MEMORY_REGION(giommu->iommu_mr), &giommu->n);
        QLIST_REMOVE(giommu, giommu_next);
        g_free(giommu);
    }

    g_list_free_full(bcontainer->iova_ranges, g_free);
}

static void vfio_container_instance_init(Object *obj)
{
    VFIOContainerBase *bcontainer = VFIO_IOMMU(obj);

    bcontainer->error = NULL;
    bcontainer->dirty_pages_supported = false;
    bcontainer->dma_max_mappings = 0;
    bcontainer->iova_ranges = NULL;
    QLIST_INIT(&bcontainer->giommu_list);
    QLIST_INIT(&bcontainer->vrdl_list);
}

static const TypeInfo types[] = {
    {
        .name = TYPE_VFIO_IOMMU,
        .parent = TYPE_OBJECT,
        .instance_init = vfio_container_instance_init,
        .instance_finalize = vfio_container_instance_finalize,
        .instance_size = sizeof(VFIOContainerBase),
        .class_size = sizeof(VFIOIOMMUClass),
        .abstract = true,
    },
};

DEFINE_TYPES(types)