1 /* 2 * low level and IOMMU backend agnostic helpers used by VFIO devices, 3 * related to regions, interrupts, capabilities 4 * 5 * Copyright Red Hat, Inc. 2012 6 * 7 * Authors: 8 * Alex Williamson <alex.williamson@redhat.com> 9 * 10 * This work is licensed under the terms of the GNU GPL, version 2. See 11 * the COPYING file in the top-level directory. 12 * 13 * Based on qemu-kvm device-assignment: 14 * Adapted for KVM by Qumranet. 15 * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com) 16 * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com) 17 * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com) 18 * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com) 19 * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com) 20 */ 21 22 #include "qemu/osdep.h" 23 #include <sys/ioctl.h> 24 25 #include "hw/vfio/vfio-common.h" 26 #include "hw/vfio/pci.h" 27 #include "hw/hw.h" 28 #include "trace.h" 29 #include "qapi/error.h" 30 #include "qemu/error-report.h" 31 #include "qemu/units.h" 32 #include "monitor/monitor.h" 33 34 /* 35 * Common VFIO interrupt disable 36 */ 37 void vfio_disable_irqindex(VFIODevice *vbasedev, int index) 38 { 39 struct vfio_irq_set irq_set = { 40 .argsz = sizeof(irq_set), 41 .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER, 42 .index = index, 43 .start = 0, 44 .count = 0, 45 }; 46 47 ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); 48 } 49 50 void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index) 51 { 52 struct vfio_irq_set irq_set = { 53 .argsz = sizeof(irq_set), 54 .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK, 55 .index = index, 56 .start = 0, 57 .count = 1, 58 }; 59 60 ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); 61 } 62 63 void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index) 64 { 65 struct vfio_irq_set irq_set = { 66 .argsz = sizeof(irq_set), 67 .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK, 68 .index = index, 69 .start = 0, 70 .count = 1, 71 }; 72 73 ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); 74 } 75 76 static inline const char *action_to_str(int action) 77 { 78 switch (action) { 79 case VFIO_IRQ_SET_ACTION_MASK: 80 return "MASK"; 81 case VFIO_IRQ_SET_ACTION_UNMASK: 82 return "UNMASK"; 83 case VFIO_IRQ_SET_ACTION_TRIGGER: 84 return "TRIGGER"; 85 default: 86 return "UNKNOWN ACTION"; 87 } 88 } 89 90 static const char *index_to_str(VFIODevice *vbasedev, int index) 91 { 92 if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) { 93 return NULL; 94 } 95 96 switch (index) { 97 case VFIO_PCI_INTX_IRQ_INDEX: 98 return "INTX"; 99 case VFIO_PCI_MSI_IRQ_INDEX: 100 return "MSI"; 101 case VFIO_PCI_MSIX_IRQ_INDEX: 102 return "MSIX"; 103 case VFIO_PCI_ERR_IRQ_INDEX: 104 return "ERR"; 105 case VFIO_PCI_REQ_IRQ_INDEX: 106 return "REQ"; 107 default: 108 return NULL; 109 } 110 } 111 112 bool vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex, 113 int action, int fd, Error **errp) 114 { 115 ERRP_GUARD(); 116 g_autofree struct vfio_irq_set *irq_set = NULL; 117 int argsz; 118 const char *name; 119 int32_t *pfd; 120 121 argsz = sizeof(*irq_set) + sizeof(*pfd); 122 123 irq_set = g_malloc0(argsz); 124 irq_set->argsz = argsz; 125 irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | action; 126 irq_set->index = index; 127 irq_set->start = subindex; 128 irq_set->count = 1; 129 pfd = (int32_t *)&irq_set->data; 130 *pfd = fd; 131 132 if (!ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) { 133 return true; 134 } 135 136 error_setg_errno(errp, errno, "VFIO_DEVICE_SET_IRQS failure"); 137 138 name = index_to_str(vbasedev, index); 139 if (name) { 140 error_prepend(errp, "%s-%d: ", name, subindex); 141 } else { 142 error_prepend(errp, "index %d-%d: ", index, subindex); 143 } 144 error_prepend(errp, 145 "Failed to %s %s eventfd signaling for interrupt ", 146 fd < 0 ? "tear down" : "set up", action_to_str(action)); 147 return false; 148 } 149 150 int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size) 151 { 152 vbmap->pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size(); 153 vbmap->size = ROUND_UP(vbmap->pages, sizeof(__u64) * BITS_PER_BYTE) / 154 BITS_PER_BYTE; 155 vbmap->bitmap = g_try_malloc0(vbmap->size); 156 if (!vbmap->bitmap) { 157 return -ENOMEM; 158 } 159 160 return 0; 161 } 162 163 struct vfio_info_cap_header * 164 vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id) 165 { 166 struct vfio_info_cap_header *hdr; 167 168 for (hdr = ptr + cap_offset; hdr != ptr; hdr = ptr + hdr->next) { 169 if (hdr->id == id) { 170 return hdr; 171 } 172 } 173 174 return NULL; 175 } 176 177 struct vfio_info_cap_header * 178 vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id) 179 { 180 if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) { 181 return NULL; 182 } 183 184 return vfio_get_cap((void *)info, info->cap_offset, id); 185 } 186 187 struct vfio_info_cap_header * 188 vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id) 189 { 190 if (!(info->flags & VFIO_DEVICE_FLAGS_CAPS)) { 191 return NULL; 192 } 193 194 return vfio_get_cap((void *)info, info->cap_offset, id); 195 } 196 197 int vfio_get_region_info(VFIODevice *vbasedev, int index, 198 struct vfio_region_info **info) 199 { 200 size_t argsz = sizeof(struct vfio_region_info); 201 202 *info = g_malloc0(argsz); 203 204 (*info)->index = index; 205 retry: 206 (*info)->argsz = argsz; 207 208 if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) { 209 g_free(*info); 210 *info = NULL; 211 return -errno; 212 } 213 214 if ((*info)->argsz > argsz) { 215 argsz = (*info)->argsz; 216 *info = g_realloc(*info, argsz); 217 218 goto retry; 219 } 220 221 return 0; 222 } 223 224 int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type, 225 uint32_t subtype, struct vfio_region_info **info) 226 { 227 int i; 228 229 for (i = 0; i < vbasedev->num_regions; i++) { 230 struct vfio_info_cap_header *hdr; 231 struct vfio_region_info_cap_type *cap_type; 232 233 if (vfio_get_region_info(vbasedev, i, info)) { 234 continue; 235 } 236 237 hdr = vfio_get_region_info_cap(*info, VFIO_REGION_INFO_CAP_TYPE); 238 if (!hdr) { 239 g_free(*info); 240 continue; 241 } 242 243 cap_type = container_of(hdr, struct vfio_region_info_cap_type, header); 244 245 trace_vfio_get_dev_region(vbasedev->name, i, 246 cap_type->type, cap_type->subtype); 247 248 if (cap_type->type == type && cap_type->subtype == subtype) { 249 return 0; 250 } 251 252 g_free(*info); 253 } 254 255 *info = NULL; 256 return -ENODEV; 257 } 258 259 bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type) 260 { 261 g_autofree struct vfio_region_info *info = NULL; 262 bool ret = false; 263 264 if (!vfio_get_region_info(vbasedev, region, &info)) { 265 if (vfio_get_region_info_cap(info, cap_type)) { 266 ret = true; 267 } 268 } 269 270 return ret; 271 } 272 273 bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp) 274 { 275 ERRP_GUARD(); 276 struct stat st; 277 278 if (vbasedev->fd < 0) { 279 if (stat(vbasedev->sysfsdev, &st) < 0) { 280 error_setg_errno(errp, errno, "no such host device"); 281 error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); 282 return false; 283 } 284 /* User may specify a name, e.g: VFIO platform device */ 285 if (!vbasedev->name) { 286 vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); 287 } 288 } else { 289 if (!vbasedev->iommufd) { 290 error_setg(errp, "Use FD passing only with iommufd backend"); 291 return false; 292 } 293 /* 294 * Give a name with fd so any function printing out vbasedev->name 295 * will not break. 296 */ 297 if (!vbasedev->name) { 298 vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); 299 } 300 } 301 302 return true; 303 } 304 305 void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) 306 { 307 ERRP_GUARD(); 308 int fd = monitor_fd_param(monitor_cur(), str, errp); 309 310 if (fd < 0) { 311 error_prepend(errp, "Could not parse remote object fd %s:", str); 312 return; 313 } 314 vbasedev->fd = fd; 315 } 316 317 void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, 318 DeviceState *dev, bool ram_discard) 319 { 320 vbasedev->type = type; 321 vbasedev->ops = ops; 322 vbasedev->dev = dev; 323 vbasedev->fd = -1; 324 325 vbasedev->ram_block_discard_allowed = ram_discard; 326 } 327 328 int vfio_device_get_aw_bits(VFIODevice *vdev) 329 { 330 /* 331 * iova_ranges is a sorted list. For old kernels that support 332 * VFIO but not support query of iova ranges, iova_ranges is NULL, 333 * in this case HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX(64) is returned. 334 */ 335 GList *l = g_list_last(vdev->bcontainer->iova_ranges); 336 337 if (l) { 338 Range *range = l->data; 339 return range_get_last_bit(range) + 1; 340 } 341 342 return HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX; 343 } 344 345 bool vfio_device_is_mdev(VFIODevice *vbasedev) 346 { 347 g_autofree char *subsys = NULL; 348 g_autofree char *tmp = NULL; 349 350 if (!vbasedev->sysfsdev) { 351 return false; 352 } 353 354 tmp = g_strdup_printf("%s/subsystem", vbasedev->sysfsdev); 355 subsys = realpath(tmp, NULL); 356 return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0); 357 } 358 359 bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp) 360 { 361 HostIOMMUDevice *hiod = vbasedev->hiod; 362 363 if (!hiod) { 364 return true; 365 } 366 367 return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp); 368 } 369 370 VFIODevice *vfio_get_vfio_device(Object *obj) 371 { 372 if (object_dynamic_cast(obj, TYPE_VFIO_PCI)) { 373 return &VFIO_PCI(obj)->vbasedev; 374 } else { 375 return NULL; 376 } 377 } 378