/*
 * Low-level and IOMMU-backend-agnostic helpers used by VFIO devices,
 * related to regions, interrupts and capabilities
 *
 * Copyright Red Hat, Inc. 2012
 *
 * Authors:
 *  Alex Williamson <alex.williamson@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Based on qemu-kvm device-assignment:
 *  Adapted for KVM by Qumranet.
 *  Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
 *  Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
 *  Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
 *  Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
 *  Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>

#include "hw/vfio/vfio-common.h"
#include "hw/hw.h"
#include "trace.h"
#include "qapi/error.h"
#include "qemu/error-report.h"

/*
 * Common VFIO interrupt disable: with DATA_NONE/ACTION_TRIGGER,
 * count = 0 disables all interrupts within the index.
 */
void vfio_disable_irqindex(VFIODevice *vbasedev, int index)
{
    struct vfio_irq_set irq_set = {
        .argsz = sizeof(irq_set),
        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER,
        .index = index,
        .start = 0,
        .count = 0,
    };

    ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}

void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index)
{
    struct vfio_irq_set irq_set = {
        .argsz = sizeof(irq_set),
        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
        .index = index,
        .start = 0,
        .count = 1,
    };

    ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}

void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index)
{
    struct vfio_irq_set irq_set = {
        .argsz = sizeof(irq_set),
        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK,
        .index = index,
        .start = 0,
        .count = 1,
    };

    ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}
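
/*
 * Illustrative only: a typical INTx flow masks the interrupt at the host
 * while the guest services it, then unmasks it afterwards.  A minimal
 * sketch, assuming 'vbasedev' is a valid, opened VFIODevice:
 *
 *     vfio_mask_single_irqindex(vbasedev, VFIO_PCI_INTX_IRQ_INDEX);
 *     ... guest handles the interrupt ...
 *     vfio_unmask_single_irqindex(vbasedev, VFIO_PCI_INTX_IRQ_INDEX);
 */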

static inline const char *action_to_str(int action)
{
    switch (action) {
    case VFIO_IRQ_SET_ACTION_MASK:
        return "MASK";
    case VFIO_IRQ_SET_ACTION_UNMASK:
        return "UNMASK";
    case VFIO_IRQ_SET_ACTION_TRIGGER:
        return "TRIGGER";
    default:
        return "UNKNOWN ACTION";
    }
}

static const char *index_to_str(VFIODevice *vbasedev, int index)
{
    if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
        return NULL;
    }

    switch (index) {
    case VFIO_PCI_INTX_IRQ_INDEX:
        return "INTX";
    case VFIO_PCI_MSI_IRQ_INDEX:
        return "MSI";
    case VFIO_PCI_MSIX_IRQ_INDEX:
        return "MSIX";
    case VFIO_PCI_ERR_IRQ_INDEX:
        return "ERR";
    case VFIO_PCI_REQ_IRQ_INDEX:
        return "REQ";
    default:
        return NULL;
    }
}

/*
 * Set up (fd >= 0) or tear down (fd == -1) eventfd signaling for a single
 * interrupt on the given index/subindex.  On failure, a negative errno
 * value is returned and 'errp' describes the failing action.
 */
int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex,
                           int action, int fd, Error **errp)
{
    struct vfio_irq_set *irq_set;
    int argsz, ret = 0;
    const char *name;
    int32_t *pfd;

    argsz = sizeof(*irq_set) + sizeof(*pfd);

    irq_set = g_malloc0(argsz);
    irq_set->argsz = argsz;
    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | action;
    irq_set->index = index;
    irq_set->start = subindex;
    irq_set->count = 1;
    pfd = (int32_t *)&irq_set->data;
    *pfd = fd;

    if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
        ret = -errno;
    }
    g_free(irq_set);

    if (!ret) {
        return 0;
    }

    error_setg_errno(errp, -ret, "VFIO_DEVICE_SET_IRQS failure");

    name = index_to_str(vbasedev, index);
    if (name) {
        error_prepend(errp, "%s-%d: ", name, subindex);
    } else {
        error_prepend(errp, "index %d-%d: ", index, subindex);
    }
    error_prepend(errp,
                  "Failed to %s %s eventfd signaling for interrupt ",
                  fd < 0 ? "tear down" : "set up", action_to_str(action));
    return ret;
}
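
/*
 * Illustrative only: routing an MSI vector to an eventfd, then tearing it
 * down again with fd == -1.  A minimal sketch, assuming 'vbasedev' is a
 * valid VFIODevice and 'efd' an eventfd created by the caller:
 *
 *     Error *err = NULL;
 *
 *     if (vfio_set_irq_signaling(vbasedev, VFIO_PCI_MSI_IRQ_INDEX, 0,
 *                                VFIO_IRQ_SET_ACTION_TRIGGER, efd, &err)) {
 *         error_report_err(err);
 *     }
 *     ...
 *     vfio_set_irq_signaling(vbasedev, VFIO_PCI_MSI_IRQ_INDEX, 0,
 *                            VFIO_IRQ_SET_ACTION_TRIGGER, -1, NULL);
 */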

/*
 * IO Port/MMIO - Beware of endianness: VFIO is always little endian
 */
void vfio_region_write(void *opaque, hwaddr addr,
                       uint64_t data, unsigned size)
{
    VFIORegion *region = opaque;
    VFIODevice *vbasedev = region->vbasedev;
    union {
        uint8_t byte;
        uint16_t word;
        uint32_t dword;
        uint64_t qword;
    } buf;

    switch (size) {
    case 1:
        buf.byte = data;
        break;
    case 2:
        buf.word = cpu_to_le16(data);
        break;
    case 4:
        buf.dword = cpu_to_le32(data);
        break;
    case 8:
        buf.qword = cpu_to_le64(data);
        break;
    default:
        hw_error("vfio: unsupported write size, %u bytes", size);
        break;
    }

    if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
        error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64
                     ",%d) failed: %m",
                     __func__, vbasedev->name, region->nr,
                     addr, data, size);
    }

    trace_vfio_region_write(vbasedev->name, region->nr, addr, data, size);

    /*
     * A read or write to a BAR always signals an INTx EOI.  This will
     * do nothing if not pending (including not in INTx mode).  We assume
     * that a BAR access is in response to an interrupt and that BAR
     * accesses will service the interrupt.  Unfortunately, we don't know
     * which access will service the interrupt, so we're potentially
     * getting quite a few host interrupts per guest interrupt.
     */
    vbasedev->ops->vfio_eoi(vbasedev);
}

uint64_t vfio_region_read(void *opaque,
                          hwaddr addr, unsigned size)
{
    VFIORegion *region = opaque;
    VFIODevice *vbasedev = region->vbasedev;
    union {
        uint8_t byte;
        uint16_t word;
        uint32_t dword;
        uint64_t qword;
    } buf;
    uint64_t data = 0;

    if (pread(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
        error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %m",
                     __func__, vbasedev->name, region->nr,
                     addr, size);
        return (uint64_t)-1;
    }
    switch (size) {
    case 1:
        data = buf.byte;
        break;
    case 2:
        data = le16_to_cpu(buf.word);
        break;
    case 4:
        data = le32_to_cpu(buf.dword);
        break;
    case 8:
        data = le64_to_cpu(buf.qword);
        break;
    default:
        hw_error("vfio: unsupported read size, %u bytes", size);
        break;
    }

    trace_vfio_region_read(vbasedev->name, region->nr, addr, size, data);

    /* Same as write above */
    vbasedev->ops->vfio_eoi(vbasedev);

    return data;
}
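
/*
 * Illustrative only: a 4-byte guest write of 0x12345678 at BAR offset 0x10
 * becomes a little-endian pwrite() of bytes 78 56 34 12 at
 * region->fd_offset + 0x10; the matching vfio_region_read() converts the
 * bytes back with le32_to_cpu(), so the guest sees 0x12345678 again
 * regardless of host endianness.
 */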

const MemoryRegionOps vfio_region_ops = {
    .read = vfio_region_read,
    .write = vfio_region_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .valid = {
        .min_access_size = 1,
        .max_access_size = 8,
    },
    .impl = {
        .min_access_size = 1,
        .max_access_size = 8,
    },
};

int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size)
{
    vbmap->pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size();
    /* One bit per host page, rounded up to 64-bit (__u64) granularity */
    vbmap->size = ROUND_UP(vbmap->pages, sizeof(__u64) * BITS_PER_BYTE) /
                                         BITS_PER_BYTE;
    vbmap->bitmap = g_try_malloc0(vbmap->size);
    if (!vbmap->bitmap) {
        return -ENOMEM;
    }

    return 0;
}
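
/*
 * Illustrative arithmetic: with 4 KiB host pages, a 1 GiB range yields
 * vbmap->pages = 262144 and vbmap->size = ROUND_UP(262144, 64) / 8 =
 * 32768 bytes, i.e. a 32 KiB bitmap with one bit per page.
 */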

struct vfio_info_cap_header *
vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id)
{
    struct vfio_info_cap_header *hdr;

    /*
     * Walk the capability chain; a 'next' offset of 0 terminates it,
     * since 'ptr + 0' wraps back to the start and stops the loop.
     */
    for (hdr = ptr + cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
        if (hdr->id == id) {
            return hdr;
        }
    }

    return NULL;
}

struct vfio_info_cap_header *
vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id)
{
    if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) {
        return NULL;
    }

    return vfio_get_cap((void *)info, info->cap_offset, id);
}

struct vfio_info_cap_header *
vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id)
{
    if (!(info->flags & VFIO_DEVICE_FLAGS_CAPS)) {
        return NULL;
    }

    return vfio_get_cap((void *)info, info->cap_offset, id);
}

static int vfio_setup_region_sparse_mmaps(VFIORegion *region,
                                          struct vfio_region_info *info)
{
    struct vfio_info_cap_header *hdr;
    struct vfio_region_info_cap_sparse_mmap *sparse;
    int i, j;

    hdr = vfio_get_region_info_cap(info, VFIO_REGION_INFO_CAP_SPARSE_MMAP);
    if (!hdr) {
        return -ENODEV;
    }

    sparse = container_of(hdr, struct vfio_region_info_cap_sparse_mmap, header);

    trace_vfio_region_sparse_mmap_header(region->vbasedev->name,
                                         region->nr, sparse->nr_areas);

    region->mmaps = g_new0(VFIOMmap, sparse->nr_areas);

    for (i = 0, j = 0; i < sparse->nr_areas; i++) {
        if (sparse->areas[i].size) {
            trace_vfio_region_sparse_mmap_entry(i, sparse->areas[i].offset,
                                                sparse->areas[i].offset +
                                                sparse->areas[i].size - 1);
            region->mmaps[j].offset = sparse->areas[i].offset;
            region->mmaps[j].size = sparse->areas[i].size;
            j++;
        }
    }

    region->nr_mmaps = j;
    region->mmaps = g_realloc(region->mmaps, j * sizeof(VFIOMmap));

    return 0;
}
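
/*
 * Illustrative only: a device that does not want its MSI-X table mapped
 * typically advertises a sparse layout skipping the table page, e.g. for
 * a hypothetical 64 KiB BAR with the table at offset 0x3000:
 *
 *     area[0]: offset 0x0,    size 0x3000
 *     area[1]: offset 0x4000, size 0xc000
 *
 * leaving accesses to 0x3000-0x3fff trapped through vfio_region_ops.
 */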

int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
                      int index, const char *name)
{
    struct vfio_region_info *info;
    int ret;

    ret = vfio_get_region_info(vbasedev, index, &info);
    if (ret) {
        return ret;
    }

    region->vbasedev = vbasedev;
    region->flags = info->flags;
    region->size = info->size;
    region->fd_offset = info->offset;
    region->nr = index;

    if (region->size) {
        region->mem = g_new0(MemoryRegion, 1);
        memory_region_init_io(region->mem, obj, &vfio_region_ops,
                              region, name, region->size);

        if (!vbasedev->no_mmap &&
            region->flags & VFIO_REGION_INFO_FLAG_MMAP) {

            ret = vfio_setup_region_sparse_mmaps(region, info);

            /* Fall back to a single mmap covering the whole region */
            if (ret) {
                region->nr_mmaps = 1;
                region->mmaps = g_new0(VFIOMmap, region->nr_mmaps);
                region->mmaps[0].offset = 0;
                region->mmaps[0].size = region->size;
            }
        }
    }

    g_free(info);

    trace_vfio_region_setup(vbasedev->name, index, name,
                            region->flags, region->fd_offset, region->size);
    return 0;
}
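
/*
 * Illustrative only: the usual lifecycle of a VFIORegion, sketched for a
 * hypothetical PCI BAR 0 on 'vdev':
 *
 *     vfio_region_setup(OBJECT(vdev), &vdev->vbasedev, &region, 0, "BAR0");
 *     vfio_region_mmap(&region);        ... map the fast paths ...
 *     ...
 *     vfio_region_exit(&region);        ... detach from the machine ...
 *     vfio_region_finalize(&region);    ... unmap and free everything ...
 */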

static void vfio_subregion_unmap(VFIORegion *region, int index)
{
    trace_vfio_region_unmap(memory_region_name(&region->mmaps[index].mem),
                            region->mmaps[index].offset,
                            region->mmaps[index].offset +
                            region->mmaps[index].size - 1);
    memory_region_del_subregion(region->mem, &region->mmaps[index].mem);
    munmap(region->mmaps[index].mmap, region->mmaps[index].size);
    object_unparent(OBJECT(&region->mmaps[index].mem));
    region->mmaps[index].mmap = NULL;
}

int vfio_region_mmap(VFIORegion *region)
{
    int i, prot = 0;
    char *name;

    if (!region->mem) {
        return 0;
    }

    prot |= region->flags & VFIO_REGION_INFO_FLAG_READ ? PROT_READ : 0;
    prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0;

    for (i = 0; i < region->nr_mmaps; i++) {
        region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot,
                                     MAP_SHARED, region->vbasedev->fd,
                                     region->fd_offset +
                                     region->mmaps[i].offset);
        if (region->mmaps[i].mmap == MAP_FAILED) {
            int ret = -errno;

            trace_vfio_region_mmap_fault(memory_region_name(region->mem), i,
                                         region->fd_offset +
                                         region->mmaps[i].offset,
                                         region->fd_offset +
                                         region->mmaps[i].offset +
                                         region->mmaps[i].size - 1, ret);

            region->mmaps[i].mmap = NULL;

            /* Roll back any areas already mapped on this call */
            for (i--; i >= 0; i--) {
                vfio_subregion_unmap(region, i);
            }

            return ret;
        }

        name = g_strdup_printf("%s mmaps[%d]",
                               memory_region_name(region->mem), i);
        memory_region_init_ram_device_ptr(&region->mmaps[i].mem,
                                          memory_region_owner(region->mem),
                                          name, region->mmaps[i].size,
                                          region->mmaps[i].mmap);
        g_free(name);
        memory_region_add_subregion(region->mem, region->mmaps[i].offset,
                                    &region->mmaps[i].mem);

        trace_vfio_region_mmap(memory_region_name(&region->mmaps[i].mem),
                               region->mmaps[i].offset,
                               region->mmaps[i].offset +
                               region->mmaps[i].size - 1);
    }

    return 0;
}

void vfio_region_unmap(VFIORegion *region)
{
    int i;

    if (!region->mem) {
        return;
    }

    for (i = 0; i < region->nr_mmaps; i++) {
        if (region->mmaps[i].mmap) {
            vfio_subregion_unmap(region, i);
        }
    }
}

void vfio_region_exit(VFIORegion *region)
{
    int i;

    if (!region->mem) {
        return;
    }

    for (i = 0; i < region->nr_mmaps; i++) {
        if (region->mmaps[i].mmap) {
            memory_region_del_subregion(region->mem, &region->mmaps[i].mem);
        }
    }

    trace_vfio_region_exit(region->vbasedev->name, region->nr);
}

void vfio_region_finalize(VFIORegion *region)
{
    int i;

    if (!region->mem) {
        return;
    }

    for (i = 0; i < region->nr_mmaps; i++) {
        if (region->mmaps[i].mmap) {
            munmap(region->mmaps[i].mmap, region->mmaps[i].size);
            object_unparent(OBJECT(&region->mmaps[i].mem));
        }
    }

    object_unparent(OBJECT(region->mem));

    g_free(region->mem);
    g_free(region->mmaps);

    trace_vfio_region_finalize(region->vbasedev->name, region->nr);

    region->mem = NULL;
    region->mmaps = NULL;
    region->nr_mmaps = 0;
    region->size = 0;
    region->flags = 0;
    region->nr = 0;
}

void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled)
{
    int i;

    if (!region->mem) {
        return;
    }

    for (i = 0; i < region->nr_mmaps; i++) {
        if (region->mmaps[i].mmap) {
            memory_region_set_enabled(&region->mmaps[i].mem, enabled);
        }
    }

    trace_vfio_region_mmaps_set_enabled(memory_region_name(region->mem),
                                        enabled);
}
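
/*
 * Illustrative only: callers toggle the mmap'd subregions to switch between
 * fast direct access and trapping every access through vfio_region_ops,
 * e.g. while something needs to observe the traffic:
 *
 *     vfio_region_mmaps_set_enabled(&region, false);
 *     ... accesses now go through vfio_region_read/write ...
 *     vfio_region_mmaps_set_enabled(&region, true);
 */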

int vfio_get_region_info(VFIODevice *vbasedev, int index,
                         struct vfio_region_info **info)
{
    size_t argsz = sizeof(struct vfio_region_info);

    *info = g_malloc0(argsz);

    (*info)->index = index;
retry:
    (*info)->argsz = argsz;

    if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) {
        g_free(*info);
        *info = NULL;
        return -errno;
    }

    /* The kernel wants a larger buffer, e.g. for capability chains */
    if ((*info)->argsz > argsz) {
        argsz = (*info)->argsz;
        *info = g_realloc(*info, argsz);

        goto retry;
    }

    return 0;
}
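
/*
 * Illustrative only: callers own the returned buffer and must free it.
 * A minimal sketch, assuming 'vbasedev' is a valid VFIO PCI device:
 *
 *     struct vfio_region_info *info;
 *
 *     if (!vfio_get_region_info(vbasedev, VFIO_PCI_CONFIG_REGION_INDEX,
 *                               &info)) {
 *         ... use info->size, info->offset, info->flags ...
 *         g_free(info);
 *     }
 */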

int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
                             uint32_t subtype, struct vfio_region_info **info)
{
    int i;

    for (i = 0; i < vbasedev->num_regions; i++) {
        struct vfio_info_cap_header *hdr;
        struct vfio_region_info_cap_type *cap_type;

        if (vfio_get_region_info(vbasedev, i, info)) {
            continue;
        }

        hdr = vfio_get_region_info_cap(*info, VFIO_REGION_INFO_CAP_TYPE);
        if (!hdr) {
            g_free(*info);
            continue;
        }

        cap_type = container_of(hdr, struct vfio_region_info_cap_type, header);

        trace_vfio_get_dev_region(vbasedev->name, i,
                                  cap_type->type, cap_type->subtype);

        if (cap_type->type == type && cap_type->subtype == subtype) {
            return 0;
        }

        g_free(*info);
    }

    *info = NULL;
    return -ENODEV;
}
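
/*
 * Illustrative only: device-specific regions are located by (type, subtype)
 * rather than by a fixed index, e.g. an Intel IGD OpRegion:
 *
 *     struct vfio_region_info *opregion;
 *
 *     if (!vfio_get_dev_region_info(vbasedev,
 *             VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
 *             VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion)) {
 *         ... use the region, then g_free(opregion) ...
 *     }
 */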

bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
{
    struct vfio_region_info *info = NULL;
    bool ret = false;

    if (!vfio_get_region_info(vbasedev, region, &info)) {
        if (vfio_get_region_info_cap(info, cap_type)) {
            ret = true;
        }
        g_free(info);
    }

    return ret;
}
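
/*
 * Illustrative only: probing whether a region advertises a capability
 * before committing to a strategy:
 *
 *     if (vfio_has_region_cap(vbasedev, index,
 *                             VFIO_REGION_INFO_CAP_SPARSE_MMAP)) {
 *         ... expect vfio_setup_region_sparse_mmaps() to succeed ...
 *     }
 */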