/*
 * vhost-vdpa
 *
 *  Copyright(c) 2017-2018 Intel Corporation.
 *  Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <linux/vhost.h>
#include <linux/vfio.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio-net.h"
#include "hw/virtio/vhost-vdpa.h"
#include "qemu/main-loop.h"
#include <linux/kvm.h>
#include "sysemu/kvm.h"

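/*
 * Return true if the listener should ignore this section: only RAM and
 * IOMMU-backed regions are candidates for vDPA DMA mapping.
 */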
static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section)
{
    return (!memory_region_is_ram(section->mr) &&
            !memory_region_is_iommu(section->mr)) ||
           /*
            * Sizing an enabled 64-bit BAR can cause spurious mappings to
            * addresses in the upper part of the 64-bit address space.  These
            * are never accessed by the CPU and beyond the address width of
            * some IOMMU hardware.  TODO: VDPA should tell us the IOMMU width.
            */
           section->offset_within_address_space & (1ULL << 63);
}

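/*
 * Install a host-VA -> IOVA mapping in the device IOTLB by writing a
 * VHOST_IOTLB_UPDATE message to the vhost-vdpa device fd.
 */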
static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
                              void *vaddr, bool readonly)
{
    struct vhost_msg_v2 msg;
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
    msg.iotlb.type = VHOST_IOTLB_UPDATE;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
            fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

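/*
 * Remove an existing IOVA range from the device IOTLB with a
 * VHOST_IOTLB_INVALIDATE message.
 */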
static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
                                hwaddr size)
{
    struct vhost_msg_v2 msg;
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
            fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

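/*
 * MemoryListener callback: a RAM section became visible in the guest
 * address space.  Map it into the device IOTLB so the vDPA device can DMA
 * to and from it.
 */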
static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    void *vaddr;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    memory_region_ref(section->mr);

    /* Here we assume that memory_region_is_ram(section->mr) == true */

    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    llsize = int128_sub(llend, int128_make64(iova));

    ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
                             vaddr, section->readonly);
    if (ret) {
        goto fail;
    }

    return;

fail:
    if (memory_region_is_ram_device(section->mr)) {
        /* An unexpected mapping failure is not fatal for RAM devices. */
        error_report("failed to vdpa_dma_map. pci p2p may not work");
        return;
    }
    /*
     * At runtime there is not much we can do other than report the
     * failure; the device will not be able to DMA to this region.
     */
    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
}

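/*
 * MemoryListener callback: a previously mapped RAM section is going away.
 * Invalidate the corresponding IOTLB range and drop our reference on the
 * memory region.
 */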
static void vhost_vdpa_listener_region_del(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    int ret;
    bool try_unmap = true;

    if (vhost_vdpa_listener_skipped_section(section)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    llsize = int128_sub(llend, int128_make64(iova));

    if (try_unmap) {
        ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
        if (ret) {
            error_report("vhost-vdpa: DMA unmap failed");
        }
    }

    memory_region_unref(section->mr);
}
/*
 * The IOTLB API is used by vhost-vdpa, which requires incremental updates
 * of the mapping.  We therefore cannot use the generic vhost memory
 * listener, which depends on the addnop() callback.
 */
static const MemoryListener vhost_vdpa_memory_listener = {
    .region_add = vhost_vdpa_listener_region_add,
    .region_del = vhost_vdpa_listener_region_del,
};

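/*
 * Issue a vhost ioctl on the vhost-vdpa device fd.  Most of the vhost_ops
 * below are thin wrappers around this helper.
 */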
static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
                           void *arg)
{
    struct vhost_vdpa *v = dev->opaque;
    int fd = v->device_fd;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    return ioctl(fd, request, arg);
}

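/*
 * OR the given bits into the device status field (read-modify-write via
 * VHOST_VDPA_GET_STATUS / VHOST_VDPA_SET_STATUS).
 */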
static void vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;

    if (vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s)) {
        return;
    }

    s |= status;

    vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
}

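/*
 * Backend init: attach the vhost_vdpa state to the vhost_dev, cache the
 * backend features, set up the memory listener and acknowledge the device
 * (ACKNOWLEDGE | DRIVER) as required by the virtio initialization sequence.
 */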
static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque)
{
    struct vhost_vdpa *v;
    uint64_t features;
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    v = opaque;
    dev->opaque = opaque;
    vhost_vdpa_call(dev, VHOST_GET_FEATURES, &features);
    dev->backend_features = features;
    v->listener = vhost_vdpa_memory_listener;
    v->msg_type = VHOST_IOTLB_MSG_V2;

    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);

    return 0;
}

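/* Backend teardown: unregister the memory listener and detach the state. */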
static int vhost_vdpa_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v;
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    v = dev->opaque;
    memory_listener_unregister(&v->listener);

    dev->opaque = NULL;
    return 0;
}

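/*
 * vhost-vdpa does not use the vhost memory table, so there is no backend
 * limit on the number of memory slots.
 */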
static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    return INT_MAX;
}

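/*
 * The memory table is not used by vhost-vdpa (mappings are pushed through
 * the IOTLB listener instead), so only the padding field is checked here.
 */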
static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    if (mem->padding) {
        return -1;
    }

    return 0;
}

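/*
 * Negotiate features with the device, then set FEATURES_OK and read the
 * status back to confirm the device accepted the feature set.
 */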
static int vhost_vdpa_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    uint8_t status = 0;
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
    if (ret) {
        return ret;
    }
    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
    vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

    return !(status & VIRTIO_CONFIG_S_FEATURES_OK);
}

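/* Query the virtio device ID (e.g. VIRTIO_ID_NET) from the vDPA device. */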
int vhost_vdpa_get_device_id(struct vhost_dev *dev,
                             uint32_t *device_id)
{
    return vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
}

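/* Writing a zero status resets the vDPA device, as defined by virtio. */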
static int vhost_vdpa_reset_device(struct vhost_dev *dev)
{
    uint8_t status = 0;

    return vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
}

static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    return idx - dev->vq_index;
}

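/*
 * Enable every virtqueue owned by this vhost_dev via
 * VHOST_VDPA_SET_VRING_ENABLE.
 */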
static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
{
    int i;
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = 1,
        };
        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
    }
    return 0;
}

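/*
 * Write a slice of the device config space.  The variable-sized payload is
 * carried in a heap-allocated struct vhost_vdpa_config.
 */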
static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags)
{
    struct vhost_vdpa_config *config;
    int ret;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);

    config = g_malloc(size + config_size);
    config->off = offset;
    config->len = size;
    memcpy(config->buf, data, size);
    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
    g_free(config);
    return ret;
}

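/*
 * Read the device config space into a temporary buffer and copy it back to
 * the caller.
 */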
static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len)
{
    struct vhost_vdpa_config *v_config;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    int ret;

    v_config = g_malloc(config_len + config_size);
    v_config->len = config_len;
    v_config->off = 0;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
    memcpy(config, v_config->buf, config_len);
    g_free(v_config);
    return ret;
}

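/*
 * Start/stop the device.  On start, register the memory listener (which
 * replays the existing RAM sections as IOTLB mappings), enable the vrings
 * and set DRIVER_OK; on stop, reset the device back to ACKNOWLEDGE | DRIVER
 * and unregister the listener.
 */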
static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
    struct vhost_vdpa *v = dev->opaque;
    if (started) {
        uint8_t status = 0;
        memory_listener_register(&v->listener, &address_space_memory);
        vhost_vdpa_set_vring_ready(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
        vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

        return !(status & VIRTIO_CONFIG_S_DRIVER_OK);
    } else {
        vhost_vdpa_reset_device(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                   VIRTIO_CONFIG_S_DRIVER);
        memory_listener_unregister(&v->listener);

        return 0;
    }
}

static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
}

static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
}

static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
}

static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
}

static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
}

static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
}

static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
}

static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                   uint64_t *features)
{
    return vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
}

static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
}

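/*
 * vhost-vdpa expects guest physical addresses for the vring, which the
 * IOTLB mappings installed by the memory listener translate to host
 * addresses, so report the *_phys addresses rather than userspace VAs.
 */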
static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
                    struct vhost_vring_addr *addr, struct vhost_virtqueue *vq)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
    addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
    addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    return 0;
}

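/*
 * A vDPA device always accesses guest memory through its own translation,
 * so IOMMU handling is mandatory for this backend.
 */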
static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
{
    return true;
}

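/*
 * vhost_ops for the vDPA backend.  Hooks that vhost-vdpa does not need
 * (vring endianness, shared-memory log, device IOTLB messages, ...) are
 * left NULL.
 */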
const VhostOps vdpa_ops = {
        .backend_type = VHOST_BACKEND_TYPE_VDPA,
        .vhost_backend_init = vhost_vdpa_init,
        .vhost_backend_cleanup = vhost_vdpa_cleanup,
        .vhost_set_log_base = vhost_vdpa_set_log_base,
        .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
        .vhost_set_vring_num = vhost_vdpa_set_vring_num,
        .vhost_set_vring_base = vhost_vdpa_set_vring_base,
        .vhost_get_vring_base = vhost_vdpa_get_vring_base,
        .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
        .vhost_set_vring_call = vhost_vdpa_set_vring_call,
        .vhost_get_features = vhost_vdpa_get_features,
        .vhost_set_owner = vhost_vdpa_set_owner,
        .vhost_set_vring_endian = NULL,
        .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
        .vhost_set_mem_table = vhost_vdpa_set_mem_table,
        .vhost_set_features = vhost_vdpa_set_features,
        .vhost_reset_device = vhost_vdpa_reset_device,
        .vhost_get_vq_index = vhost_vdpa_get_vq_index,
        .vhost_get_config = vhost_vdpa_get_config,
        .vhost_set_config = vhost_vdpa_set_config,
        .vhost_requires_shm_log = NULL,
        .vhost_migration_done = NULL,
        .vhost_backend_can_merge = NULL,
        .vhost_net_set_mtu = NULL,
        .vhost_set_iotlb_callback = NULL,
        .vhost_send_device_iotlb_msg = NULL,
        .vhost_dev_start = vhost_vdpa_dev_start,
        .vhost_get_device_id = vhost_vdpa_get_device_id,
        .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
        .vhost_force_iommu = vhost_vdpa_force_iommu,
};