/*
 * vhost-vdpa
 *
 *  Copyright(c) 2017-2018 Intel Corporation.
 *  Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <linux/vhost.h>
#include <linux/vfio.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio-net.h"
#include "hw/virtio/vhost-vdpa.h"
#include "exec/address-spaces.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "cpu.h"

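/*
 * Return true if the section should not be mapped for the device: anything
 * that is neither RAM nor an IOMMU region, plus the top half of the 64-bit
 * address space (see the in-function comment below).
 */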
static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section)
{
    return (!memory_region_is_ram(section->mr) &&
            !memory_region_is_iommu(section->mr)) ||
           /*
            * Sizing an enabled 64-bit BAR can cause spurious mappings to
            * addresses in the upper part of the 64-bit address space.  These
            * are never accessed by the CPU and beyond the address width of
            * some IOMMU hardware.  TODO: VDPA should tell us the IOMMU width.
            */
           section->offset_within_address_space & (1ULL << 63);
}

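/*
 * Map [iova, iova + size) to the process virtual address vaddr by writing a
 * VHOST_IOTLB_UPDATE message to the vhost-vdpa device fd.
 */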
static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
                              void *vaddr, bool readonly)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
    msg.iotlb.type = VHOST_IOTLB_UPDATE;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write IOTLB update, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

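/*
 * Drop the mapping for [iova, iova + size) with a VHOST_IOTLB_INVALIDATE
 * message.
 */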
static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
                                hwaddr size)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write IOTLB invalidate, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

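/*
 * If the backend advertises VHOST_BACKEND_F_IOTLB_BATCH, bracket the
 * IOTLB updates generated by a memory transaction with BATCH_BEGIN and
 * BATCH_END messages so the kernel can apply them in one go.
 */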
static void vhost_vdpa_listener_begin(MemoryListener *listener)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    struct vhost_dev *dev = v->dev;
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;

    if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
        return;
    }

    msg.type = v->msg_type;
    msg.iotlb.type = VHOST_IOTLB_BATCH_BEGIN;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write IOTLB batch begin, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }
}

static void vhost_vdpa_listener_commit(MemoryListener *listener)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    struct vhost_dev *dev = v->dev;
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;

    if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
        return;
    }

    msg.type = v->msg_type;
    msg.iotlb.type = VHOST_IOTLB_BATCH_END;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write IOTLB batch end, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }
}

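/*
 * Map a newly added RAM section into the device IOTLB.  The IOVA range is
 * clipped to target-page alignment; the host virtual address is derived
 * from the RAM block backing the region.
 */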
static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    void *vaddr;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    memory_region_ref(section->mr);

    /* Here we assume that memory_region_is_ram(section->mr) == true */

    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    llsize = int128_sub(llend, int128_make64(iova));

    ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
                             vaddr, section->readonly);
    if (ret) {
        if (memory_region_is_ram_device(section->mr)) {
            /* Allow unexpected mappings not to be fatal for RAM devices */
            error_report("vhost-vdpa: failed to map RAM device; "
                         "PCI peer-to-peer may not work");
            return;
        }
        /*
         * At runtime there is not much we can do other than report the
         * failure; the device will not see this memory.
         */
        error_report("vhost-vdpa: DMA mapping failed, unable to continue");
    }
}

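/*
 * Invalidate the IOTLB entries for a section that is being removed, using
 * the same page-aligned range computation as the add path.
 */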
static void vhost_vdpa_listener_region_del(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    llsize = int128_sub(llend, int128_make64(iova));

    ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
    if (ret) {
        error_report("vhost-vdpa: DMA unmap failed");
    }

    memory_region_unref(section->mr);
}

/*
 * The IOTLB API is used by vhost-vdpa, which requires incremental updating
 * of the mapping, so we cannot use the generic vhost memory listener, which
 * depends on vhost_region_addnop().
 */
static const MemoryListener vhost_vdpa_memory_listener = {
    .begin = vhost_vdpa_listener_begin,
    .commit = vhost_vdpa_listener_commit,
    .region_add = vhost_vdpa_listener_region_add,
    .region_del = vhost_vdpa_listener_region_del,
};

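/* Forward a vhost request to the vhost-vdpa device fd as an ioctl. */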
static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
                           void *arg)
{
    struct vhost_vdpa *v = dev->opaque;
    int fd = v->device_fd;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    return ioctl(fd, request, arg);
}

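/*
 * Read-modify-write the device status byte: OR the given bits into whatever
 * the device currently reports.
 */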
static void vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;

    if (vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s)) {
        return;
    }

    s |= status;

    vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
}

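/*
 * Backend init: stash the vhost_vdpa state in dev->opaque, cache the device
 * features, and advance the device status to ACKNOWLEDGE | DRIVER.
 */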
static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque)
{
    struct vhost_vdpa *v;
    uint64_t features;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    v = opaque;
    v->dev = dev;
    dev->opaque = opaque;
    vhost_vdpa_call(dev, VHOST_GET_FEATURES, &features);
    dev->backend_features = features;
    v->listener = vhost_vdpa_memory_listener;
    v->msg_type = VHOST_IOTLB_MSG_V2;

    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);

    return 0;
}

static int vhost_vdpa_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    v = dev->opaque;
    memory_listener_unregister(&v->listener);

    dev->opaque = NULL;
    return 0;
}

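/*
 * Mappings reach the device through the IOTLB listener rather than through
 * memslots, so no backend limit applies.
 */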
static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    return INT_MAX;
}

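/*
 * The memory table is unused by vhost-vdpa (the listener feeds the IOTLB
 * instead); only reject a non-zero padding field.
 */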
static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    if (mem->padding) {
        return -1;
    }

    return 0;
}

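/*
 * Feature negotiation: set the features, latch FEATURES_OK in the status
 * byte, then read the status back to verify the device accepted them.
 */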
static int vhost_vdpa_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    uint8_t status = 0;
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
    if (ret) {
        return ret;
    }
    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
    vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

    return !(status & VIRTIO_CONFIG_S_FEATURES_OK);
}

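/*
 * Negotiate the backend features we know how to use: v2 IOTLB messages and
 * batched IOTLB updates.  Failures are not fatal; we simply run without the
 * capability.
 */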
static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
{
    uint64_t features;
    uint64_t f = (0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
                 (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH);
    int r;

    if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
        return 0;
    }

    features &= f;
    r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
    if (r) {
        return 0;
    }

    dev->backend_cap = features;

    return 0;
}

int vhost_vdpa_get_device_id(struct vhost_dev *dev,
                             uint32_t *device_id)
{
    return vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
}

static int vhost_vdpa_reset_device(struct vhost_dev *dev)
{
    uint8_t status = 0;

    return vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
}

static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    return idx - dev->vq_index;
}

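/* Enable every vring owned by this device via VHOST_VDPA_SET_VRING_ENABLE. */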
static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
{
    int i;

    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = 1,
        };
        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
    }
    return 0;
}

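/*
 * Config space accesses go through VHOST_VDPA_SET_CONFIG/GET_CONFIG with a
 * variable-length struct vhost_vdpa_config carrying offset, length and data.
 */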
static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags)
{
    struct vhost_vdpa_config *config;
    int ret;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);

    config = g_malloc(size + config_size);
    config->off = offset;
    config->len = size;
    memcpy(config->buf, data, size);
    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
    g_free(config);
    return ret;
}

static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len)
{
    struct vhost_vdpa_config *v_config;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    int ret;

    v_config = g_malloc(config_len + config_size);
    v_config->len = config_len;
    v_config->off = 0;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
    memcpy(config, v_config->buf, config_len);
    g_free(v_config);
    return ret;
}

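/*
 * Start: register the memory listener (which populates the IOTLB), enable
 * the vrings and set DRIVER_OK, then verify the device accepted it.
 * Stop: reset the device back to ACKNOWLEDGE | DRIVER and drop the listener.
 */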
static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
    struct vhost_vdpa *v = dev->opaque;

    if (started) {
        uint8_t status = 0;

        memory_listener_register(&v->listener, &address_space_memory);
        vhost_vdpa_set_vring_ready(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
        vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

        return !(status & VIRTIO_CONFIG_S_DRIVER_OK);
    } else {
        vhost_vdpa_reset_device(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                   VIRTIO_CONFIG_S_DRIVER);
        memory_listener_unregister(&v->listener);

        return 0;
    }
}

static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
}

static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
}

static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
}

static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
}

static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
}

static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
}

static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
}

static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                   uint64_t *features)
{
    return vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
}

static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
}

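/*
 * For vDPA the ring addresses handed to the backend are guest physical
 * addresses (translated through the IOTLB mappings set up by the listener),
 * not QEMU userspace virtual addresses.
 */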
static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
                    struct vhost_vring_addr *addr, struct vhost_virtqueue *vq)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
    addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
    addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    return 0;
}

static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
{
    return true;
}

const VhostOps vdpa_ops = {
        .backend_type = VHOST_BACKEND_TYPE_VDPA,
        .vhost_backend_init = vhost_vdpa_init,
        .vhost_backend_cleanup = vhost_vdpa_cleanup,
        .vhost_set_log_base = vhost_vdpa_set_log_base,
        .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
        .vhost_set_vring_num = vhost_vdpa_set_vring_num,
        .vhost_set_vring_base = vhost_vdpa_set_vring_base,
        .vhost_get_vring_base = vhost_vdpa_get_vring_base,
        .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
        .vhost_set_vring_call = vhost_vdpa_set_vring_call,
        .vhost_get_features = vhost_vdpa_get_features,
        .vhost_set_backend_cap = vhost_vdpa_set_backend_cap,
        .vhost_set_owner = vhost_vdpa_set_owner,
        .vhost_set_vring_endian = NULL,
        .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
        .vhost_set_mem_table = vhost_vdpa_set_mem_table,
        .vhost_set_features = vhost_vdpa_set_features,
        .vhost_reset_device = vhost_vdpa_reset_device,
        .vhost_get_vq_index = vhost_vdpa_get_vq_index,
        .vhost_get_config = vhost_vdpa_get_config,
        .vhost_set_config = vhost_vdpa_set_config,
        .vhost_requires_shm_log = NULL,
        .vhost_migration_done = NULL,
        .vhost_backend_can_merge = NULL,
        .vhost_net_set_mtu = NULL,
        .vhost_set_iotlb_callback = NULL,
        .vhost_send_device_iotlb_msg = NULL,
        .vhost_dev_start = vhost_vdpa_dev_start,
        .vhost_get_device_id = vhost_vdpa_get_device_id,
        .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
        .vhost_force_iommu = vhost_vdpa_force_iommu,
};
533