xref: /openbmc/qemu/hw/virtio/vhost-vdpa.c (revision 7acafcfa)
/*
 * vhost-vdpa
 *
 *  Copyright(c) 2017-2018 Intel Corporation.
 *  Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <linux/vhost.h>
#include <linux/vfio.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio-net.h"
#include "hw/virtio/vhost-vdpa.h"
#include "qemu/main-loop.h"
#include "cpu.h"

static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section)
{
    return (!memory_region_is_ram(section->mr) &&
            !memory_region_is_iommu(section->mr)) ||
           /*
            * Sizing an enabled 64-bit BAR can cause spurious mappings to
            * addresses in the upper part of the 64-bit address space.  These
            * are never accessed by the CPU and beyond the address width of
            * some IOMMU hardware.  TODO: VDPA should tell us the IOMMU width.
            */
           section->offset_within_address_space & (1ULL << 63);
}

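/*
 * DMA mapping helpers.
 *
 * Unlike most vhost requests, IOTLB updates are not ioctls: a struct
 * vhost_msg_v2 with type VHOST_IOTLB_MSG_V2 is written directly to the
 * vhost-vdpa device fd.  The kernel pins the host range starting at uaddr
 * and installs the iova -> uaddr translation for the device.  A rough
 * usage sketch (the listener code below supplies the real values for v,
 * iova, size and vaddr):
 *
 *     vhost_vdpa_dma_map(v, iova, size, vaddr, false);  // VHOST_IOTLB_UPDATE
 *     ...
 *     vhost_vdpa_dma_unmap(v, iova, size);              // VHOST_IOTLB_INVALIDATE
 */
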
static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
                              void *vaddr, bool readonly)
{
    struct vhost_msg_v2 msg;
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
    msg.iotlb.type = VHOST_IOTLB_UPDATE;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
            fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
                                hwaddr size)
{
    struct vhost_msg_v2 msg;
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
            fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

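/*
 * MemoryListener add hook: invoked when a RAM section is added to the
 * guest memory map (including the replay that happens when the listener is
 * registered).  The section is clipped to target page boundaries,
 * translated to a host virtual address, and pushed to the device as an
 * IOTLB update, so the guest physical address becomes the IOVA the device
 * uses for DMA.
 */
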
static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    void *vaddr;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    memory_region_ref(section->mr);

    /* Here we assume that memory_region_is_ram(section->mr) == true */

    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    llsize = int128_sub(llend, int128_make64(iova));

    ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
                             vaddr, section->readonly);
    if (ret) {
        error_report("vhost-vdpa: DMA map failed");
        if (memory_region_is_ram_device(section->mr)) {
            /* Allow unexpected mappings not to be fatal for RAM devices */
            error_report("vhost-vdpa: map of RAM device region failed");
            return;
        }
        goto fail;
    }

    return;

fail:
    if (memory_region_is_ram_device(section->mr)) {
        error_report("failed to vdpa_dma_map, PCI p2p may not work");
        return;
    }
    /*
     * On the init path we could record the first error and fail gracefully.
     * At runtime, there's not much we can do other than report a hardware
     * error.
     */
    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
    return;
}

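/*
 * MemoryListener del hook: mirrors region_add by invalidating the IOTLB
 * range that covered the section and dropping the memory region reference
 * taken on the add path.
 */
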
static void vhost_vdpa_listener_region_del(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    int ret;
    bool try_unmap = true;

    if (vhost_vdpa_listener_skipped_section(section)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    llsize = int128_sub(llend, int128_make64(iova));

    if (try_unmap) {
        ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
        if (ret) {
            error_report("vhost-vdpa: DMA unmap failed");
        }
    }

    memory_region_unref(section->mr);
}

/*
 * The IOTLB API is used by vhost-vdpa, which requires incremental updating
 * of the mapping, so we cannot use the generic vhost memory listener, which
 * depends on addnop().
 */
static const MemoryListener vhost_vdpa_memory_listener = {
    .region_add = vhost_vdpa_listener_region_add,
    .region_del = vhost_vdpa_listener_region_del,
};

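/*
 * Thin wrapper forwarding a vhost/vhost-vdpa request as an ioctl on the
 * character device fd stored in struct vhost_vdpa.  A minimal caller
 * sketch (variable names are illustrative only):
 *
 *     uint32_t id;
 *     if (vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, &id) < 0) {
 *         // errno holds the failure reported by the kernel
 *     }
 */
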
static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
                           void *arg)
{
    struct vhost_vdpa *v = dev->opaque;
    int fd = v->device_fd;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    return ioctl(fd, request, arg);
}

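/*
 * Read-modify-write of the virtio device status byte: fetch the current
 * status with VHOST_VDPA_GET_STATUS, OR in the requested bits and write
 * the result back with VHOST_VDPA_SET_STATUS.  Bits are only ever added
 * here; clearing the status is done by vhost_vdpa_reset_device().
 */
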
static void vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;

    if (vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s)) {
        return;
    }

    s |= status;

    vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
}

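/*
 * Backend init: stash the caller-provided struct vhost_vdpa as the opaque
 * pointer, cache the device feature bits, prepare (but do not yet
 * register) the memory listener, and announce ACKNOWLEDGE | DRIVER as the
 * first steps of the virtio initialization sequence.
 */
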
static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque)
{
    struct vhost_vdpa *v;
    uint64_t features;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    v = opaque;
    dev->opaque = opaque;
    vhost_vdpa_call(dev, VHOST_GET_FEATURES, &features);
    dev->backend_features = features;
    v->listener = vhost_vdpa_memory_listener;
    v->msg_type = VHOST_IOTLB_MSG_V2;

    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);

    return 0;
}

static int vhost_vdpa_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    v = dev->opaque;
    memory_listener_unregister(&v->listener);

    dev->opaque = NULL;
    return 0;
}

static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    return INT_MAX;
}

static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    if (mem->padding) {
        return -1;
    }

    return 0;
}

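/*
 * Feature negotiation: once VHOST_SET_FEATURES succeeds, set FEATURES_OK
 * and read the status back, as the virtio spec requires, so a device that
 * rejects the negotiated feature set turns into a non-zero return here.
 */
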
static int vhost_vdpa_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    uint8_t status = 0;
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
    if (ret) {
        return ret;
    }
    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
    vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

    return !(status & VIRTIO_CONFIG_S_FEATURES_OK);
}

int vhost_vdpa_get_device_id(struct vhost_dev *dev,
                             uint32_t *device_id)
{
    return vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
}

static int vhost_vdpa_reset_device(struct vhost_dev *dev)
{
    uint8_t status = 0;

    return vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
}

static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    return idx - dev->vq_index;
}

static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
{
    int i;

    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = 1,
        };
        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
    }
    return 0;
}

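/*
 * Config space accessors.  The kernel expects a variable-sized struct
 * vhost_vdpa_config { off, len, buf[] }, so a header-plus-payload buffer
 * is built and handed to VHOST_VDPA_SET_CONFIG / VHOST_VDPA_GET_CONFIG.
 * A minimal sketch for writing one field (the offset is illustrative, not
 * taken from any particular device):
 *
 *     uint16_t mtu = cpu_to_le16(1500);
 *     vhost_vdpa_set_config(dev, (uint8_t *)&mtu, mtu_offset,
 *                           sizeof(mtu), 0);
 */
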
static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags)
{
    struct vhost_vdpa_config *config;
    int ret;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);

    config = g_malloc(size + config_size);
    if (config == NULL) {
        return -1;
    }
    config->off = offset;
    config->len = size;
    memcpy(config->buf, data, size);
    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
    g_free(config);
    return ret;
}

static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len)
{
    struct vhost_vdpa_config *v_config;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    int ret;

    v_config = g_malloc(config_len + config_size);
    if (v_config == NULL) {
        return -1;
    }
    v_config->len = config_len;
    v_config->off = 0;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
    memcpy(config, v_config->buf, config_len);
    g_free(v_config);
    return ret;
}

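/*
 * Start/stop hook.  On start: register the memory listener (which replays
 * the current memory map as IOTLB updates), enable the rings, set
 * DRIVER_OK and read the status back so a device that refuses DRIVER_OK
 * is reported as a start failure.  On stop: reset the device, restore
 * ACKNOWLEDGE | DRIVER and unregister the listener.
 */
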
static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
    struct vhost_vdpa *v = dev->opaque;

    if (started) {
        uint8_t status = 0;

        memory_listener_register(&v->listener, &address_space_memory);
        vhost_vdpa_set_vring_ready(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
        vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

        return !(status & VIRTIO_CONFIG_S_DRIVER_OK);
    } else {
        vhost_vdpa_reset_device(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                   VIRTIO_CONFIG_S_DRIVER);
        memory_listener_unregister(&v->listener);

        return 0;
    }
}

static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
}

static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
}

static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
}

static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
}

static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
}

static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
}

static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
}

static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                   uint64_t *features)
{
    return vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
}

static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
}

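/*
 * For vDPA the ring addresses passed to VHOST_SET_VRING_ADDR are guest
 * physical addresses: the device resolves them through the IOTLB set up
 * by the listener above, rather than through QEMU virtual addresses as a
 * vhost-kernel backend would.  Hence the *_phys values are reported here
 * instead of the userspace addresses the generic vhost code tracks.
 */
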
static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
                                  struct vhost_vring_addr *addr,
                                  struct vhost_virtqueue *vq)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
    addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
    addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    return 0;
}

static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
{
    return true;
}

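/*
 * Backend ops table dispatched by the generic vhost core once the
 * vhost-vdpa backend type is selected; NULL entries simply have no
 * vDPA-specific behaviour.  A caller would, roughly, open the vhost-vdpa
 * character device and hand the fd in via struct vhost_vdpa before
 * initialising the vhost device (sketch only, error handling omitted,
 * device path and variable names illustrative):
 *
 *     struct vhost_vdpa *v = ...;
 *     v->device_fd = open("/dev/vhost-vdpa-0", O_RDWR);
 *     vhost_dev_init(vdev, v, VHOST_BACKEND_TYPE_VDPA, 0);
 */
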
const VhostOps vdpa_ops = {
        .backend_type = VHOST_BACKEND_TYPE_VDPA,
        .vhost_backend_init = vhost_vdpa_init,
        .vhost_backend_cleanup = vhost_vdpa_cleanup,
        .vhost_set_log_base = vhost_vdpa_set_log_base,
        .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
        .vhost_set_vring_num = vhost_vdpa_set_vring_num,
        .vhost_set_vring_base = vhost_vdpa_set_vring_base,
        .vhost_get_vring_base = vhost_vdpa_get_vring_base,
        .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
        .vhost_set_vring_call = vhost_vdpa_set_vring_call,
        .vhost_get_features = vhost_vdpa_get_features,
        .vhost_set_owner = vhost_vdpa_set_owner,
        .vhost_set_vring_endian = NULL,
        .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
        .vhost_set_mem_table = vhost_vdpa_set_mem_table,
        .vhost_set_features = vhost_vdpa_set_features,
        .vhost_reset_device = vhost_vdpa_reset_device,
        .vhost_get_vq_index = vhost_vdpa_get_vq_index,
        .vhost_get_config = vhost_vdpa_get_config,
        .vhost_set_config = vhost_vdpa_set_config,
        .vhost_requires_shm_log = NULL,
        .vhost_migration_done = NULL,
        .vhost_backend_can_merge = NULL,
        .vhost_net_set_mtu = NULL,
        .vhost_set_iotlb_callback = NULL,
        .vhost_send_device_iotlb_msg = NULL,
        .vhost_dev_start = vhost_vdpa_dev_start,
        .vhost_get_device_id = vhost_vdpa_get_device_id,
        .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
        .vhost_force_iommu = vhost_vdpa_force_iommu,
};