xref: /openbmc/qemu/include/hw/virtio/vhost.h (revision 0f64fb674360393ae09605d8d53bf81c02c78a3e)
1 #ifndef VHOST_H
2 #define VHOST_H
3 
4 #include "hw/virtio/vhost-backend.h"
5 #include "hw/virtio/virtio.h"
6 #include "system/memory.h"
7 
8 #define VHOST_F_DEVICE_IOTLB 63 /* NOTE(review): presumably the feature bit number for device IOTLB support - confirm */
9 #define VHOST_USER_F_PROTOCOL_FEATURES 30 /* offered by the backend in vhost_dev.features; gates protocol feature negotiation */
10 
11 #define VU_REALIZE_CONN_RETRIES 3 /* NOTE(review): presumably the number of vhost-user connection attempts at realize time - confirm */
12 
13 /* Generic structures common to any vhost-based device. */
14 
15 struct vhost_inflight { /* state handed to the vhost_dev_*_inflight() helpers declared in this header */
16     int fd; /* NOTE(review): presumably the fd backing the inflight region - confirm */
17     void *addr; /* NOTE(review): presumably the local mapping of that region - confirm */
18     uint64_t size;
19     uint64_t offset;
20     uint16_t queue_size; /* cf. the queue_size argument of vhost_dev_get_inflight() */
21 };
22 
23 struct vhost_virtqueue { /* per-virtqueue state; struct vhost_dev.vqs points at these */
24     int kick; /* NOTE(review): presumably the kick eventfd - confirm */
25     int call; /* NOTE(review): presumably the call eventfd - confirm */
26     void *desc; /* NOTE(review): desc/avail/used presumably map the rings described by the *_phys/*_size fields - confirm */
27     void *avail;
28     void *used;
29     int num;
30     unsigned long long desc_phys;
31     unsigned desc_size;
32     unsigned long long avail_phys;
33     unsigned avail_size;
34     unsigned long long used_phys;
35     unsigned used_size;
36     EventNotifier masked_notifier; /* NOTE(review): presumably used while the vq is masked, see vhost_virtqueue_mask() - confirm */
37     EventNotifier error_notifier;
38     EventNotifier masked_config_notifier; /* NOTE(review): presumably the config counterpart, see vhost_config_mask() - confirm */
39     struct vhost_dev *dev; /* NOTE(review): presumably the owning vhost_dev - confirm */
40 };
41 
42 typedef unsigned long vhost_log_chunk_t; /* element type of struct vhost_log.log */
43 #define VHOST_LOG_PAGE 0x1000
44 #define VHOST_LOG_BITS (8 * sizeof(vhost_log_chunk_t)) /* bits per chunk, assuming 8-bit bytes */
45 #define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * VHOST_LOG_BITS) /* NOTE(review): presumably bytes covered by one chunk at one bit per VHOST_LOG_PAGE - confirm */
46 #define VHOST_INVALID_FEATURE_BIT   (0xff) /* NOTE(review): presumably the terminator of the feature_bits tables - confirm */
47 #define VHOST_QUEUE_NUM_CONFIG_INR 0
48 
49 struct vhost_log { /* log state referenced from struct vhost_dev.log */
50     unsigned long long size; /* NOTE(review): unit not visible here (bytes vs. chunks) - confirm */
51     int refcnt; /* NOTE(review): presumably a reference count shared between users of this log - confirm */
52     int fd;
53     vhost_log_chunk_t *log; /* pointer to the log chunk(s) */
54 };
55 
56 struct vhost_dev; /* forward declaration; the full definition follows later in this header */
57 struct vhost_iommu { /* element type of struct vhost_dev.iommu_list */
58     struct vhost_dev *hdev;
59     MemoryRegion *mr;
60     hwaddr iommu_offset;
61     IOMMUNotifier n;
62     QLIST_ENTRY(vhost_iommu) iommu_next; /* list linkage */
63 };
64 
65 typedef struct VhostDevConfigOps {
66     /* Vhost device config space changed callback; the ops table is
67      * registered with vhost_dev_set_config_notifier(). */
68     int (*vhost_dev_config_notifier)(struct vhost_dev *dev);
69 } VhostDevConfigOps;
70 
71 struct vhost_memory; /* forward declaration; only used through a pointer here */
72 
73 /**
74  * struct vhost_dev - common vhost_dev structure
75  * @vhost_ops: backend specific ops
76  * @config_ops: ops for config changes (see @vhost_dev_set_config_notifier)
77  */
78 struct vhost_dev {
79     VirtIODevice *vdev;
80     MemoryListener memory_listener;
81     MemoryListener iommu_listener;
82     struct vhost_memory *mem;
83     int n_mem_sections; /* NOTE(review): presumably the entry count of mem_sections - confirm */
84     MemoryRegionSection *mem_sections;
85     int n_tmp_sections; /* NOTE(review): presumably the entry count of tmp_sections - confirm */
86     MemoryRegionSection *tmp_sections;
87     struct vhost_virtqueue *vqs;
88     unsigned int nvqs; /* NOTE(review): presumably the number of entries in vqs - confirm */
89     /* the first virtqueue which would be used by this vhost dev */
90     int vq_index;
91     /* one past the last vq index for the virtio device (not vhost) */
92     int vq_index_end;
93     /* if non-zero, minimum required value for max_queues */
94     int num_queues;
95     /**
96      * vhost feature handling requires matching the feature set
97      * offered by a backend which may be a subset of the total
98      * features eventually offered to the guest.
99      *
100      * @features: available features provided by the backend
101      * @acked_features: final negotiated features with front-end driver
102      *
103      * @backend_features: this is used in a couple of places to either
104      * store VHOST_USER_F_PROTOCOL_FEATURES to apply to
105      * VHOST_USER_SET_FEATURES or VHOST_NET_F_VIRTIO_NET_HDR. Its
106      * future use should be discouraged and the variable retired as
107      * it's easy to confuse with the VirtIO backend_features.
108      */
109     uint64_t features;
110     uint64_t acked_features;
111     uint64_t backend_features;
112 
113     /**
114      * @protocol_features: is the vhost-user only feature set by
115      * VHOST_USER_SET_PROTOCOL_FEATURES. Protocol features are only
116      * negotiated if VHOST_USER_F_PROTOCOL_FEATURES has been offered
117      * by the backend (see @features).
118      */
119     uint64_t protocol_features;
120 
121     uint64_t max_queues;
122     uint64_t backend_cap;
123     /* @started: is the vhost device started? (read by vhost_dev_is_started()) */
124     bool started;
125     bool log_enabled;
126     uint64_t log_size;
127     Error *migration_blocker;
128     const VhostOps *vhost_ops;
129     void *opaque;
130     struct vhost_log *log;
131     QLIST_ENTRY(vhost_dev) entry;
132     QLIST_ENTRY(vhost_dev) logdev_entry;
133     QLIST_HEAD(, vhost_iommu) iommu_list; /* list of struct vhost_iommu, linked through iommu_next */
134     IOMMUNotifier n;
135     const VhostDevConfigOps *config_ops;
136 };
137 
138 extern const VhostOps kernel_ops; /* VhostOps tables defined elsewhere; NOTE(review): presumably one per backend type (kernel vhost, vhost-user, vDPA) - confirm */
139 extern const VhostOps user_ops;
140 extern const VhostOps vdpa_ops;
141 
142 struct vhost_net {
143     struct vhost_dev dev; /* embedded common vhost device state */
144     struct vhost_virtqueue vqs[2]; /* NOTE(review): presumably the RX/TX pair that dev.vqs points at - confirm */
145     int backend;
146     NetClientState *nc;
147 };
148 
149 /**
150  * vhost_dev_init() - initialise the vhost interface
151  * @hdev: the common vhost_dev structure
152  * @opaque: opaque ptr passed to backend (vhost/vhost-user/vdpa)
153  * @backend_type: type of backend
154  * @busyloop_timeout: timeout for polling virtqueue
155  * @errp: error handle
156  *
157  * The initialisation of the vhost device will trigger the
158  * initialisation of the backend and potentially capability
159  * negotiation of the backend interface. Configuration of the VirtIO
160  * device itself won't happen until the interface is started.
161  *
162  * Return: 0 on success, non-zero on error while setting @errp.
163  */
164 int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
165                    VhostBackendType backend_type,
166                    uint32_t busyloop_timeout, Error **errp);
167 
168 /**
169  * vhost_dev_cleanup() - tear down and clean up the vhost interface
170  * @hdev: the common vhost_dev structure
171  */
172 void vhost_dev_cleanup(struct vhost_dev *hdev);
173 
174 void vhost_dev_disable_notifiers_nvqs(struct vhost_dev *hdev,
175                                       VirtIODevice *vdev,
176                                       unsigned int nvqs); /* NOTE(review): presumably vhost_dev_disable_notifiers() limited to @nvqs queues - confirm */
177 
178 /**
179  * vhost_dev_enable_notifiers() - enable event notifiers
180  * @hdev: common vhost_dev structure
181  * @vdev: the VirtIODevice structure
182  *
183  * Enable direct notification of the vhost device rather than having
184  * notifications triggered by QEMU itself. Notifications should be
185  * enabled before the vhost device is started via @vhost_dev_start.
186  *
187  * Return: 0 on success, < 0 on error.
188  */
189 int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
190 
191 /**
192  * vhost_dev_disable_notifiers() - disable event notifications
193  * @hdev: common vhost_dev structure
194  * @vdev: the VirtIODevice structure
195  *
196  * Disable direct notifications to the vhost device.
197  */
198 void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
199 bool vhost_config_pending(struct vhost_dev *hdev); /* NOTE(review): presumably the config counterpart of vhost_virtqueue_pending() - confirm */
200 void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask); /* NOTE(review): presumably the config counterpart of vhost_virtqueue_mask() - confirm */
201 
202 /**
203  * vhost_dev_is_started() - report status of vhost device
204  * @hdev: common vhost_dev structure
205  *
206  * Return: the value of the @hdev->started flag.
207  */
208 static inline bool vhost_dev_is_started(struct vhost_dev *hdev)
209 {
210     return hdev->started;
211 }
212 
213 /**
214  * vhost_dev_start() - start the vhost device
215  * @hdev: common vhost_dev structure
216  * @vdev: the VirtIODevice structure
217  * @vrings: true to have vrings enabled in this call
218  *
219  * Start the vhost device. From this point on, VirtIO feature negotiation
220  * can begin and the device can start processing VirtIO transactions.
221  *
222  * Return: 0 on success, < 0 on error.
223  */
224 int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings);
225 
226 /**
227  * vhost_dev_stop() - stop the vhost device
228  * @hdev: common vhost_dev structure
229  * @vdev: the VirtIODevice structure
230  * @vrings: true to have vrings disabled in this call
231  *
232  * Stop the vhost device. After the device is stopped the notifiers
233  * can be disabled (@vhost_dev_disable_notifiers) and the device can
234  * be torn down (@vhost_dev_cleanup).
235  *
236  * Return: 0 on success, != 0 on error when stopping the device.
237  */
238 int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings);
239 
240 /**
241  * vhost_dev_force_stop() - force stop the vhost device
242  * @hdev: common vhost_dev structure
243  * @vdev: the VirtIODevice structure
244  * @vrings: true to have vrings disabled in this call
245  *
246  * Force stop the vhost device. After the device is stopped the notifiers
247  * can be disabled (@vhost_dev_disable_notifiers) and the device can
248  * be torn down (@vhost_dev_cleanup). Unlike @vhost_dev_stop, this skips
249  * GET_VRING_BASE entirely and so doesn't attempt to flush in-flight
250  * backend requests.
251  */
252 int vhost_dev_force_stop(struct vhost_dev *hdev, VirtIODevice *vdev,
253                          bool vrings);
254 
255 /**
256  * DOC: vhost device configuration handling
257  *
258  * The VirtIO device configuration space is used for rarely changing
259  * or initialisation time parameters. The configuration can be updated
260  * by either the guest driver or the device itself. If the device can
261  * change the configuration over time the vhost handler should
262  * register a @VhostDevConfigOps structure with
263  * @vhost_dev_set_config_notifier so the guest can be notified. Some
264  * devices register a handler anyway and will signal an error if an
265  * unexpected config change happens.
266  */
267 
268 /**
269  * vhost_dev_get_config() - fetch device configuration
270  * @hdev: common vhost_dev structure
271  * @config: pointer to device appropriate config structure
272  * @config_len: size of device appropriate config structure
 * @errp: error handle
273  *
274  * Return: 0 on success, < 0 on error while setting @errp
275  */
276 int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config,
277                          uint32_t config_len, Error **errp);
278 
279 /**
280  * vhost_dev_set_config() - set device configuration
281  * @dev: common vhost_dev structure
282  * @data: pointer to data to set
283  * @offset: offset into configuration space
284  * @size: length of set
285  * @flags: @VhostSetConfigType flags
286  *
287  * By use of @offset/@size a subset of the configuration space can be
288  * written to. The @flags are used to indicate if it is a normal
289  * transaction or related to migration.
290  *
291  * Return: 0 on success, non-zero on error
292  */
293 int vhost_dev_set_config(struct vhost_dev *dev, const uint8_t *data,
294                          uint32_t offset, uint32_t size, uint32_t flags);
295 
296 /**
297  * vhost_dev_set_config_notifier() - register VhostDevConfigOps
298  * @dev: common vhost_dev structure
299  * @ops: notifier ops
300  *
301  * If the device is expected to change configuration a notifier can be
302  * set up to handle the case.
303  */
304 void vhost_dev_set_config_notifier(struct vhost_dev *dev,
305                                    const VhostDevConfigOps *ops);
306 
307 
308 /* Test and clear masked event pending status (@n: NOTE(review) presumably the vq index - confirm).
309  * Should be called after unmask to avoid losing events.
310  */
311 bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n);
312 
313 /* Mask/unmask events from this vq; @mask selects between masking and unmasking
314  * (NOTE(review): presumably true means mask, and @n is the vq index - confirm). */
315 void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
316                           bool mask);
317 
318 /**
319  * vhost_get_features() - return a sanitised set of feature bits
320  * @hdev: common vhost_dev structure
321  * @feature_bits: pointer to terminated table of feature bits
322  * @features: original feature set
323  *
324  * This returns a set of feature bits that is the intersection of what
325  * is supported by the vhost backend (hdev->features), the supported
326  * feature_bits and the requested feature set.
327  */
328 uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
329                             uint64_t features);
330 
331 /**
332  * vhost_ack_features() - set vhost acked_features
333  * @hdev: common vhost_dev structure
334  * @feature_bits: pointer to terminated table of feature bits
335  * @features: requested feature set
336  *
337  * This sets the internal hdev->acked_features to the intersection of
338  * the backend's advertised features and the supported feature_bits.
339  */
340 void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
341                         uint64_t features);
342 unsigned int vhost_get_max_memslots(void); /* NOTE(review): scope of the limit (per backend vs. global) is not visible here - confirm */
343 unsigned int vhost_get_free_memslots(void); /* NOTE(review): presumably the number of memslots still unused - confirm */
344 
345 int vhost_net_set_backend(struct vhost_dev *hdev,
346                           struct vhost_vring_file *file); /* NOTE(review): return convention not visible here; presumably 0 on success, < 0 on error - confirm */
347 
348 void vhost_toggle_device_iotlb(VirtIODevice *vdev);
349 int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write); /* NOTE(review): @write presumably non-zero for a write access - confirm */
350 
351 int vhost_virtqueue_start(struct vhost_dev *dev, struct VirtIODevice *vdev,
352                           struct vhost_virtqueue *vq, unsigned idx); /* NOTE(review): @idx presumably the virtqueue index - confirm */
353 int vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev,
354                          struct vhost_virtqueue *vq, unsigned idx); /* counterpart of vhost_virtqueue_start(); same parameters */
355 
356 void vhost_dev_reset_inflight(struct vhost_inflight *inflight); /* the helpers below all operate on a struct vhost_inflight */
357 void vhost_dev_free_inflight(struct vhost_inflight *inflight);
358 int vhost_dev_prepare_inflight(struct vhost_dev *hdev, VirtIODevice *vdev);
359 int vhost_dev_set_inflight(struct vhost_dev *dev,
360                            struct vhost_inflight *inflight);
361 int vhost_dev_get_inflight(struct vhost_dev *dev, uint16_t queue_size,
362                            struct vhost_inflight *inflight); /* NOTE(review): presumably fills @inflight for queues of @queue_size entries - confirm */
363 bool vhost_dev_has_iommu(struct vhost_dev *dev);
364 
365 #ifdef CONFIG_VHOST
366 int vhost_reset_device(struct vhost_dev *hdev);
367 #else /* !CONFIG_VHOST: stub that always fails with -ENOSYS */
368 static inline int vhost_reset_device(struct vhost_dev *hdev)
369 {
370     return -ENOSYS;
371 }
372 #endif /* CONFIG_VHOST */
373 
374 /**
375  * vhost_supports_device_state(): Checks whether the back-end supports
376  * transferring internal device state for the purpose of migration.
377  * Support for this feature is required for vhost_set_device_state_fd()
378  * and vhost_check_device_state().
379  *
380  * @dev: The vhost device
381  *
382  * Returns true if the device supports these commands, and false if it
383  * does not (always false when CONFIG_VHOST is not defined).
384  */
385 #ifdef CONFIG_VHOST
386 bool vhost_supports_device_state(struct vhost_dev *dev);
387 #else /* !CONFIG_VHOST */
388 static inline bool vhost_supports_device_state(struct vhost_dev *dev)
389 {
390     return false;
391 }
392 #endif /* CONFIG_VHOST */
393 
394 /**
395  * vhost_set_device_state_fd(): Begin transfer of internal state from/to
396  * the back-end for the purpose of migration.  Data is to be transferred
397  * over a pipe according to @direction and @phase.  The sending end must
398  * only write to the pipe, and the receiving end must only read from it.
399  * Once the sending end is done, it closes its FD.  The receiving end
400  * must take this as the end-of-transfer signal and close its FD, too.
401  *
402  * @fd is the back-end's end of the pipe: The write FD for SAVE, and the
403  * read FD for LOAD.  This function transfers ownership of @fd to the
404  * back-end, i.e. closes it in the front-end.
405  *
406  * The back-end may optionally reply with an FD of its own, if this
407  * improves efficiency on its end.  In this case, the returned FD is
408  * stored in *reply_fd.  The back-end will discard the FD sent to it,
409  * and the front-end must use *reply_fd for transferring state to/from
410  * the back-end.
411  *
412  * @dev: The vhost device
413  * @direction: The direction in which the state is to be transferred.
414  *             For outgoing migrations, this is SAVE, and data is read
415  *             from the back-end and stored by the front-end in the
416  *             migration stream.
417  *             For incoming migrations, this is LOAD, and data is read
418  *             by the front-end from the migration stream and sent to
419  *             the back-end to restore the saved state.
420  * @phase: Which migration phase we are in.  Currently, there is only
421  *         STOPPED (device and all vrings are stopped); in the future,
422  *         more phases such as PRE_COPY or POST_COPY may be added.
423  * @fd: Back-end's end of the pipe through which to transfer state; note
424  *      that ownership is transferred to the back-end, so this function
425  *      closes @fd in the front-end.
426  * @reply_fd: If the back-end wishes to use a different pipe for state
427  *            transfer, this will contain an FD for the front-end to
428  *            use.  Otherwise, -1 is stored here.
429  * @errp: Potential error description
430  *
431  * Returns 0 on success, and -errno on failure.
432  */
433 int vhost_set_device_state_fd(struct vhost_dev *dev,
434                               VhostDeviceStateDirection direction,
435                               VhostDeviceStatePhase phase,
436                               int fd,
437                               int *reply_fd,
438                               Error **errp);
439 
440 /**
441  * vhost_check_device_state(): After transferring state from/to the
442  * back-end via vhost_set_device_state_fd(), i.e. once the sending end
443  * has closed the pipe, ask the back-end to report any potential
444  * errors that have occurred on its side.  This allows detecting errors
445  * like:
446  * - During outgoing migration, when the source side had already started
447  *   to produce its state, something went wrong and it failed to finish
448  * - During incoming migration, when the received state is somehow
449  *   invalid and cannot be processed by the back-end
450  *
451  * @dev: The vhost device
452  * @errp: Potential error description
453  *
454  * Returns 0 when the back-end reports successful state transfer and
455  * processing, and -errno when an error occurred somewhere.
456  */
457 int vhost_check_device_state(struct vhost_dev *dev, Error **errp);
458 
459 /**
460  * vhost_save_backend_state(): High-level function to receive a vhost
461  * back-end's state, and save it in @f.  Uses
462  * `vhost_set_device_state_fd()` to get the data from the back-end, and
463  * stores it in consecutive chunks that are each prefixed by their
464  * respective length (be32).  The end is marked by a 0-length chunk.
465  *
466  * Must only be called while the device and all its vrings are stopped
467  * (`VHOST_TRANSFER_STATE_PHASE_STOPPED`).
468  *
469  * @dev: The vhost device from which to save the state
470  * @f: Migration stream in which to save the state
471  * @errp: Potential error message
472  *
473  * Returns 0 on success, and -errno otherwise (-ENOSYS without CONFIG_VHOST).
474  */
475 #ifdef CONFIG_VHOST
476 int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp);
477 #else /* !CONFIG_VHOST */
478 static inline int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f,
479                                            Error **errp)
480 {
481     return -ENOSYS;
482 }
483 #endif /* CONFIG_VHOST */
484 
485 /**
486  * vhost_load_backend_state(): High-level function to load a vhost
487  * back-end's state from @f, and send it over to the back-end.  Reads
488  * the data from @f in the format used by `vhost_save_backend_state()`, and
489  * uses `vhost_set_device_state_fd()` to transfer it to the back-end.
490  *
491  * Must only be called while the device and all its vrings are stopped
492  * (`VHOST_TRANSFER_STATE_PHASE_STOPPED`).
493  *
494  * @dev: The vhost device to which to send the state
495  * @f: Migration stream from which to load the state
496  * @errp: Potential error message
497  *
498  * Returns 0 on success, and -errno otherwise (-ENOSYS without CONFIG_VHOST).
499  */
500 #ifdef CONFIG_VHOST
501 int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp);
502 #else /* !CONFIG_VHOST */
503 static inline int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f,
504                                            Error **errp)
505 {
506     return -ENOSYS;
507 }
508 #endif /* CONFIG_VHOST */
509 
510 #endif
511