xref: /openbmc/qemu/include/hw/virtio/vhost.h (revision 92a0dcbd751d771512b9dedd97e00553181b7699)
1 #ifndef VHOST_H
2 #define VHOST_H
3 
4 #include "net/vhost_net.h"
5 #include "hw/virtio/vhost-backend.h"
6 #include "hw/virtio/virtio.h"
7 #include "system/memory.h"
8 
9 #define VHOST_F_DEVICE_IOTLB 63
10 #define VHOST_USER_F_PROTOCOL_FEATURES 30
11 
12 #define VU_REALIZE_CONN_RETRIES 3
13 
14 /* Generic structures common for any vhost based device. */
15 
/*
 * Describes the shared memory region used to track in-flight I/O
 * requests so they survive a backend restart (see
 * vhost_dev_get_inflight()/vhost_dev_set_inflight()).
 * NOTE(review): presumably this is the vhost-user inflight shm region —
 * confirm against the vhost-user spec / backend implementation.
 */
struct vhost_inflight {
    int fd;               /* fd backing the shared region */
    void *addr;           /* mapping of the region in QEMU's address space */
    uint64_t size;        /* size of the mapped region in bytes */
    uint64_t offset;      /* offset of the region within @fd */
    uint16_t queue_size;  /* descriptors per virtqueue (see vhost_dev_get_inflight()) */
};
23 
/* Per-virtqueue state shared between QEMU and the vhost backend. */
struct vhost_virtqueue {
    int kick;             /* notifier fd: guest -> backend (queue kicked) */
    int call;             /* notifier fd: backend -> guest (used buffers) */
    /* host mappings of the three vring areas */
    void *desc;
    void *avail;
    void *used;
    int num;              /* queue size (number of descriptors) */
    /* guest-physical address and mapped length of each vring area */
    unsigned long long desc_phys;
    unsigned desc_size;
    unsigned long long avail_phys;
    unsigned avail_size;
    unsigned long long used_phys;
    unsigned used_size;
    EventNotifier masked_notifier;        /* stand-in notifier while vq irqs are masked */
    EventNotifier error_notifier;         /* backend-reported vq errors */
    EventNotifier masked_config_notifier; /* stand-in notifier while config irq is masked */
    struct vhost_dev *dev;                /* back-pointer to the owning device */
};
42 
/* One word of the dirty-memory log bitmap. */
typedef unsigned long vhost_log_chunk_t;
/* Dirty logging granularity: one bit per VHOST_LOG_PAGE bytes of guest RAM. */
#define VHOST_LOG_PAGE 0x1000
#define VHOST_LOG_BITS (8 * sizeof(vhost_log_chunk_t))
/* Guest memory covered by a single vhost_log_chunk_t. */
#define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * VHOST_LOG_BITS)
/* Terminator for the feature-bit tables passed to vhost_get/ack_features(). */
#define VHOST_INVALID_FEATURE_BIT   (0xff)
/* Queue index used for config-interrupt masking (see vhost_config_mask()). */
#define VHOST_QUEUE_NUM_CONFIG_INR 0
49 
/*
 * Dirty-memory log shared with the backend; reference counted so a
 * single log can be shared by multiple vhost devices.
 */
struct vhost_log {
    unsigned long long size;  /* log size — NOTE(review): presumably in chunks, confirm in vhost.c */
    int refcnt;               /* number of vhost devices using this log */
    int fd;                   /* fd backing the log when shared with the backend */
    vhost_log_chunk_t *log;   /* the bitmap itself */
};
56 
struct vhost_dev;

/* Per-IOMMU-region tracking state, linked into vhost_dev.iommu_list. */
struct vhost_iommu {
    struct vhost_dev *hdev;    /* owning vhost device */
    MemoryRegion *mr;          /* the IOMMU memory region being tracked */
    hwaddr iommu_offset;       /* offset of @mr — NOTE(review): presumably within the address space, confirm */
    IOMMUNotifier n;           /* notifier registered on @mr */
    QLIST_ENTRY(vhost_iommu) iommu_next;
};
65 
/*
 * Callbacks for device config space handling; registered with
 * vhost_dev_set_config_notifier().
 */
typedef struct VhostDevConfigOps {
    /* Invoked when the vhost device's config space has changed. */
    int (*vhost_dev_config_notifier)(struct vhost_dev *dev);
} VhostDevConfigOps;
71 
72 struct vhost_memory;
73 
74 /**
75  * struct vhost_dev - common vhost_dev structure
76  * @vhost_ops: backend specific ops
77  * @config_ops: ops for config changes (see @vhost_dev_set_config_notifier)
78  */
struct vhost_dev {
    /* the VirtIO device this vhost instance is acting as a backend for */
    VirtIODevice *vdev;
    /* listeners that keep the backend's view of guest memory in sync */
    MemoryListener memory_listener;
    MemoryListener iommu_listener;
    /* memory table currently communicated to the backend */
    struct vhost_memory *mem;
    int n_mem_sections;
    MemoryRegionSection *mem_sections;
    /* NOTE(review): tmp_sections look like a staging copy built while the
     * memory listener recomputes @mem_sections — confirm in vhost.c */
    int n_tmp_sections;
    MemoryRegionSection *tmp_sections;
    /* the vhost virtqueues backing this device */
    struct vhost_virtqueue *vqs;
    unsigned int nvqs;
    /* the first virtqueue which would be used by this vhost dev */
    int vq_index;
    /* one past the last vq index for the virtio device (not vhost) */
    int vq_index_end;
    /* if non-zero, minimum required value for max_queues */
    int num_queues;
    /**
     * vhost feature handling requires matching the feature set
     * offered by a backend which may be a subset of the total
     * features eventually offered to the guest.
     *
     * @features: available features provided by the backend
     * @acked_features: final negotiated features with front-end driver
     *
     * @backend_features: this is used in a couple of places to either
     * store VHOST_USER_F_PROTOCOL_FEATURES to apply to
     * VHOST_USER_SET_FEATURES or VHOST_NET_F_VIRTIO_NET_HDR. Its
     * future use should be discouraged and the variable retired as
     * it's easy to confuse with the VirtIO backend_features.
     */
    VIRTIO_DECLARE_FEATURES(features);
    VIRTIO_DECLARE_FEATURES(acked_features);
    VIRTIO_DECLARE_FEATURES(backend_features);

    /**
     * @protocol_features: is the vhost-user only feature set by
     * VHOST_USER_SET_PROTOCOL_FEATURES. Protocol features are only
     * negotiated if VHOST_USER_F_PROTOCOL_FEATURES has been offered
     * by the backend (see @features).
     */
    uint64_t protocol_features;

    uint64_t max_queues;
    uint64_t backend_cap;
    /* @started: is the vhost device started? */
    bool started;
    /* is dirty-page logging currently enabled? (see @log, @log_size) */
    bool log_enabled;
    uint64_t log_size;
    /* set when this device cannot support migration */
    Error *migration_blocker;
    /* backend-specific ops (kernel_ops / user_ops / vdpa_ops) */
    const VhostOps *vhost_ops;
    /* backend-specific opaque pointer, as passed to vhost_dev_init() */
    void *opaque;
    /* shared dirty log, possibly refcounted with other devices */
    struct vhost_log *log;
    QLIST_ENTRY(vhost_dev) entry;
    QLIST_ENTRY(vhost_dev) logdev_entry;
    /* IOMMU regions tracked for this device (struct vhost_iommu) */
    QLIST_HEAD(, vhost_iommu) iommu_list;
    IOMMUNotifier n;
    /* ops for config changes (see vhost_dev_set_config_notifier()) */
    const VhostDevConfigOps *config_ops;
};
138 
139 extern const VhostOps kernel_ops;
140 extern const VhostOps user_ops;
141 extern const VhostOps vdpa_ops;
142 
/* Per-queue-pair vhost state for a network device. */
struct vhost_net {
    struct vhost_dev dev;
    struct vhost_virtqueue vqs[2];  /* the RX/TX virtqueue pair */
    int backend;                    /* backend identifier passed to vhost_net_set_backend() */
    /* table of supported feature bits, terminated by VHOST_INVALID_FEATURE_BIT */
    const int *feature_bits;
    int max_tx_queue_size;
    /* callback to persist negotiated features on the NetClientState.
     * NOTE(review): "Acket" spelling comes from the type declared in
     * net/vhost_net.h — do not "fix" it here without renaming the type. */
    SaveAcketFeatures *save_acked_features;
    bool is_vhost_user;             /* true for a vhost-user (as opposed to kernel/vdpa) backend */
    NetClientState *nc;             /* the peer network client */
};
153 
154 /**
155  * vhost_dev_init() - initialise the vhost interface
156  * @hdev: the common vhost_dev structure
157  * @opaque: opaque ptr passed to backend (vhost/vhost-user/vdpa)
158  * @backend_type: type of backend
159  * @busyloop_timeout: timeout for polling virtqueue
160  * @errp: error handle
161  *
162  * The initialisation of the vhost device will trigger the
163  * initialisation of the backend and potentially capability
164  * negotiation of backend interface. Configuration of the VirtIO
165  * itself won't happen until the interface is started.
166  *
167  * Return: 0 on success, non-zero on error while setting errp.
168  */
169 int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
170                    VhostBackendType backend_type,
171                    uint32_t busyloop_timeout, Error **errp);
172 
173 /**
174  * vhost_dev_cleanup() - tear down and cleanup vhost interface
175  * @hdev: the common vhost_dev structure
176  */
177 void vhost_dev_cleanup(struct vhost_dev *hdev);
178 
179 void vhost_dev_disable_notifiers_nvqs(struct vhost_dev *hdev,
180                                       VirtIODevice *vdev,
181                                       unsigned int nvqs);
182 
183 /**
184  * vhost_dev_enable_notifiers() - enable event notifiers
185  * @hdev: common vhost_dev structure
186  * @vdev: the VirtIODevice structure
187  *
188  * Enable notifications directly to the vhost device rather than being
189  * triggered by QEMU itself. Notifications should be enabled before
190  * the vhost device is started via @vhost_dev_start.
191  *
192  * Return: 0 on success, < 0 on error.
193  */
194 int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
195 
196 /**
197  * vhost_dev_disable_notifiers - disable event notifications
198  * @hdev: common vhost_dev structure
199  * @vdev: the VirtIODevice structure
200  *
201  * Disable direct notifications to vhost device.
202  */
203 void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
/* Test and clear pending status of the config-change notifier;
 * analogous to vhost_virtqueue_pending() but for the config interrupt. */
bool vhost_config_pending(struct vhost_dev *hdev);
/* Mask/unmask config-change notifications from the device;
 * analogous to vhost_virtqueue_mask(). */
void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask);
206 
/**
 * vhost_dev_is_started() - report status of vhost device
 * @hdev: common vhost_dev structure
 *
 * Return: true if the vhost device has been started (see
 * vhost_dev_start()), false otherwise.
 */
static inline bool vhost_dev_is_started(struct vhost_dev *hdev)
{
    return hdev->started;
}
217 
218 /**
219  * vhost_dev_start() - start the vhost device
220  * @hdev: common vhost_dev structure
221  * @vdev: the VirtIODevice structure
222  * @vrings: true to have vrings enabled in this call
223  *
224  * Starts the vhost device. From this point VirtIO feature negotiation
225  * can start and the device can start processing VirtIO transactions.
226  *
227  * Return: 0 on success, < 0 on error.
228  */
229 int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings);
230 
231 /**
232  * vhost_dev_stop() - stop the vhost device
233  * @hdev: common vhost_dev structure
234  * @vdev: the VirtIODevice structure
235  * @vrings: true to have vrings disabled in this call
236  *
237  * Stop the vhost device. After the device is stopped the notifiers
238  * can be disabled (@vhost_dev_disable_notifiers) and the device can
239  * be torn down (@vhost_dev_cleanup).
240  *
241  * Return: 0 on success, != 0 on error when stopping dev.
242  */
243 int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings);
244 
245 /**
246  * vhost_dev_force_stop() - force stop the vhost device
247  * @hdev: common vhost_dev structure
248  * @vdev: the VirtIODevice structure
249  * @vrings: true to have vrings disabled in this call
250  *
251  * Force stop the vhost device. After the device is stopped the notifiers
252  * can be disabled (@vhost_dev_disable_notifiers) and the device can
253  * be torn down (@vhost_dev_cleanup). Unlike @vhost_dev_stop, this doesn't
254  * attempt to flush in-flight backend requests by skipping GET_VRING_BASE
255  * entirely.
256  */
257 int vhost_dev_force_stop(struct vhost_dev *hdev, VirtIODevice *vdev,
258                          bool vrings);
259 
260 /**
261  * DOC: vhost device configuration handling
262  *
263  * The VirtIO device configuration space is used for rarely changing
264  * or initialisation time parameters. The configuration can be updated
265  * by either the guest driver or the device itself. If the device can
266  * change the configuration over time the vhost handler should
267  * register a @VhostDevConfigOps structure with
268  * @vhost_dev_set_config_notifier so the guest can be notified. Some
269  * devices register a handler anyway and will signal an error if an
270  * unexpected config change happens.
271  */
272 
273 /**
274  * vhost_dev_get_config() - fetch device configuration
275  * @hdev: common vhost_dev_structure
276  * @config: pointer to device appropriate config structure
277  * @config_len: size of device appropriate config structure
278  *
279  * Return: 0 on success, < 0 on error while setting errp
280  */
281 int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config,
282                          uint32_t config_len, Error **errp);
283 
284 /**
285  * vhost_dev_set_config() - set device configuration
286  * @hdev: common vhost_dev_structure
287  * @data: pointer to data to set
288  * @offset: offset into configuration space
289  * @size: length of set
290  * @flags: @VhostSetConfigType flags
291  *
292  * By use of @offset/@size a subset of the configuration space can be
293  * written to. The @flags are used to indicate if it is a normal
294  * transaction or related to migration.
295  *
296  * Return: 0 on success, non-zero on error
297  */
298 int vhost_dev_set_config(struct vhost_dev *dev, const uint8_t *data,
299                          uint32_t offset, uint32_t size, uint32_t flags);
300 
301 /**
302  * vhost_dev_set_config_notifier() - register VhostDevConfigOps
303  * @hdev: common vhost_dev_structure
304  * @ops: notifier ops
305  *
306  * If the device is expected to change configuration a notifier can be
307  * setup to handle the case.
308  */
309 void vhost_dev_set_config_notifier(struct vhost_dev *dev,
310                                    const VhostDevConfigOps *ops);
311 
312 
313 /* Test and clear masked event pending status.
314  * Should be called after unmask to avoid losing events.
315  */
316 bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n);
317 
318 /* Mask/unmask events from this vq.
319  */
320 void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
321                           bool mask);
322 
323 /**
324  * vhost_get_features_ex() - sanitize the extended features set
325  * @hdev: common vhost_dev structure
326  * @feature_bits: pointer to terminated table of feature bits
327  * @features: original features set, filtered out on return
328  *
 * This is the extended variant of vhost_get_features(), supporting the
 * extended features set. Filter it with the intersection of what is
331  * supported by the vhost backend (hdev->features) and the supported
332  * feature_bits.
333  */
334 void vhost_get_features_ex(struct vhost_dev *hdev,
335                            const int *feature_bits,
336                            uint64_t *features);
337 /**
338  * vhost_get_features() - return a sanitised set of feature bits
339  * @hdev: common vhost_dev structure
340  * @feature_bits: pointer to terminated table of feature bits
341  * @features: original feature set
342  *
343  * This returns a set of features bits that is an intersection of what
344  * is supported by the vhost backend (hdev->features), the supported
345  * feature_bits and the requested feature set.
346  */
347 static inline uint64_t vhost_get_features(struct vhost_dev *hdev,
348                                           const int *feature_bits,
349                                           uint64_t features)
350 {
351     uint64_t features_ex[VIRTIO_FEATURES_NU64S];
352 
353     virtio_features_from_u64(features_ex, features);
354     vhost_get_features_ex(hdev, feature_bits, features_ex);
355     return features_ex[0];
356 }
357 
358 /**
359  * vhost_ack_features_ex() - set vhost full set of acked_features
360  * @hdev: common vhost_dev structure
361  * @feature_bits: pointer to terminated table of feature bits
362  * @features: requested feature set
363  *
 * This sets the internal hdev->acked_features to the intersection of
 * the backend's advertised features and the supported feature_bits.
366  */
367 void vhost_ack_features_ex(struct vhost_dev *hdev, const int *feature_bits,
368                            const uint64_t *features);
369 
370 /**
371  * vhost_ack_features() - set vhost acked_features
372  * @hdev: common vhost_dev structure
373  * @feature_bits: pointer to terminated table of feature bits
374  * @features: requested feature set
375  *
376  * This sets the internal hdev->acked_features to the intersection of
377  * the backends advertised features and the supported feature_bits.
378  */
379 static inline void vhost_ack_features(struct vhost_dev *hdev,
380                                       const int *feature_bits,
381                                       uint64_t features)
382 {
383     uint64_t features_ex[VIRTIO_FEATURES_NU64S];
384 
385     virtio_features_from_u64(features_ex, features);
386     vhost_ack_features_ex(hdev, feature_bits, features_ex);
387 }
388 
389 unsigned int vhost_get_max_memslots(void);
390 unsigned int vhost_get_free_memslots(void);
391 
392 int vhost_net_set_backend(struct vhost_dev *hdev,
393                           struct vhost_vring_file *file);
394 
395 void vhost_toggle_device_iotlb(VirtIODevice *vdev);
396 int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write);
397 
398 int vhost_virtqueue_start(struct vhost_dev *dev, struct VirtIODevice *vdev,
399                           struct vhost_virtqueue *vq, unsigned idx);
400 int vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev,
401                          struct vhost_virtqueue *vq, unsigned idx);
402 
403 void vhost_dev_reset_inflight(struct vhost_inflight *inflight);
404 void vhost_dev_free_inflight(struct vhost_inflight *inflight);
405 int vhost_dev_prepare_inflight(struct vhost_dev *hdev, VirtIODevice *vdev);
406 int vhost_dev_set_inflight(struct vhost_dev *dev,
407                            struct vhost_inflight *inflight);
408 int vhost_dev_get_inflight(struct vhost_dev *dev, uint16_t queue_size,
409                            struct vhost_inflight *inflight);
410 bool vhost_dev_has_iommu(struct vhost_dev *dev);
411 
/**
 * vhost_reset_device() - reset the vhost device
 * @hdev: common vhost_dev structure
 *
 * Return: 0 on success, negative errno on failure; -ENOSYS when QEMU
 * is built without vhost support.
 */
#ifdef CONFIG_VHOST
int vhost_reset_device(struct vhost_dev *hdev);
#else
static inline int vhost_reset_device(struct vhost_dev *hdev)
{
    /* stub: no vhost support compiled in */
    return -ENOSYS;
}
#endif /* CONFIG_VHOST */
420 
/**
 * vhost_supports_device_state(): Checks whether the back-end supports
 * transferring internal device state for the purpose of migration.
 * Support for this feature is required for vhost_set_device_state_fd()
 * and vhost_check_device_state().
 *
 * @dev: The vhost device
 *
 * Returns true if the device supports these commands, and false if it
 * does not.
 */
#ifdef CONFIG_VHOST
bool vhost_supports_device_state(struct vhost_dev *dev);
#else
static inline bool vhost_supports_device_state(struct vhost_dev *dev)
{
    /* without vhost support the feature can never be offered */
    return false;
}
#endif
440 
441 /**
442  * vhost_set_device_state_fd(): Begin transfer of internal state from/to
443  * the back-end for the purpose of migration.  Data is to be transferred
444  * over a pipe according to @direction and @phase.  The sending end must
445  * only write to the pipe, and the receiving end must only read from it.
446  * Once the sending end is done, it closes its FD.  The receiving end
447  * must take this as the end-of-transfer signal and close its FD, too.
448  *
449  * @fd is the back-end's end of the pipe: The write FD for SAVE, and the
450  * read FD for LOAD.  This function transfers ownership of @fd to the
451  * back-end, i.e. closes it in the front-end.
452  *
453  * The back-end may optionally reply with an FD of its own, if this
454  * improves efficiency on its end.  In this case, the returned FD is
455  * stored in *reply_fd.  The back-end will discard the FD sent to it,
456  * and the front-end must use *reply_fd for transferring state to/from
457  * the back-end.
458  *
459  * @dev: The vhost device
460  * @direction: The direction in which the state is to be transferred.
461  *             For outgoing migrations, this is SAVE, and data is read
462  *             from the back-end and stored by the front-end in the
463  *             migration stream.
464  *             For incoming migrations, this is LOAD, and data is read
465  *             by the front-end from the migration stream and sent to
466  *             the back-end to restore the saved state.
467  * @phase: Which migration phase we are in.  Currently, there is only
468  *         STOPPED (device and all vrings are stopped), in the future,
469  *         more phases such as PRE_COPY or POST_COPY may be added.
470  * @fd: Back-end's end of the pipe through which to transfer state; note
471  *      that ownership is transferred to the back-end, so this function
472  *      closes @fd in the front-end.
473  * @reply_fd: If the back-end wishes to use a different pipe for state
474  *            transfer, this will contain an FD for the front-end to
475  *            use.  Otherwise, -1 is stored here.
476  * @errp: Potential error description
477  *
478  * Returns 0 on success, and -errno on failure.
479  */
480 int vhost_set_device_state_fd(struct vhost_dev *dev,
481                               VhostDeviceStateDirection direction,
482                               VhostDeviceStatePhase phase,
483                               int fd,
484                               int *reply_fd,
485                               Error **errp);
486 
487 /**
 * vhost_check_device_state(): After transferring state from/to the
489  * back-end via vhost_set_device_state_fd(), i.e. once the sending end
490  * has closed the pipe, inquire the back-end to report any potential
491  * errors that have occurred on its side.  This allows to sense errors
492  * like:
493  * - During outgoing migration, when the source side had already started
494  *   to produce its state, something went wrong and it failed to finish
495  * - During incoming migration, when the received state is somehow
496  *   invalid and cannot be processed by the back-end
497  *
498  * @dev: The vhost device
499  * @errp: Potential error description
500  *
501  * Returns 0 when the back-end reports successful state transfer and
502  * processing, and -errno when an error occurred somewhere.
503  */
504 int vhost_check_device_state(struct vhost_dev *dev, Error **errp);
505 
506 /**
507  * vhost_save_backend_state(): High-level function to receive a vhost
508  * back-end's state, and save it in @f.  Uses
509  * `vhost_set_device_state_fd()` to get the data from the back-end, and
510  * stores it in consecutive chunks that are each prefixed by their
511  * respective length (be32).  The end is marked by a 0-length chunk.
512  *
513  * Must only be called while the device and all its vrings are stopped
514  * (`VHOST_TRANSFER_STATE_PHASE_STOPPED`).
515  *
516  * @dev: The vhost device from which to save the state
517  * @f: Migration stream in which to save the state
518  * @errp: Potential error message
519  *
520  * Returns 0 on success, and -errno otherwise.
521  */
#ifdef CONFIG_VHOST
int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp);
#else
static inline int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f,
                                           Error **errp)
{
    /* stub: no vhost support compiled in.
     * NOTE(review): returns -ENOSYS without setting *errp — callers must
     * not rely on errp being populated on this path. */
    return -ENOSYS;
}
#endif
531 
532 /**
533  * vhost_load_backend_state(): High-level function to load a vhost
534  * back-end's state from @f, and send it over to the back-end.  Reads
535  * the data from @f in the format used by `vhost_save_state()`, and uses
536  * `vhost_set_device_state_fd()` to transfer it to the back-end.
537  *
538  * Must only be called while the device and all its vrings are stopped
539  * (`VHOST_TRANSFER_STATE_PHASE_STOPPED`).
540  *
541  * @dev: The vhost device to which to send the state
542  * @f: Migration stream from which to load the state
543  * @errp: Potential error message
544  *
545  * Returns 0 on success, and -errno otherwise.
546  */
#ifdef CONFIG_VHOST
int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp);
#else
static inline int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f,
                                           Error **errp)
{
    /* stub: no vhost support compiled in.
     * NOTE(review): returns -ENOSYS without setting *errp — callers must
     * not rely on errp being populated on this path. */
    return -ENOSYS;
}
#endif
556 
557 #endif
558