1 #ifndef VHOST_H 2 #define VHOST_H 3 4 #include "hw/virtio/vhost-backend.h" 5 #include "hw/virtio/virtio.h" 6 #include "system/memory.h" 7 8 #define VHOST_F_DEVICE_IOTLB 63 9 #define VHOST_USER_F_PROTOCOL_FEATURES 30 10 11 #define VU_REALIZE_CONN_RETRIES 3 12 13 /* Generic structures common for any vhost based device. */ 14 15 struct vhost_inflight { 16 int fd; 17 void *addr; 18 uint64_t size; 19 uint64_t offset; 20 uint16_t queue_size; 21 }; 22 23 struct vhost_virtqueue { 24 int kick; 25 int call; 26 void *desc; 27 void *avail; 28 void *used; 29 int num; 30 unsigned long long desc_phys; 31 unsigned desc_size; 32 unsigned long long avail_phys; 33 unsigned avail_size; 34 unsigned long long used_phys; 35 unsigned used_size; 36 EventNotifier masked_notifier; 37 EventNotifier error_notifier; 38 EventNotifier masked_config_notifier; 39 struct vhost_dev *dev; 40 }; 41 42 typedef unsigned long vhost_log_chunk_t; 43 #define VHOST_LOG_PAGE 0x1000 44 #define VHOST_LOG_BITS (8 * sizeof(vhost_log_chunk_t)) 45 #define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * VHOST_LOG_BITS) 46 #define VHOST_INVALID_FEATURE_BIT (0xff) 47 #define VHOST_QUEUE_NUM_CONFIG_INR 0 48 49 struct vhost_log { 50 unsigned long long size; 51 int refcnt; 52 int fd; 53 vhost_log_chunk_t *log; 54 }; 55 56 struct vhost_dev; 57 struct vhost_iommu { 58 struct vhost_dev *hdev; 59 MemoryRegion *mr; 60 hwaddr iommu_offset; 61 IOMMUNotifier n; 62 QLIST_ENTRY(vhost_iommu) iommu_next; 63 }; 64 65 typedef struct VhostDevConfigOps { 66 /* Vhost device config space changed callback 67 */ 68 int (*vhost_dev_config_notifier)(struct vhost_dev *dev); 69 } VhostDevConfigOps; 70 71 struct vhost_memory; 72 73 /** 74 * struct vhost_dev - common vhost_dev structure 75 * @vhost_ops: backend specific ops 76 * @config_ops: ops for config changes (see @vhost_dev_set_config_notifier) 77 */ 78 struct vhost_dev { 79 VirtIODevice *vdev; 80 MemoryListener memory_listener; 81 MemoryListener iommu_listener; 82 struct vhost_memory *mem; 83 int n_mem_sections; 84 MemoryRegionSection *mem_sections; 85 int n_tmp_sections; 86 MemoryRegionSection *tmp_sections; 87 struct vhost_virtqueue *vqs; 88 unsigned int nvqs; 89 /* the first virtqueue which would be used by this vhost dev */ 90 int vq_index; 91 /* one past the last vq index for the virtio device (not vhost) */ 92 int vq_index_end; 93 /* if non-zero, minimum required value for max_queues */ 94 int num_queues; 95 /** 96 * vhost feature handling requires matching the feature set 97 * offered by a backend which may be a subset of the total 98 * features eventually offered to the guest. 99 * 100 * @features: available features provided by the backend 101 * @acked_features: final negotiated features with front-end driver 102 * 103 * @backend_features: this is used in a couple of places to either 104 * store VHOST_USER_F_PROTOCOL_FEATURES to apply to 105 * VHOST_USER_SET_FEATURES or VHOST_NET_F_VIRTIO_NET_HDR. Its 106 * future use should be discouraged and the variable retired as 107 * its easy to confuse with the VirtIO backend_features. 108 */ 109 uint64_t features; 110 uint64_t acked_features; 111 uint64_t backend_features; 112 113 /** 114 * @protocol_features: is the vhost-user only feature set by 115 * VHOST_USER_SET_PROTOCOL_FEATURES. Protocol features are only 116 * negotiated if VHOST_USER_F_PROTOCOL_FEATURES has been offered 117 * by the backend (see @features). 118 */ 119 uint64_t protocol_features; 120 121 uint64_t max_queues; 122 uint64_t backend_cap; 123 /* @started: is the vhost device started? */ 124 bool started; 125 bool log_enabled; 126 uint64_t log_size; 127 Error *migration_blocker; 128 const VhostOps *vhost_ops; 129 void *opaque; 130 struct vhost_log *log; 131 QLIST_ENTRY(vhost_dev) entry; 132 QLIST_ENTRY(vhost_dev) logdev_entry; 133 QLIST_HEAD(, vhost_iommu) iommu_list; 134 IOMMUNotifier n; 135 const VhostDevConfigOps *config_ops; 136 }; 137 138 extern const VhostOps kernel_ops; 139 extern const VhostOps user_ops; 140 extern const VhostOps vdpa_ops; 141 142 struct vhost_net { 143 struct vhost_dev dev; 144 struct vhost_virtqueue vqs[2]; 145 int backend; 146 NetClientState *nc; 147 }; 148 149 /** 150 * vhost_dev_init() - initialise the vhost interface 151 * @hdev: the common vhost_dev structure 152 * @opaque: opaque ptr passed to backend (vhost/vhost-user/vdpa) 153 * @backend_type: type of backend 154 * @busyloop_timeout: timeout for polling virtqueue 155 * @errp: error handle 156 * 157 * The initialisation of the vhost device will trigger the 158 * initialisation of the backend and potentially capability 159 * negotiation of backend interface. Configuration of the VirtIO 160 * itself won't happen until the interface is started. 161 * 162 * Return: 0 on success, non-zero on error while setting errp. 163 */ 164 int vhost_dev_init(struct vhost_dev *hdev, void *opaque, 165 VhostBackendType backend_type, 166 uint32_t busyloop_timeout, Error **errp); 167 168 /** 169 * vhost_dev_cleanup() - tear down and cleanup vhost interface 170 * @hdev: the common vhost_dev structure 171 */ 172 void vhost_dev_cleanup(struct vhost_dev *hdev); 173 174 void vhost_dev_disable_notifiers_nvqs(struct vhost_dev *hdev, 175 VirtIODevice *vdev, 176 unsigned int nvqs); 177 178 /** 179 * vhost_dev_enable_notifiers() - enable event notifiers 180 * @hdev: common vhost_dev structure 181 * @vdev: the VirtIODevice structure 182 * 183 * Enable notifications directly to the vhost device rather than being 184 * triggered by QEMU itself. Notifications should be enabled before 185 * the vhost device is started via @vhost_dev_start. 186 * 187 * Return: 0 on success, < 0 on error. 188 */ 189 int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); 190 191 /** 192 * vhost_dev_disable_notifiers - disable event notifications 193 * @hdev: common vhost_dev structure 194 * @vdev: the VirtIODevice structure 195 * 196 * Disable direct notifications to vhost device. 197 */ 198 void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); 199 bool vhost_config_pending(struct vhost_dev *hdev); 200 void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask); 201 202 /** 203 * vhost_dev_is_started() - report status of vhost device 204 * @hdev: common vhost_dev structure 205 * 206 * Return the started status of the vhost device 207 */ 208 static inline bool vhost_dev_is_started(struct vhost_dev *hdev) 209 { 210 return hdev->started; 211 } 212 213 /** 214 * vhost_dev_start() - start the vhost device 215 * @hdev: common vhost_dev structure 216 * @vdev: the VirtIODevice structure 217 * @vrings: true to have vrings enabled in this call 218 * 219 * Starts the vhost device. From this point VirtIO feature negotiation 220 * can start and the device can start processing VirtIO transactions. 221 * 222 * Return: 0 on success, < 0 on error. 223 */ 224 int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); 225 226 /** 227 * vhost_dev_stop() - stop the vhost device 228 * @hdev: common vhost_dev structure 229 * @vdev: the VirtIODevice structure 230 * @vrings: true to have vrings disabled in this call 231 * 232 * Stop the vhost device. After the device is stopped the notifiers 233 * can be disabled (@vhost_dev_disable_notifiers) and the device can 234 * be torn down (@vhost_dev_cleanup). 235 * 236 * Return: 0 on success, != 0 on error when stopping dev. 237 */ 238 int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); 239 240 /** 241 * vhost_dev_force_stop() - force stop the vhost device 242 * @hdev: common vhost_dev structure 243 * @vdev: the VirtIODevice structure 244 * @vrings: true to have vrings disabled in this call 245 * 246 * Force stop the vhost device. After the device is stopped the notifiers 247 * can be disabled (@vhost_dev_disable_notifiers) and the device can 248 * be torn down (@vhost_dev_cleanup). Unlike @vhost_dev_stop, this doesn't 249 * attempt to flush in-flight backend requests by skipping GET_VRING_BASE 250 * entirely. 251 */ 252 int vhost_dev_force_stop(struct vhost_dev *hdev, VirtIODevice *vdev, 253 bool vrings); 254 255 /** 256 * DOC: vhost device configuration handling 257 * 258 * The VirtIO device configuration space is used for rarely changing 259 * or initialisation time parameters. The configuration can be updated 260 * by either the guest driver or the device itself. If the device can 261 * change the configuration over time the vhost handler should 262 * register a @VhostDevConfigOps structure with 263 * @vhost_dev_set_config_notifier so the guest can be notified. Some 264 * devices register a handler anyway and will signal an error if an 265 * unexpected config change happens. 266 */ 267 268 /** 269 * vhost_dev_get_config() - fetch device configuration 270 * @hdev: common vhost_dev_structure 271 * @config: pointer to device appropriate config structure 272 * @config_len: size of device appropriate config structure 273 * 274 * Return: 0 on success, < 0 on error while setting errp 275 */ 276 int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config, 277 uint32_t config_len, Error **errp); 278 279 /** 280 * vhost_dev_set_config() - set device configuration 281 * @hdev: common vhost_dev_structure 282 * @data: pointer to data to set 283 * @offset: offset into configuration space 284 * @size: length of set 285 * @flags: @VhostSetConfigType flags 286 * 287 * By use of @offset/@size a subset of the configuration space can be 288 * written to. The @flags are used to indicate if it is a normal 289 * transaction or related to migration. 290 * 291 * Return: 0 on success, non-zero on error 292 */ 293 int vhost_dev_set_config(struct vhost_dev *dev, const uint8_t *data, 294 uint32_t offset, uint32_t size, uint32_t flags); 295 296 /** 297 * vhost_dev_set_config_notifier() - register VhostDevConfigOps 298 * @hdev: common vhost_dev_structure 299 * @ops: notifier ops 300 * 301 * If the device is expected to change configuration a notifier can be 302 * setup to handle the case. 303 */ 304 void vhost_dev_set_config_notifier(struct vhost_dev *dev, 305 const VhostDevConfigOps *ops); 306 307 308 /* Test and clear masked event pending status. 309 * Should be called after unmask to avoid losing events. 310 */ 311 bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n); 312 313 /* Mask/unmask events from this vq. 314 */ 315 void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n, 316 bool mask); 317 318 /** 319 * vhost_get_features() - return a sanitised set of feature bits 320 * @hdev: common vhost_dev structure 321 * @feature_bits: pointer to terminated table of feature bits 322 * @features: original feature set 323 * 324 * This returns a set of features bits that is an intersection of what 325 * is supported by the vhost backend (hdev->features), the supported 326 * feature_bits and the requested feature set. 327 */ 328 uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits, 329 uint64_t features); 330 331 /** 332 * vhost_ack_features() - set vhost acked_features 333 * @hdev: common vhost_dev structure 334 * @feature_bits: pointer to terminated table of feature bits 335 * @features: requested feature set 336 * 337 * This sets the internal hdev->acked_features to the intersection of 338 * the backends advertised features and the supported feature_bits. 339 */ 340 void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits, 341 uint64_t features); 342 unsigned int vhost_get_max_memslots(void); 343 unsigned int vhost_get_free_memslots(void); 344 345 int vhost_net_set_backend(struct vhost_dev *hdev, 346 struct vhost_vring_file *file); 347 348 void vhost_toggle_device_iotlb(VirtIODevice *vdev); 349 int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write); 350 351 int vhost_virtqueue_start(struct vhost_dev *dev, struct VirtIODevice *vdev, 352 struct vhost_virtqueue *vq, unsigned idx); 353 int vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev, 354 struct vhost_virtqueue *vq, unsigned idx); 355 356 void vhost_dev_reset_inflight(struct vhost_inflight *inflight); 357 void vhost_dev_free_inflight(struct vhost_inflight *inflight); 358 int vhost_dev_prepare_inflight(struct vhost_dev *hdev, VirtIODevice *vdev); 359 int vhost_dev_set_inflight(struct vhost_dev *dev, 360 struct vhost_inflight *inflight); 361 int vhost_dev_get_inflight(struct vhost_dev *dev, uint16_t queue_size, 362 struct vhost_inflight *inflight); 363 bool vhost_dev_has_iommu(struct vhost_dev *dev); 364 365 #ifdef CONFIG_VHOST 366 int vhost_reset_device(struct vhost_dev *hdev); 367 #else 368 static inline int vhost_reset_device(struct vhost_dev *hdev) 369 { 370 return -ENOSYS; 371 } 372 #endif /* CONFIG_VHOST */ 373 374 /** 375 * vhost_supports_device_state(): Checks whether the back-end supports 376 * transferring internal device state for the purpose of migration. 377 * Support for this feature is required for vhost_set_device_state_fd() 378 * and vhost_check_device_state(). 379 * 380 * @dev: The vhost device 381 * 382 * Returns true if the device supports these commands, and false if it 383 * does not. 384 */ 385 #ifdef CONFIG_VHOST 386 bool vhost_supports_device_state(struct vhost_dev *dev); 387 #else 388 static inline bool vhost_supports_device_state(struct vhost_dev *dev) 389 { 390 return false; 391 } 392 #endif 393 394 /** 395 * vhost_set_device_state_fd(): Begin transfer of internal state from/to 396 * the back-end for the purpose of migration. Data is to be transferred 397 * over a pipe according to @direction and @phase. The sending end must 398 * only write to the pipe, and the receiving end must only read from it. 399 * Once the sending end is done, it closes its FD. The receiving end 400 * must take this as the end-of-transfer signal and close its FD, too. 401 * 402 * @fd is the back-end's end of the pipe: The write FD for SAVE, and the 403 * read FD for LOAD. This function transfers ownership of @fd to the 404 * back-end, i.e. closes it in the front-end. 405 * 406 * The back-end may optionally reply with an FD of its own, if this 407 * improves efficiency on its end. In this case, the returned FD is 408 * stored in *reply_fd. The back-end will discard the FD sent to it, 409 * and the front-end must use *reply_fd for transferring state to/from 410 * the back-end. 411 * 412 * @dev: The vhost device 413 * @direction: The direction in which the state is to be transferred. 414 * For outgoing migrations, this is SAVE, and data is read 415 * from the back-end and stored by the front-end in the 416 * migration stream. 417 * For incoming migrations, this is LOAD, and data is read 418 * by the front-end from the migration stream and sent to 419 * the back-end to restore the saved state. 420 * @phase: Which migration phase we are in. Currently, there is only 421 * STOPPED (device and all vrings are stopped), in the future, 422 * more phases such as PRE_COPY or POST_COPY may be added. 423 * @fd: Back-end's end of the pipe through which to transfer state; note 424 * that ownership is transferred to the back-end, so this function 425 * closes @fd in the front-end. 426 * @reply_fd: If the back-end wishes to use a different pipe for state 427 * transfer, this will contain an FD for the front-end to 428 * use. Otherwise, -1 is stored here. 429 * @errp: Potential error description 430 * 431 * Returns 0 on success, and -errno on failure. 432 */ 433 int vhost_set_device_state_fd(struct vhost_dev *dev, 434 VhostDeviceStateDirection direction, 435 VhostDeviceStatePhase phase, 436 int fd, 437 int *reply_fd, 438 Error **errp); 439 440 /** 441 * vhost_set_device_state_fd(): After transferring state from/to the 442 * back-end via vhost_set_device_state_fd(), i.e. once the sending end 443 * has closed the pipe, inquire the back-end to report any potential 444 * errors that have occurred on its side. This allows to sense errors 445 * like: 446 * - During outgoing migration, when the source side had already started 447 * to produce its state, something went wrong and it failed to finish 448 * - During incoming migration, when the received state is somehow 449 * invalid and cannot be processed by the back-end 450 * 451 * @dev: The vhost device 452 * @errp: Potential error description 453 * 454 * Returns 0 when the back-end reports successful state transfer and 455 * processing, and -errno when an error occurred somewhere. 456 */ 457 int vhost_check_device_state(struct vhost_dev *dev, Error **errp); 458 459 /** 460 * vhost_save_backend_state(): High-level function to receive a vhost 461 * back-end's state, and save it in @f. Uses 462 * `vhost_set_device_state_fd()` to get the data from the back-end, and 463 * stores it in consecutive chunks that are each prefixed by their 464 * respective length (be32). The end is marked by a 0-length chunk. 465 * 466 * Must only be called while the device and all its vrings are stopped 467 * (`VHOST_TRANSFER_STATE_PHASE_STOPPED`). 468 * 469 * @dev: The vhost device from which to save the state 470 * @f: Migration stream in which to save the state 471 * @errp: Potential error message 472 * 473 * Returns 0 on success, and -errno otherwise. 474 */ 475 #ifdef CONFIG_VHOST 476 int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); 477 #else 478 static inline int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, 479 Error **errp) 480 { 481 return -ENOSYS; 482 } 483 #endif 484 485 /** 486 * vhost_load_backend_state(): High-level function to load a vhost 487 * back-end's state from @f, and send it over to the back-end. Reads 488 * the data from @f in the format used by `vhost_save_state()`, and uses 489 * `vhost_set_device_state_fd()` to transfer it to the back-end. 490 * 491 * Must only be called while the device and all its vrings are stopped 492 * (`VHOST_TRANSFER_STATE_PHASE_STOPPED`). 493 * 494 * @dev: The vhost device to which to send the state 495 * @f: Migration stream from which to load the state 496 * @errp: Potential error message 497 * 498 * Returns 0 on success, and -errno otherwise. 499 */ 500 #ifdef CONFIG_VHOST 501 int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); 502 #else 503 static inline int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, 504 Error **errp) 505 { 506 return -ENOSYS; 507 } 508 #endif 509 510 #endif 511