1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "qemu/osdep.h" 12 #include "qapi/error.h" 13 #include "hw/virtio/vhost.h" 14 #include "hw/virtio/vhost-user.h" 15 #include "hw/virtio/vhost-backend.h" 16 #include "hw/virtio/virtio.h" 17 #include "hw/virtio/virtio-net.h" 18 #include "chardev/char-fe.h" 19 #include "io/channel-socket.h" 20 #include "sysemu/kvm.h" 21 #include "qemu/error-report.h" 22 #include "qemu/main-loop.h" 23 #include "qemu/sockets.h" 24 #include "sysemu/runstate.h" 25 #include "sysemu/cryptodev.h" 26 #include "migration/migration.h" 27 #include "migration/postcopy-ram.h" 28 #include "trace.h" 29 #include "exec/ramblock.h" 30 31 #include <sys/ioctl.h> 32 #include <sys/socket.h> 33 #include <sys/un.h> 34 35 #include "standard-headers/linux/vhost_types.h" 36 37 #ifdef CONFIG_LINUX 38 #include <linux/userfaultfd.h> 39 #endif 40 41 #define VHOST_MEMORY_BASELINE_NREGIONS 8 42 #define VHOST_USER_F_PROTOCOL_FEATURES 30 43 #define VHOST_USER_BACKEND_MAX_FDS 8 44 45 /* 46 * Set maximum number of RAM slots supported to 47 * the maximum number supported by the target 48 * hardware plaform. 
49 */ 50 #if defined(TARGET_X86) || defined(TARGET_X86_64) || \ 51 defined(TARGET_ARM) || defined(TARGET_AARCH64) 52 #include "hw/acpi/acpi.h" 53 #define VHOST_USER_MAX_RAM_SLOTS ACPI_MAX_RAM_SLOTS 54 55 #elif defined(TARGET_PPC) || defined(TARGET_PPC64) 56 #include "hw/ppc/spapr.h" 57 #define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS 58 59 #else 60 #define VHOST_USER_MAX_RAM_SLOTS 512 61 #endif 62 63 /* 64 * Maximum size of virtio device config space 65 */ 66 #define VHOST_USER_MAX_CONFIG_SIZE 256 67 68 enum VhostUserProtocolFeature { 69 VHOST_USER_PROTOCOL_F_MQ = 0, 70 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, 71 VHOST_USER_PROTOCOL_F_RARP = 2, 72 VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, 73 VHOST_USER_PROTOCOL_F_NET_MTU = 4, 74 VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5, 75 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, 76 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, 77 VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, 78 VHOST_USER_PROTOCOL_F_CONFIG = 9, 79 VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10, 80 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, 81 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, 82 VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, 83 /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. 
*/ 84 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, 85 VHOST_USER_PROTOCOL_F_STATUS = 16, 86 VHOST_USER_PROTOCOL_F_MAX 87 }; 88 89 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) 90 91 typedef enum VhostUserRequest { 92 VHOST_USER_NONE = 0, 93 VHOST_USER_GET_FEATURES = 1, 94 VHOST_USER_SET_FEATURES = 2, 95 VHOST_USER_SET_OWNER = 3, 96 VHOST_USER_RESET_OWNER = 4, 97 VHOST_USER_SET_MEM_TABLE = 5, 98 VHOST_USER_SET_LOG_BASE = 6, 99 VHOST_USER_SET_LOG_FD = 7, 100 VHOST_USER_SET_VRING_NUM = 8, 101 VHOST_USER_SET_VRING_ADDR = 9, 102 VHOST_USER_SET_VRING_BASE = 10, 103 VHOST_USER_GET_VRING_BASE = 11, 104 VHOST_USER_SET_VRING_KICK = 12, 105 VHOST_USER_SET_VRING_CALL = 13, 106 VHOST_USER_SET_VRING_ERR = 14, 107 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 108 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 109 VHOST_USER_GET_QUEUE_NUM = 17, 110 VHOST_USER_SET_VRING_ENABLE = 18, 111 VHOST_USER_SEND_RARP = 19, 112 VHOST_USER_NET_SET_MTU = 20, 113 VHOST_USER_SET_BACKEND_REQ_FD = 21, 114 VHOST_USER_IOTLB_MSG = 22, 115 VHOST_USER_SET_VRING_ENDIAN = 23, 116 VHOST_USER_GET_CONFIG = 24, 117 VHOST_USER_SET_CONFIG = 25, 118 VHOST_USER_CREATE_CRYPTO_SESSION = 26, 119 VHOST_USER_CLOSE_CRYPTO_SESSION = 27, 120 VHOST_USER_POSTCOPY_ADVISE = 28, 121 VHOST_USER_POSTCOPY_LISTEN = 29, 122 VHOST_USER_POSTCOPY_END = 30, 123 VHOST_USER_GET_INFLIGHT_FD = 31, 124 VHOST_USER_SET_INFLIGHT_FD = 32, 125 VHOST_USER_GPU_SET_SOCKET = 33, 126 VHOST_USER_RESET_DEVICE = 34, 127 /* Message number 35 reserved for VHOST_USER_VRING_KICK. 
*/ 128 VHOST_USER_GET_MAX_MEM_SLOTS = 36, 129 VHOST_USER_ADD_MEM_REG = 37, 130 VHOST_USER_REM_MEM_REG = 38, 131 VHOST_USER_SET_STATUS = 39, 132 VHOST_USER_GET_STATUS = 40, 133 VHOST_USER_MAX 134 } VhostUserRequest; 135 136 typedef enum VhostUserSlaveRequest { 137 VHOST_USER_BACKEND_NONE = 0, 138 VHOST_USER_BACKEND_IOTLB_MSG = 1, 139 VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2, 140 VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3, 141 VHOST_USER_BACKEND_MAX 142 } VhostUserSlaveRequest; 143 144 typedef struct VhostUserMemoryRegion { 145 uint64_t guest_phys_addr; 146 uint64_t memory_size; 147 uint64_t userspace_addr; 148 uint64_t mmap_offset; 149 } VhostUserMemoryRegion; 150 151 typedef struct VhostUserMemory { 152 uint32_t nregions; 153 uint32_t padding; 154 VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS]; 155 } VhostUserMemory; 156 157 typedef struct VhostUserMemRegMsg { 158 uint64_t padding; 159 VhostUserMemoryRegion region; 160 } VhostUserMemRegMsg; 161 162 typedef struct VhostUserLog { 163 uint64_t mmap_size; 164 uint64_t mmap_offset; 165 } VhostUserLog; 166 167 typedef struct VhostUserConfig { 168 uint32_t offset; 169 uint32_t size; 170 uint32_t flags; 171 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; 172 } VhostUserConfig; 173 174 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512 175 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64 176 177 typedef struct VhostUserCryptoSession { 178 /* session id for success, -1 on errors */ 179 int64_t session_id; 180 CryptoDevBackendSymSessionInfo session_setup_data; 181 uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN]; 182 uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN]; 183 } VhostUserCryptoSession; 184 185 static VhostUserConfig c __attribute__ ((unused)); 186 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \ 187 + sizeof(c.size) \ 188 + sizeof(c.flags)) 189 190 typedef struct VhostUserVringArea { 191 uint64_t u64; 192 uint64_t size; 193 uint64_t offset; 194 } VhostUserVringArea; 195 196 typedef struct 
VhostUserInflight { 197 uint64_t mmap_size; 198 uint64_t mmap_offset; 199 uint16_t num_queues; 200 uint16_t queue_size; 201 } VhostUserInflight; 202 203 typedef struct { 204 VhostUserRequest request; 205 206 #define VHOST_USER_VERSION_MASK (0x3) 207 #define VHOST_USER_REPLY_MASK (0x1 << 2) 208 #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) 209 uint32_t flags; 210 uint32_t size; /* the following payload size */ 211 } QEMU_PACKED VhostUserHeader; 212 213 typedef union { 214 #define VHOST_USER_VRING_IDX_MASK (0xff) 215 #define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) 216 uint64_t u64; 217 struct vhost_vring_state state; 218 struct vhost_vring_addr addr; 219 VhostUserMemory memory; 220 VhostUserMemRegMsg mem_reg; 221 VhostUserLog log; 222 struct vhost_iotlb_msg iotlb; 223 VhostUserConfig config; 224 VhostUserCryptoSession session; 225 VhostUserVringArea area; 226 VhostUserInflight inflight; 227 } VhostUserPayload; 228 229 typedef struct VhostUserMsg { 230 VhostUserHeader hdr; 231 VhostUserPayload payload; 232 } QEMU_PACKED VhostUserMsg; 233 234 static VhostUserMsg m __attribute__ ((unused)); 235 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader)) 236 237 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload)) 238 239 /* The version of the protocol we support */ 240 #define VHOST_USER_VERSION (0x1) 241 242 struct vhost_user { 243 struct vhost_dev *dev; 244 /* Shared between vhost devs of the same virtio device */ 245 VhostUserState *user; 246 QIOChannel *slave_ioc; 247 GSource *slave_src; 248 NotifierWithReturn postcopy_notifier; 249 struct PostCopyFD postcopy_fd; 250 uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS]; 251 /* Length of the region_rb and region_rb_offset arrays */ 252 size_t region_rb_len; 253 /* RAMBlock associated with a given region */ 254 RAMBlock **region_rb; 255 /* 256 * The offset from the start of the RAMBlock to the start of the 257 * vhost region. 
258 */ 259 ram_addr_t *region_rb_offset; 260 261 /* True once we've entered postcopy_listen */ 262 bool postcopy_listen; 263 264 /* Our current regions */ 265 int num_shadow_regions; 266 struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS]; 267 }; 268 269 struct scrub_regions { 270 struct vhost_memory_region *region; 271 int reg_idx; 272 int fd_idx; 273 }; 274 275 static bool ioeventfd_enabled(void) 276 { 277 return !kvm_enabled() || kvm_eventfds_enabled(); 278 } 279 280 static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg) 281 { 282 struct vhost_user *u = dev->opaque; 283 CharBackend *chr = u->user->chr; 284 uint8_t *p = (uint8_t *) msg; 285 int r, size = VHOST_USER_HDR_SIZE; 286 287 r = qemu_chr_fe_read_all(chr, p, size); 288 if (r != size) { 289 int saved_errno = errno; 290 error_report("Failed to read msg header. Read %d instead of %d." 291 " Original request %d.", r, size, msg->hdr.request); 292 return r < 0 ? -saved_errno : -EIO; 293 } 294 295 /* validate received flags */ 296 if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 297 error_report("Failed to read msg header." 298 " Flags 0x%x instead of 0x%x.", msg->hdr.flags, 299 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 300 return -EPROTO; 301 } 302 303 trace_vhost_user_read(msg->hdr.request, msg->hdr.flags); 304 305 return 0; 306 } 307 308 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 309 { 310 struct vhost_user *u = dev->opaque; 311 CharBackend *chr = u->user->chr; 312 uint8_t *p = (uint8_t *) msg; 313 int r, size; 314 315 r = vhost_user_read_header(dev, msg); 316 if (r < 0) { 317 return r; 318 } 319 320 /* validate message size is sane */ 321 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) { 322 error_report("Failed to read msg header." 
323 " Size %d exceeds the maximum %zu.", msg->hdr.size, 324 VHOST_USER_PAYLOAD_SIZE); 325 return -EPROTO; 326 } 327 328 if (msg->hdr.size) { 329 p += VHOST_USER_HDR_SIZE; 330 size = msg->hdr.size; 331 r = qemu_chr_fe_read_all(chr, p, size); 332 if (r != size) { 333 int saved_errno = errno; 334 error_report("Failed to read msg payload." 335 " Read %d instead of %d.", r, msg->hdr.size); 336 return r < 0 ? -saved_errno : -EIO; 337 } 338 } 339 340 return 0; 341 } 342 343 static int process_message_reply(struct vhost_dev *dev, 344 const VhostUserMsg *msg) 345 { 346 int ret; 347 VhostUserMsg msg_reply; 348 349 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { 350 return 0; 351 } 352 353 ret = vhost_user_read(dev, &msg_reply); 354 if (ret < 0) { 355 return ret; 356 } 357 358 if (msg_reply.hdr.request != msg->hdr.request) { 359 error_report("Received unexpected msg type. " 360 "Expected %d received %d", 361 msg->hdr.request, msg_reply.hdr.request); 362 return -EPROTO; 363 } 364 365 return msg_reply.payload.u64 ? -EIO : 0; 366 } 367 368 static bool vhost_user_one_time_request(VhostUserRequest request) 369 { 370 switch (request) { 371 case VHOST_USER_SET_OWNER: 372 case VHOST_USER_RESET_OWNER: 373 case VHOST_USER_SET_MEM_TABLE: 374 case VHOST_USER_GET_QUEUE_NUM: 375 case VHOST_USER_NET_SET_MTU: 376 case VHOST_USER_ADD_MEM_REG: 377 case VHOST_USER_REM_MEM_REG: 378 return true; 379 default: 380 return false; 381 } 382 } 383 384 /* most non-init callers ignore the error */ 385 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 386 int *fds, int fd_num) 387 { 388 struct vhost_user *u = dev->opaque; 389 CharBackend *chr = u->user->chr; 390 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size; 391 392 /* 393 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, 394 * we just need send it once in the first time. For later such 395 * request, we just ignore it. 
396 */ 397 if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) { 398 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 399 return 0; 400 } 401 402 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) { 403 error_report("Failed to set msg fds."); 404 return -EINVAL; 405 } 406 407 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size); 408 if (ret != size) { 409 int saved_errno = errno; 410 error_report("Failed to write msg." 411 " Wrote %d instead of %d.", ret, size); 412 return ret < 0 ? -saved_errno : -EIO; 413 } 414 415 trace_vhost_user_write(msg->hdr.request, msg->hdr.flags); 416 417 return 0; 418 } 419 420 int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd) 421 { 422 VhostUserMsg msg = { 423 .hdr.request = VHOST_USER_GPU_SET_SOCKET, 424 .hdr.flags = VHOST_USER_VERSION, 425 }; 426 427 return vhost_user_write(dev, &msg, &fd, 1); 428 } 429 430 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, 431 struct vhost_log *log) 432 { 433 int fds[VHOST_USER_MAX_RAM_SLOTS]; 434 size_t fd_num = 0; 435 bool shmfd = virtio_has_feature(dev->protocol_features, 436 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 437 int ret; 438 VhostUserMsg msg = { 439 .hdr.request = VHOST_USER_SET_LOG_BASE, 440 .hdr.flags = VHOST_USER_VERSION, 441 .payload.log.mmap_size = log->size * sizeof(*(log->log)), 442 .payload.log.mmap_offset = 0, 443 .hdr.size = sizeof(msg.payload.log), 444 }; 445 446 /* Send only once with first queue pair */ 447 if (dev->vq_index != 0) { 448 return 0; 449 } 450 451 if (shmfd && log->fd != -1) { 452 fds[fd_num++] = log->fd; 453 } 454 455 ret = vhost_user_write(dev, &msg, fds, fd_num); 456 if (ret < 0) { 457 return ret; 458 } 459 460 if (shmfd) { 461 msg.hdr.size = 0; 462 ret = vhost_user_read(dev, &msg); 463 if (ret < 0) { 464 return ret; 465 } 466 467 if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) { 468 error_report("Received unexpected msg type. 
" 469 "Expected %d received %d", 470 VHOST_USER_SET_LOG_BASE, msg.hdr.request); 471 return -EPROTO; 472 } 473 } 474 475 return 0; 476 } 477 478 static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset, 479 int *fd) 480 { 481 MemoryRegion *mr; 482 483 assert((uintptr_t)addr == addr); 484 mr = memory_region_from_host((void *)(uintptr_t)addr, offset); 485 *fd = memory_region_get_fd(mr); 486 487 return mr; 488 } 489 490 static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst, 491 struct vhost_memory_region *src, 492 uint64_t mmap_offset) 493 { 494 assert(src != NULL && dst != NULL); 495 dst->userspace_addr = src->userspace_addr; 496 dst->memory_size = src->memory_size; 497 dst->guest_phys_addr = src->guest_phys_addr; 498 dst->mmap_offset = mmap_offset; 499 } 500 501 static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, 502 struct vhost_dev *dev, 503 VhostUserMsg *msg, 504 int *fds, size_t *fd_num, 505 bool track_ramblocks) 506 { 507 int i, fd; 508 ram_addr_t offset; 509 MemoryRegion *mr; 510 struct vhost_memory_region *reg; 511 VhostUserMemoryRegion region_buffer; 512 513 msg->hdr.request = VHOST_USER_SET_MEM_TABLE; 514 515 for (i = 0; i < dev->mem->nregions; ++i) { 516 reg = dev->mem->regions + i; 517 518 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 519 if (fd > 0) { 520 if (track_ramblocks) { 521 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS); 522 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name, 523 reg->memory_size, 524 reg->guest_phys_addr, 525 reg->userspace_addr, 526 offset); 527 u->region_rb_offset[i] = offset; 528 u->region_rb[i] = mr->ram_block; 529 } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) { 530 error_report("Failed preparing vhost-user memory table msg"); 531 return -ENOBUFS; 532 } 533 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 534 msg->payload.memory.regions[*fd_num] = region_buffer; 535 fds[(*fd_num)++] = fd; 536 } else if (track_ramblocks) { 537 
u->region_rb_offset[i] = 0; 538 u->region_rb[i] = NULL; 539 } 540 } 541 542 msg->payload.memory.nregions = *fd_num; 543 544 if (!*fd_num) { 545 error_report("Failed initializing vhost-user memory map, " 546 "consider using -object memory-backend-file share=on"); 547 return -EINVAL; 548 } 549 550 msg->hdr.size = sizeof(msg->payload.memory.nregions); 551 msg->hdr.size += sizeof(msg->payload.memory.padding); 552 msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion); 553 554 return 0; 555 } 556 557 static inline bool reg_equal(struct vhost_memory_region *shadow_reg, 558 struct vhost_memory_region *vdev_reg) 559 { 560 return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr && 561 shadow_reg->userspace_addr == vdev_reg->userspace_addr && 562 shadow_reg->memory_size == vdev_reg->memory_size; 563 } 564 565 static void scrub_shadow_regions(struct vhost_dev *dev, 566 struct scrub_regions *add_reg, 567 int *nr_add_reg, 568 struct scrub_regions *rem_reg, 569 int *nr_rem_reg, uint64_t *shadow_pcb, 570 bool track_ramblocks) 571 { 572 struct vhost_user *u = dev->opaque; 573 bool found[VHOST_USER_MAX_RAM_SLOTS] = {}; 574 struct vhost_memory_region *reg, *shadow_reg; 575 int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0; 576 ram_addr_t offset; 577 MemoryRegion *mr; 578 bool matching; 579 580 /* 581 * Find memory regions present in our shadow state which are not in 582 * the device's current memory state. 583 * 584 * Mark regions in both the shadow and device state as "found". 585 */ 586 for (i = 0; i < u->num_shadow_regions; i++) { 587 shadow_reg = &u->shadow_regions[i]; 588 matching = false; 589 590 for (j = 0; j < dev->mem->nregions; j++) { 591 reg = &dev->mem->regions[j]; 592 593 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 594 595 if (reg_equal(shadow_reg, reg)) { 596 matching = true; 597 found[j] = true; 598 if (track_ramblocks) { 599 /* 600 * Reset postcopy client bases, region_rb, and 601 * region_rb_offset in case regions are removed. 
602 */ 603 if (fd > 0) { 604 u->region_rb_offset[j] = offset; 605 u->region_rb[j] = mr->ram_block; 606 shadow_pcb[j] = u->postcopy_client_bases[i]; 607 } else { 608 u->region_rb_offset[j] = 0; 609 u->region_rb[j] = NULL; 610 } 611 } 612 break; 613 } 614 } 615 616 /* 617 * If the region was not found in the current device memory state 618 * create an entry for it in the removed list. 619 */ 620 if (!matching) { 621 rem_reg[rm_idx].region = shadow_reg; 622 rem_reg[rm_idx++].reg_idx = i; 623 } 624 } 625 626 /* 627 * For regions not marked "found", create entries in the added list. 628 * 629 * Note their indexes in the device memory state and the indexes of their 630 * file descriptors. 631 */ 632 for (i = 0; i < dev->mem->nregions; i++) { 633 reg = &dev->mem->regions[i]; 634 vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 635 if (fd > 0) { 636 ++fd_num; 637 } 638 639 /* 640 * If the region was in both the shadow and device state we don't 641 * need to send a VHOST_USER_ADD_MEM_REG message for it. 642 */ 643 if (found[i]) { 644 continue; 645 } 646 647 add_reg[add_idx].region = reg; 648 add_reg[add_idx].reg_idx = i; 649 add_reg[add_idx++].fd_idx = fd_num; 650 } 651 *nr_rem_reg = rm_idx; 652 *nr_add_reg = add_idx; 653 654 return; 655 } 656 657 static int send_remove_regions(struct vhost_dev *dev, 658 struct scrub_regions *remove_reg, 659 int nr_rem_reg, VhostUserMsg *msg, 660 bool reply_supported) 661 { 662 struct vhost_user *u = dev->opaque; 663 struct vhost_memory_region *shadow_reg; 664 int i, fd, shadow_reg_idx, ret; 665 ram_addr_t offset; 666 VhostUserMemoryRegion region_buffer; 667 668 /* 669 * The regions in remove_reg appear in the same order they do in the 670 * shadow table. Therefore we can minimize memory copies by iterating 671 * through remove_reg backwards. 
672 */ 673 for (i = nr_rem_reg - 1; i >= 0; i--) { 674 shadow_reg = remove_reg[i].region; 675 shadow_reg_idx = remove_reg[i].reg_idx; 676 677 vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd); 678 679 if (fd > 0) { 680 msg->hdr.request = VHOST_USER_REM_MEM_REG; 681 vhost_user_fill_msg_region(®ion_buffer, shadow_reg, 0); 682 msg->payload.mem_reg.region = region_buffer; 683 684 ret = vhost_user_write(dev, msg, NULL, 0); 685 if (ret < 0) { 686 return ret; 687 } 688 689 if (reply_supported) { 690 ret = process_message_reply(dev, msg); 691 if (ret) { 692 return ret; 693 } 694 } 695 } 696 697 /* 698 * At this point we know the backend has unmapped the region. It is now 699 * safe to remove it from the shadow table. 700 */ 701 memmove(&u->shadow_regions[shadow_reg_idx], 702 &u->shadow_regions[shadow_reg_idx + 1], 703 sizeof(struct vhost_memory_region) * 704 (u->num_shadow_regions - shadow_reg_idx - 1)); 705 u->num_shadow_regions--; 706 } 707 708 return 0; 709 } 710 711 static int send_add_regions(struct vhost_dev *dev, 712 struct scrub_regions *add_reg, int nr_add_reg, 713 VhostUserMsg *msg, uint64_t *shadow_pcb, 714 bool reply_supported, bool track_ramblocks) 715 { 716 struct vhost_user *u = dev->opaque; 717 int i, fd, ret, reg_idx, reg_fd_idx; 718 struct vhost_memory_region *reg; 719 MemoryRegion *mr; 720 ram_addr_t offset; 721 VhostUserMsg msg_reply; 722 VhostUserMemoryRegion region_buffer; 723 724 for (i = 0; i < nr_add_reg; i++) { 725 reg = add_reg[i].region; 726 reg_idx = add_reg[i].reg_idx; 727 reg_fd_idx = add_reg[i].fd_idx; 728 729 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 730 731 if (fd > 0) { 732 if (track_ramblocks) { 733 trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name, 734 reg->memory_size, 735 reg->guest_phys_addr, 736 reg->userspace_addr, 737 offset); 738 u->region_rb_offset[reg_idx] = offset; 739 u->region_rb[reg_idx] = mr->ram_block; 740 } 741 msg->hdr.request = VHOST_USER_ADD_MEM_REG; 742 
vhost_user_fill_msg_region(®ion_buffer, reg, offset); 743 msg->payload.mem_reg.region = region_buffer; 744 745 ret = vhost_user_write(dev, msg, &fd, 1); 746 if (ret < 0) { 747 return ret; 748 } 749 750 if (track_ramblocks) { 751 uint64_t reply_gpa; 752 753 ret = vhost_user_read(dev, &msg_reply); 754 if (ret < 0) { 755 return ret; 756 } 757 758 reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr; 759 760 if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) { 761 error_report("%s: Received unexpected msg type." 762 "Expected %d received %d", __func__, 763 VHOST_USER_ADD_MEM_REG, 764 msg_reply.hdr.request); 765 return -EPROTO; 766 } 767 768 /* 769 * We're using the same structure, just reusing one of the 770 * fields, so it should be the same size. 771 */ 772 if (msg_reply.hdr.size != msg->hdr.size) { 773 error_report("%s: Unexpected size for postcopy reply " 774 "%d vs %d", __func__, msg_reply.hdr.size, 775 msg->hdr.size); 776 return -EPROTO; 777 } 778 779 /* Get the postcopy client base from the backend's reply. */ 780 if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) { 781 shadow_pcb[reg_idx] = 782 msg_reply.payload.mem_reg.region.userspace_addr; 783 trace_vhost_user_set_mem_table_postcopy( 784 msg_reply.payload.mem_reg.region.userspace_addr, 785 msg->payload.mem_reg.region.userspace_addr, 786 reg_fd_idx, reg_idx); 787 } else { 788 error_report("%s: invalid postcopy reply for region. " 789 "Got guest physical address %" PRIX64 ", expected " 790 "%" PRIX64, __func__, reply_gpa, 791 dev->mem->regions[reg_idx].guest_phys_addr); 792 return -EPROTO; 793 } 794 } else if (reply_supported) { 795 ret = process_message_reply(dev, msg); 796 if (ret) { 797 return ret; 798 } 799 } 800 } else if (track_ramblocks) { 801 u->region_rb_offset[reg_idx] = 0; 802 u->region_rb[reg_idx] = NULL; 803 } 804 805 /* 806 * At this point, we know the backend has mapped in the new 807 * region, if the region has a valid file descriptor. 
808 * 809 * The region should now be added to the shadow table. 810 */ 811 u->shadow_regions[u->num_shadow_regions].guest_phys_addr = 812 reg->guest_phys_addr; 813 u->shadow_regions[u->num_shadow_regions].userspace_addr = 814 reg->userspace_addr; 815 u->shadow_regions[u->num_shadow_regions].memory_size = 816 reg->memory_size; 817 u->num_shadow_regions++; 818 } 819 820 return 0; 821 } 822 823 static int vhost_user_add_remove_regions(struct vhost_dev *dev, 824 VhostUserMsg *msg, 825 bool reply_supported, 826 bool track_ramblocks) 827 { 828 struct vhost_user *u = dev->opaque; 829 struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS]; 830 struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS]; 831 uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {}; 832 int nr_add_reg, nr_rem_reg; 833 int ret; 834 835 msg->hdr.size = sizeof(msg->payload.mem_reg); 836 837 /* Find the regions which need to be removed or added. */ 838 scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg, 839 shadow_pcb, track_ramblocks); 840 841 if (nr_rem_reg) { 842 ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg, 843 reply_supported); 844 if (ret < 0) { 845 goto err; 846 } 847 } 848 849 if (nr_add_reg) { 850 ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb, 851 reply_supported, track_ramblocks); 852 if (ret < 0) { 853 goto err; 854 } 855 } 856 857 if (track_ramblocks) { 858 memcpy(u->postcopy_client_bases, shadow_pcb, 859 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 860 /* 861 * Now we've registered this with the postcopy code, we ack to the 862 * client, because now we're in the position to be able to deal with 863 * any faults it generates. 864 */ 865 /* TODO: Use this for failure cases as well with a bad value. 
*/ 866 msg->hdr.size = sizeof(msg->payload.u64); 867 msg->payload.u64 = 0; /* OK */ 868 869 ret = vhost_user_write(dev, msg, NULL, 0); 870 if (ret < 0) { 871 return ret; 872 } 873 } 874 875 return 0; 876 877 err: 878 if (track_ramblocks) { 879 memcpy(u->postcopy_client_bases, shadow_pcb, 880 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 881 } 882 883 return ret; 884 } 885 886 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, 887 struct vhost_memory *mem, 888 bool reply_supported, 889 bool config_mem_slots) 890 { 891 struct vhost_user *u = dev->opaque; 892 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 893 size_t fd_num = 0; 894 VhostUserMsg msg_reply; 895 int region_i, msg_i; 896 int ret; 897 898 VhostUserMsg msg = { 899 .hdr.flags = VHOST_USER_VERSION, 900 }; 901 902 if (u->region_rb_len < dev->mem->nregions) { 903 u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions); 904 u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset, 905 dev->mem->nregions); 906 memset(&(u->region_rb[u->region_rb_len]), '\0', 907 sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len)); 908 memset(&(u->region_rb_offset[u->region_rb_len]), '\0', 909 sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len)); 910 u->region_rb_len = dev->mem->nregions; 911 } 912 913 if (config_mem_slots) { 914 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true); 915 if (ret < 0) { 916 return ret; 917 } 918 } else { 919 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 920 true); 921 if (ret < 0) { 922 return ret; 923 } 924 925 ret = vhost_user_write(dev, &msg, fds, fd_num); 926 if (ret < 0) { 927 return ret; 928 } 929 930 ret = vhost_user_read(dev, &msg_reply); 931 if (ret < 0) { 932 return ret; 933 } 934 935 if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { 936 error_report("%s: Received unexpected msg type." 
937 "Expected %d received %d", __func__, 938 VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); 939 return -EPROTO; 940 } 941 942 /* 943 * We're using the same structure, just reusing one of the 944 * fields, so it should be the same size. 945 */ 946 if (msg_reply.hdr.size != msg.hdr.size) { 947 error_report("%s: Unexpected size for postcopy reply " 948 "%d vs %d", __func__, msg_reply.hdr.size, 949 msg.hdr.size); 950 return -EPROTO; 951 } 952 953 memset(u->postcopy_client_bases, 0, 954 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 955 956 /* 957 * They're in the same order as the regions that were sent 958 * but some of the regions were skipped (above) if they 959 * didn't have fd's 960 */ 961 for (msg_i = 0, region_i = 0; 962 region_i < dev->mem->nregions; 963 region_i++) { 964 if (msg_i < fd_num && 965 msg_reply.payload.memory.regions[msg_i].guest_phys_addr == 966 dev->mem->regions[region_i].guest_phys_addr) { 967 u->postcopy_client_bases[region_i] = 968 msg_reply.payload.memory.regions[msg_i].userspace_addr; 969 trace_vhost_user_set_mem_table_postcopy( 970 msg_reply.payload.memory.regions[msg_i].userspace_addr, 971 msg.payload.memory.regions[msg_i].userspace_addr, 972 msg_i, region_i); 973 msg_i++; 974 } 975 } 976 if (msg_i != fd_num) { 977 error_report("%s: postcopy reply not fully consumed " 978 "%d vs %zd", 979 __func__, msg_i, fd_num); 980 return -EIO; 981 } 982 983 /* 984 * Now we've registered this with the postcopy code, we ack to the 985 * client, because now we're in the position to be able to deal 986 * with any faults it generates. 987 */ 988 /* TODO: Use this for failure cases as well with a bad value. 
*/ 989 msg.hdr.size = sizeof(msg.payload.u64); 990 msg.payload.u64 = 0; /* OK */ 991 ret = vhost_user_write(dev, &msg, NULL, 0); 992 if (ret < 0) { 993 return ret; 994 } 995 } 996 997 return 0; 998 } 999 1000 static int vhost_user_set_mem_table(struct vhost_dev *dev, 1001 struct vhost_memory *mem) 1002 { 1003 struct vhost_user *u = dev->opaque; 1004 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 1005 size_t fd_num = 0; 1006 bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; 1007 bool reply_supported = virtio_has_feature(dev->protocol_features, 1008 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1009 bool config_mem_slots = 1010 virtio_has_feature(dev->protocol_features, 1011 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS); 1012 int ret; 1013 1014 if (do_postcopy) { 1015 /* 1016 * Postcopy has enough differences that it's best done in it's own 1017 * version 1018 */ 1019 return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported, 1020 config_mem_slots); 1021 } 1022 1023 VhostUserMsg msg = { 1024 .hdr.flags = VHOST_USER_VERSION, 1025 }; 1026 1027 if (reply_supported) { 1028 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1029 } 1030 1031 if (config_mem_slots) { 1032 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false); 1033 if (ret < 0) { 1034 return ret; 1035 } 1036 } else { 1037 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 1038 false); 1039 if (ret < 0) { 1040 return ret; 1041 } 1042 1043 ret = vhost_user_write(dev, &msg, fds, fd_num); 1044 if (ret < 0) { 1045 return ret; 1046 } 1047 1048 if (reply_supported) { 1049 return process_message_reply(dev, &msg); 1050 } 1051 } 1052 1053 return 0; 1054 } 1055 1056 static int vhost_user_set_vring_endian(struct vhost_dev *dev, 1057 struct vhost_vring_state *ring) 1058 { 1059 bool cross_endian = virtio_has_feature(dev->protocol_features, 1060 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN); 1061 VhostUserMsg msg = { 1062 .hdr.request = VHOST_USER_SET_VRING_ENDIAN, 1063 .hdr.flags = VHOST_USER_VERSION, 
1064 .payload.state = *ring, 1065 .hdr.size = sizeof(msg.payload.state), 1066 }; 1067 1068 if (!cross_endian) { 1069 error_report("vhost-user trying to send unhandled ioctl"); 1070 return -ENOTSUP; 1071 } 1072 1073 return vhost_user_write(dev, &msg, NULL, 0); 1074 } 1075 1076 static int vhost_set_vring(struct vhost_dev *dev, 1077 unsigned long int request, 1078 struct vhost_vring_state *ring) 1079 { 1080 VhostUserMsg msg = { 1081 .hdr.request = request, 1082 .hdr.flags = VHOST_USER_VERSION, 1083 .payload.state = *ring, 1084 .hdr.size = sizeof(msg.payload.state), 1085 }; 1086 1087 return vhost_user_write(dev, &msg, NULL, 0); 1088 } 1089 1090 static int vhost_user_set_vring_num(struct vhost_dev *dev, 1091 struct vhost_vring_state *ring) 1092 { 1093 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); 1094 } 1095 1096 static void vhost_user_host_notifier_free(VhostUserHostNotifier *n) 1097 { 1098 assert(n && n->unmap_addr); 1099 munmap(n->unmap_addr, qemu_real_host_page_size()); 1100 n->unmap_addr = NULL; 1101 } 1102 1103 /* 1104 * clean-up function for notifier, will finally free the structure 1105 * under rcu. 
1106 */ 1107 static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n, 1108 VirtIODevice *vdev) 1109 { 1110 if (n->addr) { 1111 if (vdev) { 1112 virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false); 1113 } 1114 assert(!n->unmap_addr); 1115 n->unmap_addr = n->addr; 1116 n->addr = NULL; 1117 call_rcu(n, vhost_user_host_notifier_free, rcu); 1118 } 1119 } 1120 1121 static int vhost_user_set_vring_base(struct vhost_dev *dev, 1122 struct vhost_vring_state *ring) 1123 { 1124 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); 1125 } 1126 1127 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) 1128 { 1129 int i; 1130 1131 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { 1132 return -EINVAL; 1133 } 1134 1135 for (i = 0; i < dev->nvqs; ++i) { 1136 int ret; 1137 struct vhost_vring_state state = { 1138 .index = dev->vq_index + i, 1139 .num = enable, 1140 }; 1141 1142 ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); 1143 if (ret < 0) { 1144 /* 1145 * Restoring the previous state is likely infeasible, as well as 1146 * proceeding regardless the error, so just bail out and hope for 1147 * the device-level recovery. 
1148 */ 1149 return ret; 1150 } 1151 } 1152 1153 return 0; 1154 } 1155 1156 static VhostUserHostNotifier *fetch_notifier(VhostUserState *u, 1157 int idx) 1158 { 1159 if (idx >= u->notifiers->len) { 1160 return NULL; 1161 } 1162 return g_ptr_array_index(u->notifiers, idx); 1163 } 1164 1165 static int vhost_user_get_vring_base(struct vhost_dev *dev, 1166 struct vhost_vring_state *ring) 1167 { 1168 int ret; 1169 VhostUserMsg msg = { 1170 .hdr.request = VHOST_USER_GET_VRING_BASE, 1171 .hdr.flags = VHOST_USER_VERSION, 1172 .payload.state = *ring, 1173 .hdr.size = sizeof(msg.payload.state), 1174 }; 1175 struct vhost_user *u = dev->opaque; 1176 1177 VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index); 1178 if (n) { 1179 vhost_user_host_notifier_remove(n, dev->vdev); 1180 } 1181 1182 ret = vhost_user_write(dev, &msg, NULL, 0); 1183 if (ret < 0) { 1184 return ret; 1185 } 1186 1187 ret = vhost_user_read(dev, &msg); 1188 if (ret < 0) { 1189 return ret; 1190 } 1191 1192 if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) { 1193 error_report("Received unexpected msg type. 
Expected %d received %d", 1194 VHOST_USER_GET_VRING_BASE, msg.hdr.request); 1195 return -EPROTO; 1196 } 1197 1198 if (msg.hdr.size != sizeof(msg.payload.state)) { 1199 error_report("Received bad msg size."); 1200 return -EPROTO; 1201 } 1202 1203 *ring = msg.payload.state; 1204 1205 return 0; 1206 } 1207 1208 static int vhost_set_vring_file(struct vhost_dev *dev, 1209 VhostUserRequest request, 1210 struct vhost_vring_file *file) 1211 { 1212 int fds[VHOST_USER_MAX_RAM_SLOTS]; 1213 size_t fd_num = 0; 1214 VhostUserMsg msg = { 1215 .hdr.request = request, 1216 .hdr.flags = VHOST_USER_VERSION, 1217 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, 1218 .hdr.size = sizeof(msg.payload.u64), 1219 }; 1220 1221 if (ioeventfd_enabled() && file->fd > 0) { 1222 fds[fd_num++] = file->fd; 1223 } else { 1224 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 1225 } 1226 1227 return vhost_user_write(dev, &msg, fds, fd_num); 1228 } 1229 1230 static int vhost_user_set_vring_kick(struct vhost_dev *dev, 1231 struct vhost_vring_file *file) 1232 { 1233 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); 1234 } 1235 1236 static int vhost_user_set_vring_call(struct vhost_dev *dev, 1237 struct vhost_vring_file *file) 1238 { 1239 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); 1240 } 1241 1242 static int vhost_user_set_vring_err(struct vhost_dev *dev, 1243 struct vhost_vring_file *file) 1244 { 1245 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_ERR, file); 1246 } 1247 1248 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) 1249 { 1250 int ret; 1251 VhostUserMsg msg = { 1252 .hdr.request = request, 1253 .hdr.flags = VHOST_USER_VERSION, 1254 }; 1255 1256 if (vhost_user_one_time_request(request) && dev->vq_index != 0) { 1257 return 0; 1258 } 1259 1260 ret = vhost_user_write(dev, &msg, NULL, 0); 1261 if (ret < 0) { 1262 return ret; 1263 } 1264 1265 ret = vhost_user_read(dev, &msg); 1266 if (ret < 0) { 1267 return ret; 1268 
} 1269 1270 if (msg.hdr.request != request) { 1271 error_report("Received unexpected msg type. Expected %d received %d", 1272 request, msg.hdr.request); 1273 return -EPROTO; 1274 } 1275 1276 if (msg.hdr.size != sizeof(msg.payload.u64)) { 1277 error_report("Received bad msg size."); 1278 return -EPROTO; 1279 } 1280 1281 *u64 = msg.payload.u64; 1282 1283 return 0; 1284 } 1285 1286 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) 1287 { 1288 if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) { 1289 return -EPROTO; 1290 } 1291 1292 return 0; 1293 } 1294 1295 static int enforce_reply(struct vhost_dev *dev, 1296 const VhostUserMsg *msg) 1297 { 1298 uint64_t dummy; 1299 1300 if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1301 return process_message_reply(dev, msg); 1302 } 1303 1304 /* 1305 * We need to wait for a reply but the backend does not 1306 * support replies for the command we just sent. 1307 * Send VHOST_USER_GET_FEATURES which makes all backends 1308 * send a reply. 
1309 */ 1310 return vhost_user_get_features(dev, &dummy); 1311 } 1312 1313 static int vhost_user_set_vring_addr(struct vhost_dev *dev, 1314 struct vhost_vring_addr *addr) 1315 { 1316 int ret; 1317 VhostUserMsg msg = { 1318 .hdr.request = VHOST_USER_SET_VRING_ADDR, 1319 .hdr.flags = VHOST_USER_VERSION, 1320 .payload.addr = *addr, 1321 .hdr.size = sizeof(msg.payload.addr), 1322 }; 1323 1324 bool reply_supported = virtio_has_feature(dev->protocol_features, 1325 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1326 1327 /* 1328 * wait for a reply if logging is enabled to make sure 1329 * backend is actually logging changes 1330 */ 1331 bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG); 1332 1333 if (reply_supported && wait_for_reply) { 1334 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1335 } 1336 1337 ret = vhost_user_write(dev, &msg, NULL, 0); 1338 if (ret < 0) { 1339 return ret; 1340 } 1341 1342 if (wait_for_reply) { 1343 return enforce_reply(dev, &msg); 1344 } 1345 1346 return 0; 1347 } 1348 1349 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64, 1350 bool wait_for_reply) 1351 { 1352 VhostUserMsg msg = { 1353 .hdr.request = request, 1354 .hdr.flags = VHOST_USER_VERSION, 1355 .payload.u64 = u64, 1356 .hdr.size = sizeof(msg.payload.u64), 1357 }; 1358 int ret; 1359 1360 if (wait_for_reply) { 1361 bool reply_supported = virtio_has_feature(dev->protocol_features, 1362 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1363 if (reply_supported) { 1364 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1365 } 1366 } 1367 1368 ret = vhost_user_write(dev, &msg, NULL, 0); 1369 if (ret < 0) { 1370 return ret; 1371 } 1372 1373 if (wait_for_reply) { 1374 return enforce_reply(dev, &msg); 1375 } 1376 1377 return 0; 1378 } 1379 1380 static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status) 1381 { 1382 return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status, false); 1383 } 1384 1385 static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status) 1386 { 
1387 uint64_t value; 1388 int ret; 1389 1390 ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &value); 1391 if (ret < 0) { 1392 return ret; 1393 } 1394 *status = value; 1395 1396 return 0; 1397 } 1398 1399 static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status) 1400 { 1401 uint8_t s; 1402 int ret; 1403 1404 ret = vhost_user_get_status(dev, &s); 1405 if (ret < 0) { 1406 return ret; 1407 } 1408 1409 if ((s & status) == status) { 1410 return 0; 1411 } 1412 s |= status; 1413 1414 return vhost_user_set_status(dev, s); 1415 } 1416 1417 static int vhost_user_set_features(struct vhost_dev *dev, 1418 uint64_t features) 1419 { 1420 /* 1421 * wait for a reply if logging is enabled to make sure 1422 * backend is actually logging changes 1423 */ 1424 bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL); 1425 int ret; 1426 1427 /* 1428 * We need to include any extra backend only feature bits that 1429 * might be needed by our device. Currently this includes the 1430 * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol 1431 * features. 
1432 */ 1433 ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, 1434 features | dev->backend_features, 1435 log_enabled); 1436 1437 if (virtio_has_feature(dev->protocol_features, 1438 VHOST_USER_PROTOCOL_F_STATUS)) { 1439 if (!ret) { 1440 return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK); 1441 } 1442 } 1443 1444 return ret; 1445 } 1446 1447 static int vhost_user_set_protocol_features(struct vhost_dev *dev, 1448 uint64_t features) 1449 { 1450 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features, 1451 false); 1452 } 1453 1454 static int vhost_user_set_owner(struct vhost_dev *dev) 1455 { 1456 VhostUserMsg msg = { 1457 .hdr.request = VHOST_USER_SET_OWNER, 1458 .hdr.flags = VHOST_USER_VERSION, 1459 }; 1460 1461 return vhost_user_write(dev, &msg, NULL, 0); 1462 } 1463 1464 static int vhost_user_get_max_memslots(struct vhost_dev *dev, 1465 uint64_t *max_memslots) 1466 { 1467 uint64_t backend_max_memslots; 1468 int err; 1469 1470 err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS, 1471 &backend_max_memslots); 1472 if (err < 0) { 1473 return err; 1474 } 1475 1476 *max_memslots = backend_max_memslots; 1477 1478 return 0; 1479 } 1480 1481 static int vhost_user_reset_device(struct vhost_dev *dev) 1482 { 1483 VhostUserMsg msg = { 1484 .hdr.flags = VHOST_USER_VERSION, 1485 }; 1486 1487 msg.hdr.request = virtio_has_feature(dev->protocol_features, 1488 VHOST_USER_PROTOCOL_F_RESET_DEVICE) 1489 ? VHOST_USER_RESET_DEVICE 1490 : VHOST_USER_RESET_OWNER; 1491 1492 return vhost_user_write(dev, &msg, NULL, 0); 1493 } 1494 1495 static int vhost_user_slave_handle_config_change(struct vhost_dev *dev) 1496 { 1497 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { 1498 return -ENOSYS; 1499 } 1500 1501 return dev->config_ops->vhost_dev_config_notifier(dev); 1502 } 1503 1504 /* 1505 * Fetch or create the notifier for a given idx. Newly created 1506 * notifiers are added to the pointer array that tracks them. 
1507 */ 1508 static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u, 1509 int idx) 1510 { 1511 VhostUserHostNotifier *n = NULL; 1512 if (idx >= u->notifiers->len) { 1513 g_ptr_array_set_size(u->notifiers, idx + 1); 1514 } 1515 1516 n = g_ptr_array_index(u->notifiers, idx); 1517 if (!n) { 1518 /* 1519 * In case notification arrive out-of-order, 1520 * make room for current index. 1521 */ 1522 g_ptr_array_remove_index(u->notifiers, idx); 1523 n = g_new0(VhostUserHostNotifier, 1); 1524 n->idx = idx; 1525 g_ptr_array_insert(u->notifiers, idx, n); 1526 trace_vhost_user_create_notifier(idx, n); 1527 } 1528 1529 return n; 1530 } 1531 1532 static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, 1533 VhostUserVringArea *area, 1534 int fd) 1535 { 1536 int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK; 1537 size_t page_size = qemu_real_host_page_size(); 1538 struct vhost_user *u = dev->opaque; 1539 VhostUserState *user = u->user; 1540 VirtIODevice *vdev = dev->vdev; 1541 VhostUserHostNotifier *n; 1542 void *addr; 1543 char *name; 1544 1545 if (!virtio_has_feature(dev->protocol_features, 1546 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) || 1547 vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) { 1548 return -EINVAL; 1549 } 1550 1551 /* 1552 * Fetch notifier and invalidate any old data before setting up 1553 * new mapped address. 1554 */ 1555 n = fetch_or_create_notifier(user, queue_idx); 1556 vhost_user_host_notifier_remove(n, vdev); 1557 1558 if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { 1559 return 0; 1560 } 1561 1562 /* Sanity check. */ 1563 if (area->size != page_size) { 1564 return -EINVAL; 1565 } 1566 1567 addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, 1568 fd, area->offset); 1569 if (addr == MAP_FAILED) { 1570 return -EFAULT; 1571 } 1572 1573 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]", 1574 user, queue_idx); 1575 if (!n->mr.ram) { /* Don't init again after suspend. 
*/ 1576 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, 1577 page_size, addr); 1578 } else { 1579 n->mr.ram_block->host = addr; 1580 } 1581 g_free(name); 1582 1583 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) { 1584 object_unparent(OBJECT(&n->mr)); 1585 munmap(addr, page_size); 1586 return -ENXIO; 1587 } 1588 1589 n->addr = addr; 1590 1591 return 0; 1592 } 1593 1594 static void close_slave_channel(struct vhost_user *u) 1595 { 1596 g_source_destroy(u->slave_src); 1597 g_source_unref(u->slave_src); 1598 u->slave_src = NULL; 1599 object_unref(OBJECT(u->slave_ioc)); 1600 u->slave_ioc = NULL; 1601 } 1602 1603 static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, 1604 gpointer opaque) 1605 { 1606 struct vhost_dev *dev = opaque; 1607 struct vhost_user *u = dev->opaque; 1608 VhostUserHeader hdr = { 0, }; 1609 VhostUserPayload payload = { 0, }; 1610 Error *local_err = NULL; 1611 gboolean rc = G_SOURCE_CONTINUE; 1612 int ret = 0; 1613 struct iovec iov; 1614 g_autofree int *fd = NULL; 1615 size_t fdsize = 0; 1616 int i; 1617 1618 /* Read header */ 1619 iov.iov_base = &hdr; 1620 iov.iov_len = VHOST_USER_HDR_SIZE; 1621 1622 if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) { 1623 error_report_err(local_err); 1624 goto err; 1625 } 1626 1627 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) { 1628 error_report("Failed to read msg header." 
1629 " Size %d exceeds the maximum %zu.", hdr.size, 1630 VHOST_USER_PAYLOAD_SIZE); 1631 goto err; 1632 } 1633 1634 /* Read payload */ 1635 if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) { 1636 error_report_err(local_err); 1637 goto err; 1638 } 1639 1640 switch (hdr.request) { 1641 case VHOST_USER_BACKEND_IOTLB_MSG: 1642 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); 1643 break; 1644 case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG: 1645 ret = vhost_user_slave_handle_config_change(dev); 1646 break; 1647 case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG: 1648 ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area, 1649 fd ? fd[0] : -1); 1650 break; 1651 default: 1652 error_report("Received unexpected msg type: %d.", hdr.request); 1653 ret = -EINVAL; 1654 } 1655 1656 /* 1657 * REPLY_ACK feature handling. Other reply types has to be managed 1658 * directly in their request handlers. 1659 */ 1660 if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1661 struct iovec iovec[2]; 1662 1663 1664 hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 1665 hdr.flags |= VHOST_USER_REPLY_MASK; 1666 1667 payload.u64 = !!ret; 1668 hdr.size = sizeof(payload.u64); 1669 1670 iovec[0].iov_base = &hdr; 1671 iovec[0].iov_len = VHOST_USER_HDR_SIZE; 1672 iovec[1].iov_base = &payload; 1673 iovec[1].iov_len = hdr.size; 1674 1675 if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) { 1676 error_report_err(local_err); 1677 goto err; 1678 } 1679 } 1680 1681 goto fdcleanup; 1682 1683 err: 1684 close_slave_channel(u); 1685 rc = G_SOURCE_REMOVE; 1686 1687 fdcleanup: 1688 if (fd) { 1689 for (i = 0; i < fdsize; i++) { 1690 close(fd[i]); 1691 } 1692 } 1693 return rc; 1694 } 1695 1696 static int vhost_setup_slave_channel(struct vhost_dev *dev) 1697 { 1698 VhostUserMsg msg = { 1699 .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD, 1700 .hdr.flags = VHOST_USER_VERSION, 1701 }; 1702 struct vhost_user *u = dev->opaque; 1703 int sv[2], ret = 0; 1704 bool reply_supported 
= virtio_has_feature(dev->protocol_features, 1705 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1706 Error *local_err = NULL; 1707 QIOChannel *ioc; 1708 1709 if (!virtio_has_feature(dev->protocol_features, 1710 VHOST_USER_PROTOCOL_F_BACKEND_REQ)) { 1711 return 0; 1712 } 1713 1714 if (qemu_socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { 1715 int saved_errno = errno; 1716 error_report("socketpair() failed"); 1717 return -saved_errno; 1718 } 1719 1720 ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err)); 1721 if (!ioc) { 1722 error_report_err(local_err); 1723 return -ECONNREFUSED; 1724 } 1725 u->slave_ioc = ioc; 1726 u->slave_src = qio_channel_add_watch_source(u->slave_ioc, 1727 G_IO_IN | G_IO_HUP, 1728 slave_read, dev, NULL, NULL); 1729 1730 if (reply_supported) { 1731 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1732 } 1733 1734 ret = vhost_user_write(dev, &msg, &sv[1], 1); 1735 if (ret) { 1736 goto out; 1737 } 1738 1739 if (reply_supported) { 1740 ret = process_message_reply(dev, &msg); 1741 } 1742 1743 out: 1744 close(sv[1]); 1745 if (ret) { 1746 close_slave_channel(u); 1747 } 1748 1749 return ret; 1750 } 1751 1752 #ifdef CONFIG_LINUX 1753 /* 1754 * Called back from the postcopy fault thread when a fault is received on our 1755 * ufd. 
#ifdef CONFIG_LINUX
/*
 * Callback invoked by the postcopy fault thread when a fault is reported
 * on our userfault descriptor.
 *
 * The faulting client address is matched against the vhost memory regions
 * and, when a region contains it, the corresponding page of the backing
 * RAMBlock is requested.  Returns -1 when no region matches.
 *
 * TODO: This is Linux specific
 */
static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
                                             void *ufd)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    struct uffd_msg *fault = ufd;
    uint64_t faultaddr = fault->arg.pagefault.address;
    int idx;

    trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
                                            dev->mem->nregions);

    for (idx = 0; idx < MIN(dev->mem->nregions, u->region_rb_len); idx++) {
        uint64_t region_offset;
        uint64_t rb_offset;

        trace_vhost_user_postcopy_fault_handler_loop(idx,
                u->postcopy_client_bases[idx],
                dev->mem->regions[idx].memory_size);

        if (faultaddr < u->postcopy_client_bases[idx]) {
            continue;
        }

        /* Offset of the fault address within this vhost region */
        region_offset = faultaddr - u->postcopy_client_bases[idx];
        if (region_offset >= dev->mem->regions[idx].memory_size) {
            continue;
        }

        rb_offset = region_offset + u->region_rb_offset[idx];
        trace_vhost_user_postcopy_fault_handler_found(idx, region_offset,
                                                      rb_offset);
        return postcopy_request_shared_page(pcfd, u->region_rb[idx],
                                            faultaddr, rb_offset);
    }

    error_report("%s: Failed to find region for fault %" PRIx64,
                 __func__, faultaddr);
    return -1;
}

/*
 * Wake the client once the page at @offset inside @rb is available.
 *
 * The RAMBlock offset is translated into an address in the client's
 * address space by looking for the vhost region backed by @rb that covers
 * @offset.  Returns 0 when the device has no vhost-user state or no region
 * matches.
 */
static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
                                     uint64_t offset)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    int idx;

    trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);

    if (!u) {
        return 0;
    }

    for (idx = 0; idx < MIN(dev->mem->nregions, u->region_rb_len); idx++) {
        uint64_t client_addr;

        if (u->region_rb[idx] != rb) {
            continue;
        }
        if (offset < u->region_rb_offset[idx]) {
            continue;
        }
        if (offset >= (u->region_rb_offset[idx] +
                       dev->mem->regions[idx].memory_size)) {
            continue;
        }

        client_addr = (offset - u->region_rb_offset[idx]) +
                      u->postcopy_client_bases[idx];
        trace_vhost_user_postcopy_waker_found(client_addr);
        return postcopy_wake_shared(pcfd, client_addr, rb);
    }

    trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
    return 0;
}
#endif
Expected %d received %d", 1852 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request); 1853 return -EPROTO; 1854 } 1855 1856 if (msg.hdr.size) { 1857 error_setg(errp, "Received bad msg size."); 1858 return -EPROTO; 1859 } 1860 ufd = qemu_chr_fe_get_msgfd(chr); 1861 if (ufd < 0) { 1862 error_setg(errp, "%s: Failed to get ufd", __func__); 1863 return -EIO; 1864 } 1865 qemu_socket_set_nonblock(ufd); 1866 1867 /* register ufd with userfault thread */ 1868 u->postcopy_fd.fd = ufd; 1869 u->postcopy_fd.data = dev; 1870 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler; 1871 u->postcopy_fd.waker = vhost_user_postcopy_waker; 1872 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */ 1873 postcopy_register_shared_ufd(&u->postcopy_fd); 1874 return 0; 1875 #else 1876 error_setg(errp, "Postcopy not supported on non-Linux systems"); 1877 return -ENOSYS; 1878 #endif 1879 } 1880 1881 /* 1882 * Called at the switch to postcopy on reception of the 'listen' command. 1883 */ 1884 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) 1885 { 1886 struct vhost_user *u = dev->opaque; 1887 int ret; 1888 VhostUserMsg msg = { 1889 .hdr.request = VHOST_USER_POSTCOPY_LISTEN, 1890 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1891 }; 1892 u->postcopy_listen = true; 1893 1894 trace_vhost_user_postcopy_listen(); 1895 1896 ret = vhost_user_write(dev, &msg, NULL, 0); 1897 if (ret < 0) { 1898 error_setg(errp, "Failed to send postcopy_listen to vhost"); 1899 return ret; 1900 } 1901 1902 ret = process_message_reply(dev, &msg); 1903 if (ret) { 1904 error_setg(errp, "Failed to receive reply to postcopy_listen"); 1905 return ret; 1906 } 1907 1908 return 0; 1909 } 1910 1911 /* 1912 * Called at the end of postcopy 1913 */ 1914 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp) 1915 { 1916 VhostUserMsg msg = { 1917 .hdr.request = VHOST_USER_POSTCOPY_END, 1918 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1919 }; 1920 int 
ret; 1921 struct vhost_user *u = dev->opaque; 1922 1923 trace_vhost_user_postcopy_end_entry(); 1924 1925 ret = vhost_user_write(dev, &msg, NULL, 0); 1926 if (ret < 0) { 1927 error_setg(errp, "Failed to send postcopy_end to vhost"); 1928 return ret; 1929 } 1930 1931 ret = process_message_reply(dev, &msg); 1932 if (ret) { 1933 error_setg(errp, "Failed to receive reply to postcopy_end"); 1934 return ret; 1935 } 1936 postcopy_unregister_shared_ufd(&u->postcopy_fd); 1937 close(u->postcopy_fd.fd); 1938 u->postcopy_fd.handler = NULL; 1939 1940 trace_vhost_user_postcopy_end_exit(); 1941 1942 return 0; 1943 } 1944 1945 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, 1946 void *opaque) 1947 { 1948 struct PostcopyNotifyData *pnd = opaque; 1949 struct vhost_user *u = container_of(notifier, struct vhost_user, 1950 postcopy_notifier); 1951 struct vhost_dev *dev = u->dev; 1952 1953 switch (pnd->reason) { 1954 case POSTCOPY_NOTIFY_PROBE: 1955 if (!virtio_has_feature(dev->protocol_features, 1956 VHOST_USER_PROTOCOL_F_PAGEFAULT)) { 1957 /* TODO: Get the device name into this error somehow */ 1958 error_setg(pnd->errp, 1959 "vhost-user backend not capable of postcopy"); 1960 return -ENOENT; 1961 } 1962 break; 1963 1964 case POSTCOPY_NOTIFY_INBOUND_ADVISE: 1965 return vhost_user_postcopy_advise(dev, pnd->errp); 1966 1967 case POSTCOPY_NOTIFY_INBOUND_LISTEN: 1968 return vhost_user_postcopy_listen(dev, pnd->errp); 1969 1970 case POSTCOPY_NOTIFY_INBOUND_END: 1971 return vhost_user_postcopy_end(dev, pnd->errp); 1972 1973 default: 1974 /* We ignore notifications we don't know */ 1975 break; 1976 } 1977 1978 return 0; 1979 } 1980 1981 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, 1982 Error **errp) 1983 { 1984 uint64_t features, ram_slots; 1985 struct vhost_user *u; 1986 VhostUserState *vus = (VhostUserState *) opaque; 1987 int err; 1988 1989 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1990 1991 u = g_new0(struct 
vhost_user, 1); 1992 u->user = vus; 1993 u->dev = dev; 1994 dev->opaque = u; 1995 1996 err = vhost_user_get_features(dev, &features); 1997 if (err < 0) { 1998 error_setg_errno(errp, -err, "vhost_backend_init failed"); 1999 return err; 2000 } 2001 2002 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 2003 bool supports_f_config = vus->supports_config || 2004 (dev->config_ops && dev->config_ops->vhost_dev_config_notifier); 2005 uint64_t protocol_features; 2006 2007 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 2008 2009 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, 2010 &protocol_features); 2011 if (err < 0) { 2012 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2013 return -EPROTO; 2014 } 2015 2016 /* 2017 * We will use all the protocol features we support - although 2018 * we suppress F_CONFIG if we know QEMUs internal code can not support 2019 * it. 2020 */ 2021 protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK; 2022 2023 if (supports_f_config) { 2024 if (!virtio_has_feature(protocol_features, 2025 VHOST_USER_PROTOCOL_F_CONFIG)) { 2026 error_setg(errp, "vhost-user device expecting " 2027 "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user backend does " 2028 "not support it."); 2029 return -EPROTO; 2030 } 2031 } else { 2032 if (virtio_has_feature(protocol_features, 2033 VHOST_USER_PROTOCOL_F_CONFIG)) { 2034 warn_report("vhost-user backend supports " 2035 "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not."); 2036 protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); 2037 } 2038 } 2039 2040 /* final set of protocol features */ 2041 dev->protocol_features = protocol_features; 2042 err = vhost_user_set_protocol_features(dev, dev->protocol_features); 2043 if (err < 0) { 2044 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2045 return -EPROTO; 2046 } 2047 2048 /* query the max queues we support if backend supports Multiple Queue */ 2049 if (dev->protocol_features & (1ULL << 
VHOST_USER_PROTOCOL_F_MQ)) { 2050 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, 2051 &dev->max_queues); 2052 if (err < 0) { 2053 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2054 return -EPROTO; 2055 } 2056 } else { 2057 dev->max_queues = 1; 2058 } 2059 2060 if (dev->num_queues && dev->max_queues < dev->num_queues) { 2061 error_setg(errp, "The maximum number of queues supported by the " 2062 "backend is %" PRIu64, dev->max_queues); 2063 return -EINVAL; 2064 } 2065 2066 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && 2067 !(virtio_has_feature(dev->protocol_features, 2068 VHOST_USER_PROTOCOL_F_BACKEND_REQ) && 2069 virtio_has_feature(dev->protocol_features, 2070 VHOST_USER_PROTOCOL_F_REPLY_ACK))) { 2071 error_setg(errp, "IOMMU support requires reply-ack and " 2072 "slave-req protocol features."); 2073 return -EINVAL; 2074 } 2075 2076 /* get max memory regions if backend supports configurable RAM slots */ 2077 if (!virtio_has_feature(dev->protocol_features, 2078 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) { 2079 u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS; 2080 } else { 2081 err = vhost_user_get_max_memslots(dev, &ram_slots); 2082 if (err < 0) { 2083 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2084 return -EPROTO; 2085 } 2086 2087 if (ram_slots < u->user->memory_slots) { 2088 error_setg(errp, "The backend specified a max ram slots limit " 2089 "of %" PRIu64", when the prior validated limit was " 2090 "%d. 
This limit should never decrease.", ram_slots, 2091 u->user->memory_slots); 2092 return -EINVAL; 2093 } 2094 2095 u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS); 2096 } 2097 } 2098 2099 if (dev->migration_blocker == NULL && 2100 !virtio_has_feature(dev->protocol_features, 2101 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { 2102 error_setg(&dev->migration_blocker, 2103 "Migration disabled: vhost-user backend lacks " 2104 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); 2105 } 2106 2107 if (dev->vq_index == 0) { 2108 err = vhost_setup_slave_channel(dev); 2109 if (err < 0) { 2110 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2111 return -EPROTO; 2112 } 2113 } 2114 2115 u->postcopy_notifier.notify = vhost_user_postcopy_notifier; 2116 postcopy_add_notifier(&u->postcopy_notifier); 2117 2118 return 0; 2119 } 2120 2121 static int vhost_user_backend_cleanup(struct vhost_dev *dev) 2122 { 2123 struct vhost_user *u; 2124 2125 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2126 2127 u = dev->opaque; 2128 if (u->postcopy_notifier.notify) { 2129 postcopy_remove_notifier(&u->postcopy_notifier); 2130 u->postcopy_notifier.notify = NULL; 2131 } 2132 u->postcopy_listen = false; 2133 if (u->postcopy_fd.handler) { 2134 postcopy_unregister_shared_ufd(&u->postcopy_fd); 2135 close(u->postcopy_fd.fd); 2136 u->postcopy_fd.handler = NULL; 2137 } 2138 if (u->slave_ioc) { 2139 close_slave_channel(u); 2140 } 2141 g_free(u->region_rb); 2142 u->region_rb = NULL; 2143 g_free(u->region_rb_offset); 2144 u->region_rb_offset = NULL; 2145 u->region_rb_len = 0; 2146 g_free(u); 2147 dev->opaque = 0; 2148 2149 return 0; 2150 } 2151 2152 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 2153 { 2154 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 2155 2156 return idx; 2157 } 2158 2159 static int vhost_user_memslots_limit(struct vhost_dev *dev) 2160 { 2161 struct vhost_user *u = dev->opaque; 2162 2163 return u->user->memory_slots; 2164 } 
2165 2166 static bool vhost_user_requires_shm_log(struct vhost_dev *dev) 2167 { 2168 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2169 2170 return virtio_has_feature(dev->protocol_features, 2171 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 2172 } 2173 2174 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) 2175 { 2176 VhostUserMsg msg = { }; 2177 2178 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2179 2180 /* If guest supports GUEST_ANNOUNCE do nothing */ 2181 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { 2182 return 0; 2183 } 2184 2185 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ 2186 if (virtio_has_feature(dev->protocol_features, 2187 VHOST_USER_PROTOCOL_F_RARP)) { 2188 msg.hdr.request = VHOST_USER_SEND_RARP; 2189 msg.hdr.flags = VHOST_USER_VERSION; 2190 memcpy((char *)&msg.payload.u64, mac_addr, 6); 2191 msg.hdr.size = sizeof(msg.payload.u64); 2192 2193 return vhost_user_write(dev, &msg, NULL, 0); 2194 } 2195 return -ENOTSUP; 2196 } 2197 2198 static bool vhost_user_can_merge(struct vhost_dev *dev, 2199 uint64_t start1, uint64_t size1, 2200 uint64_t start2, uint64_t size2) 2201 { 2202 ram_addr_t offset; 2203 int mfd, rfd; 2204 2205 (void)vhost_user_get_mr_data(start1, &offset, &mfd); 2206 (void)vhost_user_get_mr_data(start2, &offset, &rfd); 2207 2208 return mfd == rfd; 2209 } 2210 2211 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) 2212 { 2213 VhostUserMsg msg; 2214 bool reply_supported = virtio_has_feature(dev->protocol_features, 2215 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2216 int ret; 2217 2218 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) { 2219 return 0; 2220 } 2221 2222 msg.hdr.request = VHOST_USER_NET_SET_MTU; 2223 msg.payload.u64 = mtu; 2224 msg.hdr.size = sizeof(msg.payload.u64); 2225 msg.hdr.flags = VHOST_USER_VERSION; 2226 if (reply_supported) { 2227 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 
2228 } 2229 2230 ret = vhost_user_write(dev, &msg, NULL, 0); 2231 if (ret < 0) { 2232 return ret; 2233 } 2234 2235 /* If reply_ack supported, slave has to ack specified MTU is valid */ 2236 if (reply_supported) { 2237 return process_message_reply(dev, &msg); 2238 } 2239 2240 return 0; 2241 } 2242 2243 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev, 2244 struct vhost_iotlb_msg *imsg) 2245 { 2246 int ret; 2247 VhostUserMsg msg = { 2248 .hdr.request = VHOST_USER_IOTLB_MSG, 2249 .hdr.size = sizeof(msg.payload.iotlb), 2250 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 2251 .payload.iotlb = *imsg, 2252 }; 2253 2254 ret = vhost_user_write(dev, &msg, NULL, 0); 2255 if (ret < 0) { 2256 return ret; 2257 } 2258 2259 return process_message_reply(dev, &msg); 2260 } 2261 2262 2263 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled) 2264 { 2265 /* No-op as the receive channel is not dedicated to IOTLB messages. */ 2266 } 2267 2268 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, 2269 uint32_t config_len, Error **errp) 2270 { 2271 int ret; 2272 VhostUserMsg msg = { 2273 .hdr.request = VHOST_USER_GET_CONFIG, 2274 .hdr.flags = VHOST_USER_VERSION, 2275 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len, 2276 }; 2277 2278 if (!virtio_has_feature(dev->protocol_features, 2279 VHOST_USER_PROTOCOL_F_CONFIG)) { 2280 error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported"); 2281 return -EINVAL; 2282 } 2283 2284 assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE); 2285 2286 msg.payload.config.offset = 0; 2287 msg.payload.config.size = config_len; 2288 ret = vhost_user_write(dev, &msg, NULL, 0); 2289 if (ret < 0) { 2290 error_setg_errno(errp, -ret, "vhost_get_config failed"); 2291 return ret; 2292 } 2293 2294 ret = vhost_user_read(dev, &msg); 2295 if (ret < 0) { 2296 error_setg_errno(errp, -ret, "vhost_get_config failed"); 2297 return ret; 2298 } 2299 2300 if (msg.hdr.request != VHOST_USER_GET_CONFIG) { 
2301 error_setg(errp, 2302 "Received unexpected msg type. Expected %d received %d", 2303 VHOST_USER_GET_CONFIG, msg.hdr.request); 2304 return -EPROTO; 2305 } 2306 2307 if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) { 2308 error_setg(errp, "Received bad msg size."); 2309 return -EPROTO; 2310 } 2311 2312 memcpy(config, msg.payload.config.region, config_len); 2313 2314 return 0; 2315 } 2316 2317 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data, 2318 uint32_t offset, uint32_t size, uint32_t flags) 2319 { 2320 int ret; 2321 uint8_t *p; 2322 bool reply_supported = virtio_has_feature(dev->protocol_features, 2323 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2324 2325 VhostUserMsg msg = { 2326 .hdr.request = VHOST_USER_SET_CONFIG, 2327 .hdr.flags = VHOST_USER_VERSION, 2328 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size, 2329 }; 2330 2331 if (!virtio_has_feature(dev->protocol_features, 2332 VHOST_USER_PROTOCOL_F_CONFIG)) { 2333 return -ENOTSUP; 2334 } 2335 2336 if (reply_supported) { 2337 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2338 } 2339 2340 if (size > VHOST_USER_MAX_CONFIG_SIZE) { 2341 return -EINVAL; 2342 } 2343 2344 msg.payload.config.offset = offset, 2345 msg.payload.config.size = size, 2346 msg.payload.config.flags = flags, 2347 p = msg.payload.config.region; 2348 memcpy(p, data, size); 2349 2350 ret = vhost_user_write(dev, &msg, NULL, 0); 2351 if (ret < 0) { 2352 return ret; 2353 } 2354 2355 if (reply_supported) { 2356 return process_message_reply(dev, &msg); 2357 } 2358 2359 return 0; 2360 } 2361 2362 static int vhost_user_crypto_create_session(struct vhost_dev *dev, 2363 void *session_info, 2364 uint64_t *session_id) 2365 { 2366 int ret; 2367 bool crypto_session = virtio_has_feature(dev->protocol_features, 2368 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2369 CryptoDevBackendSymSessionInfo *sess_info = session_info; 2370 VhostUserMsg msg = { 2371 .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION, 2372 .hdr.flags = VHOST_USER_VERSION, 
2373 .hdr.size = sizeof(msg.payload.session), 2374 }; 2375 2376 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2377 2378 if (!crypto_session) { 2379 error_report("vhost-user trying to send unhandled ioctl"); 2380 return -ENOTSUP; 2381 } 2382 2383 memcpy(&msg.payload.session.session_setup_data, sess_info, 2384 sizeof(CryptoDevBackendSymSessionInfo)); 2385 if (sess_info->key_len) { 2386 memcpy(&msg.payload.session.key, sess_info->cipher_key, 2387 sess_info->key_len); 2388 } 2389 if (sess_info->auth_key_len > 0) { 2390 memcpy(&msg.payload.session.auth_key, sess_info->auth_key, 2391 sess_info->auth_key_len); 2392 } 2393 ret = vhost_user_write(dev, &msg, NULL, 0); 2394 if (ret < 0) { 2395 error_report("vhost_user_write() return %d, create session failed", 2396 ret); 2397 return ret; 2398 } 2399 2400 ret = vhost_user_read(dev, &msg); 2401 if (ret < 0) { 2402 error_report("vhost_user_read() return %d, create session failed", 2403 ret); 2404 return ret; 2405 } 2406 2407 if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) { 2408 error_report("Received unexpected msg type. 
Expected %d received %d", 2409 VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request); 2410 return -EPROTO; 2411 } 2412 2413 if (msg.hdr.size != sizeof(msg.payload.session)) { 2414 error_report("Received bad msg size."); 2415 return -EPROTO; 2416 } 2417 2418 if (msg.payload.session.session_id < 0) { 2419 error_report("Bad session id: %" PRId64 "", 2420 msg.payload.session.session_id); 2421 return -EINVAL; 2422 } 2423 *session_id = msg.payload.session.session_id; 2424 2425 return 0; 2426 } 2427 2428 static int 2429 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id) 2430 { 2431 int ret; 2432 bool crypto_session = virtio_has_feature(dev->protocol_features, 2433 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2434 VhostUserMsg msg = { 2435 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION, 2436 .hdr.flags = VHOST_USER_VERSION, 2437 .hdr.size = sizeof(msg.payload.u64), 2438 }; 2439 msg.payload.u64 = session_id; 2440 2441 if (!crypto_session) { 2442 error_report("vhost-user trying to send unhandled ioctl"); 2443 return -ENOTSUP; 2444 } 2445 2446 ret = vhost_user_write(dev, &msg, NULL, 0); 2447 if (ret < 0) { 2448 error_report("vhost_user_write() return %d, close session failed", 2449 ret); 2450 return ret; 2451 } 2452 2453 return 0; 2454 } 2455 2456 static bool vhost_user_mem_section_filter(struct vhost_dev *dev, 2457 MemoryRegionSection *section) 2458 { 2459 return memory_region_get_fd(section->mr) >= 0; 2460 } 2461 2462 static int vhost_user_get_inflight_fd(struct vhost_dev *dev, 2463 uint16_t queue_size, 2464 struct vhost_inflight *inflight) 2465 { 2466 void *addr; 2467 int fd; 2468 int ret; 2469 struct vhost_user *u = dev->opaque; 2470 CharBackend *chr = u->user->chr; 2471 VhostUserMsg msg = { 2472 .hdr.request = VHOST_USER_GET_INFLIGHT_FD, 2473 .hdr.flags = VHOST_USER_VERSION, 2474 .payload.inflight.num_queues = dev->nvqs, 2475 .payload.inflight.queue_size = queue_size, 2476 .hdr.size = sizeof(msg.payload.inflight), 2477 }; 2478 2479 if 
(!virtio_has_feature(dev->protocol_features, 2480 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2481 return 0; 2482 } 2483 2484 ret = vhost_user_write(dev, &msg, NULL, 0); 2485 if (ret < 0) { 2486 return ret; 2487 } 2488 2489 ret = vhost_user_read(dev, &msg); 2490 if (ret < 0) { 2491 return ret; 2492 } 2493 2494 if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) { 2495 error_report("Received unexpected msg type. " 2496 "Expected %d received %d", 2497 VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request); 2498 return -EPROTO; 2499 } 2500 2501 if (msg.hdr.size != sizeof(msg.payload.inflight)) { 2502 error_report("Received bad msg size."); 2503 return -EPROTO; 2504 } 2505 2506 if (!msg.payload.inflight.mmap_size) { 2507 return 0; 2508 } 2509 2510 fd = qemu_chr_fe_get_msgfd(chr); 2511 if (fd < 0) { 2512 error_report("Failed to get mem fd"); 2513 return -EIO; 2514 } 2515 2516 addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE, 2517 MAP_SHARED, fd, msg.payload.inflight.mmap_offset); 2518 2519 if (addr == MAP_FAILED) { 2520 error_report("Failed to mmap mem fd"); 2521 close(fd); 2522 return -EFAULT; 2523 } 2524 2525 inflight->addr = addr; 2526 inflight->fd = fd; 2527 inflight->size = msg.payload.inflight.mmap_size; 2528 inflight->offset = msg.payload.inflight.mmap_offset; 2529 inflight->queue_size = queue_size; 2530 2531 return 0; 2532 } 2533 2534 static int vhost_user_set_inflight_fd(struct vhost_dev *dev, 2535 struct vhost_inflight *inflight) 2536 { 2537 VhostUserMsg msg = { 2538 .hdr.request = VHOST_USER_SET_INFLIGHT_FD, 2539 .hdr.flags = VHOST_USER_VERSION, 2540 .payload.inflight.mmap_size = inflight->size, 2541 .payload.inflight.mmap_offset = inflight->offset, 2542 .payload.inflight.num_queues = dev->nvqs, 2543 .payload.inflight.queue_size = inflight->queue_size, 2544 .hdr.size = sizeof(msg.payload.inflight), 2545 }; 2546 2547 if (!virtio_has_feature(dev->protocol_features, 2548 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2549 return 0; 2550 } 2551 2552 return 
vhost_user_write(dev, &msg, &inflight->fd, 1); 2553 } 2554 2555 static void vhost_user_state_destroy(gpointer data) 2556 { 2557 VhostUserHostNotifier *n = (VhostUserHostNotifier *) data; 2558 if (n) { 2559 vhost_user_host_notifier_remove(n, NULL); 2560 object_unparent(OBJECT(&n->mr)); 2561 /* 2562 * We can't free until vhost_user_host_notifier_remove has 2563 * done it's thing so schedule the free with RCU. 2564 */ 2565 g_free_rcu(n, rcu); 2566 } 2567 } 2568 2569 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) 2570 { 2571 if (user->chr) { 2572 error_setg(errp, "Cannot initialize vhost-user state"); 2573 return false; 2574 } 2575 user->chr = chr; 2576 user->memory_slots = 0; 2577 user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4, 2578 &vhost_user_state_destroy); 2579 return true; 2580 } 2581 2582 void vhost_user_cleanup(VhostUserState *user) 2583 { 2584 if (!user->chr) { 2585 return; 2586 } 2587 memory_region_transaction_begin(); 2588 user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true); 2589 memory_region_transaction_commit(); 2590 user->chr = NULL; 2591 } 2592 2593 2594 typedef struct { 2595 vu_async_close_fn cb; 2596 DeviceState *dev; 2597 CharBackend *cd; 2598 struct vhost_dev *vhost; 2599 } VhostAsyncCallback; 2600 2601 static void vhost_user_async_close_bh(void *opaque) 2602 { 2603 VhostAsyncCallback *data = opaque; 2604 struct vhost_dev *vhost = data->vhost; 2605 2606 /* 2607 * If the vhost_dev has been cleared in the meantime there is 2608 * nothing left to do as some other path has completed the 2609 * cleanup. 2610 */ 2611 if (vhost->vdev) { 2612 data->cb(data->dev); 2613 } 2614 2615 g_free(data); 2616 } 2617 2618 /* 2619 * We only schedule the work if the machine is running. If suspended 2620 * we want to keep all the in-flight data as is for migration 2621 * purposes. 
2622 */ 2623 void vhost_user_async_close(DeviceState *d, 2624 CharBackend *chardev, struct vhost_dev *vhost, 2625 vu_async_close_fn cb) 2626 { 2627 if (!runstate_check(RUN_STATE_SHUTDOWN)) { 2628 /* 2629 * A close event may happen during a read/write, but vhost 2630 * code assumes the vhost_dev remains setup, so delay the 2631 * stop & clear. 2632 */ 2633 AioContext *ctx = qemu_get_current_aio_context(); 2634 VhostAsyncCallback *data = g_new0(VhostAsyncCallback, 1); 2635 2636 /* Save data for the callback */ 2637 data->cb = cb; 2638 data->dev = d; 2639 data->cd = chardev; 2640 data->vhost = vhost; 2641 2642 /* Disable any further notifications on the chardev */ 2643 qemu_chr_fe_set_handlers(chardev, 2644 NULL, NULL, NULL, NULL, NULL, NULL, 2645 false); 2646 2647 aio_bh_schedule_oneshot(ctx, vhost_user_async_close_bh, data); 2648 2649 /* 2650 * Move vhost device to the stopped state. The vhost-user device 2651 * will be clean up and disconnected in BH. This can be useful in 2652 * the vhost migration code. If disconnect was caught there is an 2653 * option for the general vhost code to get the dev state without 2654 * knowing its type (in this case vhost-user). 2655 * 2656 * Note if the vhost device is fully cleared by the time we 2657 * execute the bottom half we won't continue with the cleanup. 
2658 */ 2659 vhost->started = false; 2660 } 2661 } 2662 2663 static int vhost_user_dev_start(struct vhost_dev *dev, bool started) 2664 { 2665 if (!virtio_has_feature(dev->protocol_features, 2666 VHOST_USER_PROTOCOL_F_STATUS)) { 2667 return 0; 2668 } 2669 2670 /* Set device status only for last queue pair */ 2671 if (dev->vq_index + dev->nvqs != dev->vq_index_end) { 2672 return 0; 2673 } 2674 2675 if (started) { 2676 return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | 2677 VIRTIO_CONFIG_S_DRIVER | 2678 VIRTIO_CONFIG_S_DRIVER_OK); 2679 } else { 2680 return vhost_user_set_status(dev, 0); 2681 } 2682 } 2683 2684 const VhostOps user_ops = { 2685 .backend_type = VHOST_BACKEND_TYPE_USER, 2686 .vhost_backend_init = vhost_user_backend_init, 2687 .vhost_backend_cleanup = vhost_user_backend_cleanup, 2688 .vhost_backend_memslots_limit = vhost_user_memslots_limit, 2689 .vhost_set_log_base = vhost_user_set_log_base, 2690 .vhost_set_mem_table = vhost_user_set_mem_table, 2691 .vhost_set_vring_addr = vhost_user_set_vring_addr, 2692 .vhost_set_vring_endian = vhost_user_set_vring_endian, 2693 .vhost_set_vring_num = vhost_user_set_vring_num, 2694 .vhost_set_vring_base = vhost_user_set_vring_base, 2695 .vhost_get_vring_base = vhost_user_get_vring_base, 2696 .vhost_set_vring_kick = vhost_user_set_vring_kick, 2697 .vhost_set_vring_call = vhost_user_set_vring_call, 2698 .vhost_set_vring_err = vhost_user_set_vring_err, 2699 .vhost_set_features = vhost_user_set_features, 2700 .vhost_get_features = vhost_user_get_features, 2701 .vhost_set_owner = vhost_user_set_owner, 2702 .vhost_reset_device = vhost_user_reset_device, 2703 .vhost_get_vq_index = vhost_user_get_vq_index, 2704 .vhost_set_vring_enable = vhost_user_set_vring_enable, 2705 .vhost_requires_shm_log = vhost_user_requires_shm_log, 2706 .vhost_migration_done = vhost_user_migration_done, 2707 .vhost_backend_can_merge = vhost_user_can_merge, 2708 .vhost_net_set_mtu = vhost_user_net_set_mtu, 2709 .vhost_set_iotlb_callback = 
vhost_user_set_iotlb_callback, 2710 .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg, 2711 .vhost_get_config = vhost_user_get_config, 2712 .vhost_set_config = vhost_user_set_config, 2713 .vhost_crypto_create_session = vhost_user_crypto_create_session, 2714 .vhost_crypto_close_session = vhost_user_crypto_close_session, 2715 .vhost_backend_mem_section_filter = vhost_user_mem_section_filter, 2716 .vhost_get_inflight_fd = vhost_user_get_inflight_fd, 2717 .vhost_set_inflight_fd = vhost_user_set_inflight_fd, 2718 .vhost_dev_start = vhost_user_dev_start, 2719 }; 2720