1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "qemu/osdep.h" 12 #include "qapi/error.h" 13 #include "hw/virtio/vhost.h" 14 #include "hw/virtio/virtio-crypto.h" 15 #include "hw/virtio/vhost-user.h" 16 #include "hw/virtio/vhost-backend.h" 17 #include "hw/virtio/virtio.h" 18 #include "hw/virtio/virtio-net.h" 19 #include "chardev/char-fe.h" 20 #include "io/channel-socket.h" 21 #include "sysemu/kvm.h" 22 #include "qemu/error-report.h" 23 #include "qemu/main-loop.h" 24 #include "qemu/sockets.h" 25 #include "sysemu/runstate.h" 26 #include "sysemu/cryptodev.h" 27 #include "migration/migration.h" 28 #include "migration/postcopy-ram.h" 29 #include "trace.h" 30 #include "exec/ramblock.h" 31 32 #include <sys/ioctl.h> 33 #include <sys/socket.h> 34 #include <sys/un.h> 35 36 #include "standard-headers/linux/vhost_types.h" 37 38 #ifdef CONFIG_LINUX 39 #include <linux/userfaultfd.h> 40 #endif 41 42 #define VHOST_MEMORY_BASELINE_NREGIONS 8 43 #define VHOST_USER_F_PROTOCOL_FEATURES 30 44 #define VHOST_USER_BACKEND_MAX_FDS 8 45 46 #if defined(TARGET_PPC) || defined(TARGET_PPC64) 47 #include "hw/ppc/spapr.h" 48 #define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS 49 50 #else 51 #define VHOST_USER_MAX_RAM_SLOTS 512 52 #endif 53 54 /* 55 * Maximum size of virtio device config space 56 */ 57 #define VHOST_USER_MAX_CONFIG_SIZE 256 58 59 enum VhostUserProtocolFeature { 60 VHOST_USER_PROTOCOL_F_MQ = 0, 61 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, 62 VHOST_USER_PROTOCOL_F_RARP = 2, 63 VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, 64 VHOST_USER_PROTOCOL_F_NET_MTU = 4, 65 VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5, 66 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, 67 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, 68 VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, 69 VHOST_USER_PROTOCOL_F_CONFIG = 9, 70 VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10, 71 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, 72 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, 73 VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, 74 /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */ 75 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, 76 VHOST_USER_PROTOCOL_F_STATUS = 16, 77 VHOST_USER_PROTOCOL_F_MAX 78 }; 79 80 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) 81 82 typedef enum VhostUserRequest { 83 VHOST_USER_NONE = 0, 84 VHOST_USER_GET_FEATURES = 1, 85 VHOST_USER_SET_FEATURES = 2, 86 VHOST_USER_SET_OWNER = 3, 87 VHOST_USER_RESET_OWNER = 4, 88 VHOST_USER_SET_MEM_TABLE = 5, 89 VHOST_USER_SET_LOG_BASE = 6, 90 VHOST_USER_SET_LOG_FD = 7, 91 VHOST_USER_SET_VRING_NUM = 8, 92 VHOST_USER_SET_VRING_ADDR = 9, 93 VHOST_USER_SET_VRING_BASE = 10, 94 VHOST_USER_GET_VRING_BASE = 11, 95 VHOST_USER_SET_VRING_KICK = 12, 96 VHOST_USER_SET_VRING_CALL = 13, 97 VHOST_USER_SET_VRING_ERR = 14, 98 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 99 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 100 VHOST_USER_GET_QUEUE_NUM = 17, 101 VHOST_USER_SET_VRING_ENABLE = 18, 102 VHOST_USER_SEND_RARP = 19, 103 VHOST_USER_NET_SET_MTU = 20, 104 VHOST_USER_SET_BACKEND_REQ_FD = 21, 105 VHOST_USER_IOTLB_MSG = 22, 106 VHOST_USER_SET_VRING_ENDIAN = 23, 107 VHOST_USER_GET_CONFIG = 24, 108 VHOST_USER_SET_CONFIG = 25, 109 VHOST_USER_CREATE_CRYPTO_SESSION = 26, 110 VHOST_USER_CLOSE_CRYPTO_SESSION = 27, 111 VHOST_USER_POSTCOPY_ADVISE = 28, 112 VHOST_USER_POSTCOPY_LISTEN = 29, 113 VHOST_USER_POSTCOPY_END = 30, 114 VHOST_USER_GET_INFLIGHT_FD = 31, 115 VHOST_USER_SET_INFLIGHT_FD = 32, 116 VHOST_USER_GPU_SET_SOCKET = 33, 117 VHOST_USER_RESET_DEVICE = 34, 118 /* Message number 35 reserved for VHOST_USER_VRING_KICK. */ 119 VHOST_USER_GET_MAX_MEM_SLOTS = 36, 120 VHOST_USER_ADD_MEM_REG = 37, 121 VHOST_USER_REM_MEM_REG = 38, 122 VHOST_USER_SET_STATUS = 39, 123 VHOST_USER_GET_STATUS = 40, 124 VHOST_USER_MAX 125 } VhostUserRequest; 126 127 typedef enum VhostUserBackendRequest { 128 VHOST_USER_BACKEND_NONE = 0, 129 VHOST_USER_BACKEND_IOTLB_MSG = 1, 130 VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2, 131 VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3, 132 VHOST_USER_BACKEND_MAX 133 } VhostUserBackendRequest; 134 135 typedef struct VhostUserMemoryRegion { 136 uint64_t guest_phys_addr; 137 uint64_t memory_size; 138 uint64_t userspace_addr; 139 uint64_t mmap_offset; 140 } VhostUserMemoryRegion; 141 142 typedef struct VhostUserMemory { 143 uint32_t nregions; 144 uint32_t padding; 145 VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS]; 146 } VhostUserMemory; 147 148 typedef struct VhostUserMemRegMsg { 149 uint64_t padding; 150 VhostUserMemoryRegion region; 151 } VhostUserMemRegMsg; 152 153 typedef struct VhostUserLog { 154 uint64_t mmap_size; 155 uint64_t mmap_offset; 156 } VhostUserLog; 157 158 typedef struct VhostUserConfig { 159 uint32_t offset; 160 uint32_t size; 161 uint32_t flags; 162 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; 163 } VhostUserConfig; 164 165 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512 166 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64 167 #define VHOST_CRYPTO_ASYM_MAX_KEY_LEN 1024 168 169 typedef struct VhostUserCryptoSession { 170 uint64_t op_code; 171 union { 172 struct { 173 CryptoDevBackendSymSessionInfo session_setup_data; 174 uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN]; 175 uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN]; 176 } sym; 177 struct { 178 CryptoDevBackendAsymSessionInfo session_setup_data; 179 uint8_t key[VHOST_CRYPTO_ASYM_MAX_KEY_LEN]; 180 } asym; 181 } u; 182 183 /* session id for success, -1 on errors */ 184 int64_t session_id; 185 } VhostUserCryptoSession; 186 187 static VhostUserConfig c __attribute__ ((unused)); 188 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \ 189 + sizeof(c.size) \ 190 + sizeof(c.flags)) 191 192 typedef struct VhostUserVringArea { 193 uint64_t u64; 194 uint64_t size; 195 uint64_t offset; 196 } VhostUserVringArea; 197 198 typedef struct VhostUserInflight { 199 uint64_t mmap_size; 200 uint64_t mmap_offset; 201 uint16_t num_queues; 202 uint16_t queue_size; 203 } VhostUserInflight; 204 205 typedef struct { 206 VhostUserRequest request; 207 208 #define VHOST_USER_VERSION_MASK (0x3) 209 #define VHOST_USER_REPLY_MASK (0x1 << 2) 210 #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) 211 uint32_t flags; 212 uint32_t size; /* the following payload size */ 213 } QEMU_PACKED VhostUserHeader; 214 215 typedef union { 216 #define VHOST_USER_VRING_IDX_MASK (0xff) 217 #define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) 218 uint64_t u64; 219 struct vhost_vring_state state; 220 struct vhost_vring_addr addr; 221 VhostUserMemory memory; 222 VhostUserMemRegMsg mem_reg; 223 VhostUserLog log; 224 struct vhost_iotlb_msg iotlb; 225 VhostUserConfig config; 226 VhostUserCryptoSession session; 227 VhostUserVringArea area; 228 VhostUserInflight inflight; 229 } VhostUserPayload; 230 231 typedef struct VhostUserMsg { 232 VhostUserHeader hdr; 233 VhostUserPayload payload; 234 } QEMU_PACKED VhostUserMsg; 235 236 static VhostUserMsg m __attribute__ ((unused)); 237 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader)) 238 239 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload)) 240 241 /* The version of the protocol we support */ 242 #define VHOST_USER_VERSION (0x1) 243 244 struct vhost_user { 245 struct vhost_dev *dev; 246 /* Shared between vhost devs of the same virtio device */ 247 VhostUserState *user; 248 QIOChannel *backend_ioc; 249 GSource *backend_src; 250 NotifierWithReturn postcopy_notifier; 251 struct PostCopyFD postcopy_fd; 252 uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS]; 253 /* Length of the region_rb and region_rb_offset arrays */ 254 size_t region_rb_len; 255 /* RAMBlock associated with a given region */ 256 RAMBlock **region_rb; 257 /* 258 * The offset from the start of the RAMBlock to the start of the 259 * vhost region. 260 */ 261 ram_addr_t *region_rb_offset; 262 263 /* True once we've entered postcopy_listen */ 264 bool postcopy_listen; 265 266 /* Our current regions */ 267 int num_shadow_regions; 268 struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS]; 269 }; 270 271 struct scrub_regions { 272 struct vhost_memory_region *region; 273 int reg_idx; 274 int fd_idx; 275 }; 276 277 static bool ioeventfd_enabled(void) 278 { 279 return !kvm_enabled() || kvm_eventfds_enabled(); 280 } 281 282 static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg) 283 { 284 struct vhost_user *u = dev->opaque; 285 CharBackend *chr = u->user->chr; 286 uint8_t *p = (uint8_t *) msg; 287 int r, size = VHOST_USER_HDR_SIZE; 288 289 r = qemu_chr_fe_read_all(chr, p, size); 290 if (r != size) { 291 int saved_errno = errno; 292 error_report("Failed to read msg header. Read %d instead of %d." 293 " Original request %d.", r, size, msg->hdr.request); 294 return r < 0 ? -saved_errno : -EIO; 295 } 296 297 /* validate received flags */ 298 if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 299 error_report("Failed to read msg header." 300 " Flags 0x%x instead of 0x%x.", msg->hdr.flags, 301 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 302 return -EPROTO; 303 } 304 305 trace_vhost_user_read(msg->hdr.request, msg->hdr.flags); 306 307 return 0; 308 } 309 310 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 311 { 312 struct vhost_user *u = dev->opaque; 313 CharBackend *chr = u->user->chr; 314 uint8_t *p = (uint8_t *) msg; 315 int r, size; 316 317 r = vhost_user_read_header(dev, msg); 318 if (r < 0) { 319 return r; 320 } 321 322 /* validate message size is sane */ 323 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) { 324 error_report("Failed to read msg header." 325 " Size %d exceeds the maximum %zu.", msg->hdr.size, 326 VHOST_USER_PAYLOAD_SIZE); 327 return -EPROTO; 328 } 329 330 if (msg->hdr.size) { 331 p += VHOST_USER_HDR_SIZE; 332 size = msg->hdr.size; 333 r = qemu_chr_fe_read_all(chr, p, size); 334 if (r != size) { 335 int saved_errno = errno; 336 error_report("Failed to read msg payload." 337 " Read %d instead of %d.", r, msg->hdr.size); 338 return r < 0 ? -saved_errno : -EIO; 339 } 340 } 341 342 return 0; 343 } 344 345 static int process_message_reply(struct vhost_dev *dev, 346 const VhostUserMsg *msg) 347 { 348 int ret; 349 VhostUserMsg msg_reply; 350 351 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { 352 return 0; 353 } 354 355 ret = vhost_user_read(dev, &msg_reply); 356 if (ret < 0) { 357 return ret; 358 } 359 360 if (msg_reply.hdr.request != msg->hdr.request) { 361 error_report("Received unexpected msg type. " 362 "Expected %d received %d", 363 msg->hdr.request, msg_reply.hdr.request); 364 return -EPROTO; 365 } 366 367 return msg_reply.payload.u64 ? -EIO : 0; 368 } 369 370 static bool vhost_user_one_time_request(VhostUserRequest request) 371 { 372 switch (request) { 373 case VHOST_USER_SET_OWNER: 374 case VHOST_USER_RESET_OWNER: 375 case VHOST_USER_SET_MEM_TABLE: 376 case VHOST_USER_GET_QUEUE_NUM: 377 case VHOST_USER_NET_SET_MTU: 378 case VHOST_USER_ADD_MEM_REG: 379 case VHOST_USER_REM_MEM_REG: 380 return true; 381 default: 382 return false; 383 } 384 } 385 386 /* most non-init callers ignore the error */ 387 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 388 int *fds, int fd_num) 389 { 390 struct vhost_user *u = dev->opaque; 391 CharBackend *chr = u->user->chr; 392 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size; 393 394 /* 395 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, 396 * we just need send it once in the first time. For later such 397 * request, we just ignore it. 398 */ 399 if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) { 400 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 401 return 0; 402 } 403 404 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) { 405 error_report("Failed to set msg fds."); 406 return -EINVAL; 407 } 408 409 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size); 410 if (ret != size) { 411 int saved_errno = errno; 412 error_report("Failed to write msg." 413 " Wrote %d instead of %d.", ret, size); 414 return ret < 0 ? -saved_errno : -EIO; 415 } 416 417 trace_vhost_user_write(msg->hdr.request, msg->hdr.flags); 418 419 return 0; 420 } 421 422 int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd) 423 { 424 VhostUserMsg msg = { 425 .hdr.request = VHOST_USER_GPU_SET_SOCKET, 426 .hdr.flags = VHOST_USER_VERSION, 427 }; 428 429 return vhost_user_write(dev, &msg, &fd, 1); 430 } 431 432 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, 433 struct vhost_log *log) 434 { 435 int fds[VHOST_USER_MAX_RAM_SLOTS]; 436 size_t fd_num = 0; 437 bool shmfd = virtio_has_feature(dev->protocol_features, 438 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 439 int ret; 440 VhostUserMsg msg = { 441 .hdr.request = VHOST_USER_SET_LOG_BASE, 442 .hdr.flags = VHOST_USER_VERSION, 443 .payload.log.mmap_size = log->size * sizeof(*(log->log)), 444 .payload.log.mmap_offset = 0, 445 .hdr.size = sizeof(msg.payload.log), 446 }; 447 448 /* Send only once with first queue pair */ 449 if (dev->vq_index != 0) { 450 return 0; 451 } 452 453 if (shmfd && log->fd != -1) { 454 fds[fd_num++] = log->fd; 455 } 456 457 ret = vhost_user_write(dev, &msg, fds, fd_num); 458 if (ret < 0) { 459 return ret; 460 } 461 462 if (shmfd) { 463 msg.hdr.size = 0; 464 ret = vhost_user_read(dev, &msg); 465 if (ret < 0) { 466 return ret; 467 } 468 469 if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) { 470 error_report("Received unexpected msg type. " 471 "Expected %d received %d", 472 VHOST_USER_SET_LOG_BASE, msg.hdr.request); 473 return -EPROTO; 474 } 475 } 476 477 return 0; 478 } 479 480 static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset, 481 int *fd) 482 { 483 MemoryRegion *mr; 484 485 assert((uintptr_t)addr == addr); 486 mr = memory_region_from_host((void *)(uintptr_t)addr, offset); 487 *fd = memory_region_get_fd(mr); 488 *offset += mr->ram_block->fd_offset; 489 490 return mr; 491 } 492 493 static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst, 494 struct vhost_memory_region *src, 495 uint64_t mmap_offset) 496 { 497 assert(src != NULL && dst != NULL); 498 dst->userspace_addr = src->userspace_addr; 499 dst->memory_size = src->memory_size; 500 dst->guest_phys_addr = src->guest_phys_addr; 501 dst->mmap_offset = mmap_offset; 502 } 503 504 static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, 505 struct vhost_dev *dev, 506 VhostUserMsg *msg, 507 int *fds, size_t *fd_num, 508 bool track_ramblocks) 509 { 510 int i, fd; 511 ram_addr_t offset; 512 MemoryRegion *mr; 513 struct vhost_memory_region *reg; 514 VhostUserMemoryRegion region_buffer; 515 516 msg->hdr.request = VHOST_USER_SET_MEM_TABLE; 517 518 for (i = 0; i < dev->mem->nregions; ++i) { 519 reg = dev->mem->regions + i; 520 521 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 522 if (fd > 0) { 523 if (track_ramblocks) { 524 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS); 525 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name, 526 reg->memory_size, 527 reg->guest_phys_addr, 528 reg->userspace_addr, 529 offset); 530 u->region_rb_offset[i] = offset; 531 u->region_rb[i] = mr->ram_block; 532 } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) { 533 error_report("Failed preparing vhost-user memory table msg"); 534 return -ENOBUFS; 535 } 536 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 537 msg->payload.memory.regions[*fd_num] = region_buffer; 538 fds[(*fd_num)++] = fd; 539 } else if (track_ramblocks) { 540 u->region_rb_offset[i] = 0; 541 u->region_rb[i] = NULL; 542 } 543 } 544 545 msg->payload.memory.nregions = *fd_num; 546 547 if (!*fd_num) { 548 error_report("Failed initializing vhost-user memory map, " 549 "consider using -object memory-backend-file share=on"); 550 return -EINVAL; 551 } 552 553 msg->hdr.size = sizeof(msg->payload.memory.nregions); 554 msg->hdr.size += sizeof(msg->payload.memory.padding); 555 msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion); 556 557 return 0; 558 } 559 560 static inline bool reg_equal(struct vhost_memory_region *shadow_reg, 561 struct vhost_memory_region *vdev_reg) 562 { 563 return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr && 564 shadow_reg->userspace_addr == vdev_reg->userspace_addr && 565 shadow_reg->memory_size == vdev_reg->memory_size; 566 } 567 568 static void scrub_shadow_regions(struct vhost_dev *dev, 569 struct scrub_regions *add_reg, 570 int *nr_add_reg, 571 struct scrub_regions *rem_reg, 572 int *nr_rem_reg, uint64_t *shadow_pcb, 573 bool track_ramblocks) 574 { 575 struct vhost_user *u = dev->opaque; 576 bool found[VHOST_USER_MAX_RAM_SLOTS] = {}; 577 struct vhost_memory_region *reg, *shadow_reg; 578 int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0; 579 ram_addr_t offset; 580 MemoryRegion *mr; 581 bool matching; 582 583 /* 584 * Find memory regions present in our shadow state which are not in 585 * the device's current memory state. 586 * 587 * Mark regions in both the shadow and device state as "found". 588 */ 589 for (i = 0; i < u->num_shadow_regions; i++) { 590 shadow_reg = &u->shadow_regions[i]; 591 matching = false; 592 593 for (j = 0; j < dev->mem->nregions; j++) { 594 reg = &dev->mem->regions[j]; 595 596 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 597 598 if (reg_equal(shadow_reg, reg)) { 599 matching = true; 600 found[j] = true; 601 if (track_ramblocks) { 602 /* 603 * Reset postcopy client bases, region_rb, and 604 * region_rb_offset in case regions are removed. 605 */ 606 if (fd > 0) { 607 u->region_rb_offset[j] = offset; 608 u->region_rb[j] = mr->ram_block; 609 shadow_pcb[j] = u->postcopy_client_bases[i]; 610 } else { 611 u->region_rb_offset[j] = 0; 612 u->region_rb[j] = NULL; 613 } 614 } 615 break; 616 } 617 } 618 619 /* 620 * If the region was not found in the current device memory state 621 * create an entry for it in the removed list. 622 */ 623 if (!matching) { 624 rem_reg[rm_idx].region = shadow_reg; 625 rem_reg[rm_idx++].reg_idx = i; 626 } 627 } 628 629 /* 630 * For regions not marked "found", create entries in the added list. 631 * 632 * Note their indexes in the device memory state and the indexes of their 633 * file descriptors. 634 */ 635 for (i = 0; i < dev->mem->nregions; i++) { 636 reg = &dev->mem->regions[i]; 637 vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 638 if (fd > 0) { 639 ++fd_num; 640 } 641 642 /* 643 * If the region was in both the shadow and device state we don't 644 * need to send a VHOST_USER_ADD_MEM_REG message for it. 645 */ 646 if (found[i]) { 647 continue; 648 } 649 650 add_reg[add_idx].region = reg; 651 add_reg[add_idx].reg_idx = i; 652 add_reg[add_idx++].fd_idx = fd_num; 653 } 654 *nr_rem_reg = rm_idx; 655 *nr_add_reg = add_idx; 656 657 return; 658 } 659 660 static int send_remove_regions(struct vhost_dev *dev, 661 struct scrub_regions *remove_reg, 662 int nr_rem_reg, VhostUserMsg *msg, 663 bool reply_supported) 664 { 665 struct vhost_user *u = dev->opaque; 666 struct vhost_memory_region *shadow_reg; 667 int i, fd, shadow_reg_idx, ret; 668 ram_addr_t offset; 669 VhostUserMemoryRegion region_buffer; 670 671 /* 672 * The regions in remove_reg appear in the same order they do in the 673 * shadow table. Therefore we can minimize memory copies by iterating 674 * through remove_reg backwards. 675 */ 676 for (i = nr_rem_reg - 1; i >= 0; i--) { 677 shadow_reg = remove_reg[i].region; 678 shadow_reg_idx = remove_reg[i].reg_idx; 679 680 vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd); 681 682 if (fd > 0) { 683 msg->hdr.request = VHOST_USER_REM_MEM_REG; 684 vhost_user_fill_msg_region(®ion_buffer, shadow_reg, 0); 685 msg->payload.mem_reg.region = region_buffer; 686 687 ret = vhost_user_write(dev, msg, NULL, 0); 688 if (ret < 0) { 689 return ret; 690 } 691 692 if (reply_supported) { 693 ret = process_message_reply(dev, msg); 694 if (ret) { 695 return ret; 696 } 697 } 698 } 699 700 /* 701 * At this point we know the backend has unmapped the region. It is now 702 * safe to remove it from the shadow table. 703 */ 704 memmove(&u->shadow_regions[shadow_reg_idx], 705 &u->shadow_regions[shadow_reg_idx + 1], 706 sizeof(struct vhost_memory_region) * 707 (u->num_shadow_regions - shadow_reg_idx - 1)); 708 u->num_shadow_regions--; 709 } 710 711 return 0; 712 } 713 714 static int send_add_regions(struct vhost_dev *dev, 715 struct scrub_regions *add_reg, int nr_add_reg, 716 VhostUserMsg *msg, uint64_t *shadow_pcb, 717 bool reply_supported, bool track_ramblocks) 718 { 719 struct vhost_user *u = dev->opaque; 720 int i, fd, ret, reg_idx, reg_fd_idx; 721 struct vhost_memory_region *reg; 722 MemoryRegion *mr; 723 ram_addr_t offset; 724 VhostUserMsg msg_reply; 725 VhostUserMemoryRegion region_buffer; 726 727 for (i = 0; i < nr_add_reg; i++) { 728 reg = add_reg[i].region; 729 reg_idx = add_reg[i].reg_idx; 730 reg_fd_idx = add_reg[i].fd_idx; 731 732 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 733 734 if (fd > 0) { 735 if (track_ramblocks) { 736 trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name, 737 reg->memory_size, 738 reg->guest_phys_addr, 739 reg->userspace_addr, 740 offset); 741 u->region_rb_offset[reg_idx] = offset; 742 u->region_rb[reg_idx] = mr->ram_block; 743 } 744 msg->hdr.request = VHOST_USER_ADD_MEM_REG; 745 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 746 msg->payload.mem_reg.region = region_buffer; 747 748 ret = vhost_user_write(dev, msg, &fd, 1); 749 if (ret < 0) { 750 return ret; 751 } 752 753 if (track_ramblocks) { 754 uint64_t reply_gpa; 755 756 ret = vhost_user_read(dev, &msg_reply); 757 if (ret < 0) { 758 return ret; 759 } 760 761 reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr; 762 763 if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) { 764 error_report("%s: Received unexpected msg type." 765 "Expected %d received %d", __func__, 766 VHOST_USER_ADD_MEM_REG, 767 msg_reply.hdr.request); 768 return -EPROTO; 769 } 770 771 /* 772 * We're using the same structure, just reusing one of the 773 * fields, so it should be the same size. 774 */ 775 if (msg_reply.hdr.size != msg->hdr.size) { 776 error_report("%s: Unexpected size for postcopy reply " 777 "%d vs %d", __func__, msg_reply.hdr.size, 778 msg->hdr.size); 779 return -EPROTO; 780 } 781 782 /* Get the postcopy client base from the backend's reply. */ 783 if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) { 784 shadow_pcb[reg_idx] = 785 msg_reply.payload.mem_reg.region.userspace_addr; 786 trace_vhost_user_set_mem_table_postcopy( 787 msg_reply.payload.mem_reg.region.userspace_addr, 788 msg->payload.mem_reg.region.userspace_addr, 789 reg_fd_idx, reg_idx); 790 } else { 791 error_report("%s: invalid postcopy reply for region. " 792 "Got guest physical address %" PRIX64 ", expected " 793 "%" PRIX64, __func__, reply_gpa, 794 dev->mem->regions[reg_idx].guest_phys_addr); 795 return -EPROTO; 796 } 797 } else if (reply_supported) { 798 ret = process_message_reply(dev, msg); 799 if (ret) { 800 return ret; 801 } 802 } 803 } else if (track_ramblocks) { 804 u->region_rb_offset[reg_idx] = 0; 805 u->region_rb[reg_idx] = NULL; 806 } 807 808 /* 809 * At this point, we know the backend has mapped in the new 810 * region, if the region has a valid file descriptor. 811 * 812 * The region should now be added to the shadow table. 813 */ 814 u->shadow_regions[u->num_shadow_regions].guest_phys_addr = 815 reg->guest_phys_addr; 816 u->shadow_regions[u->num_shadow_regions].userspace_addr = 817 reg->userspace_addr; 818 u->shadow_regions[u->num_shadow_regions].memory_size = 819 reg->memory_size; 820 u->num_shadow_regions++; 821 } 822 823 return 0; 824 } 825 826 static int vhost_user_add_remove_regions(struct vhost_dev *dev, 827 VhostUserMsg *msg, 828 bool reply_supported, 829 bool track_ramblocks) 830 { 831 struct vhost_user *u = dev->opaque; 832 struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS]; 833 struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS]; 834 uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {}; 835 int nr_add_reg, nr_rem_reg; 836 int ret; 837 838 msg->hdr.size = sizeof(msg->payload.mem_reg); 839 840 /* Find the regions which need to be removed or added. */ 841 scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg, 842 shadow_pcb, track_ramblocks); 843 844 if (nr_rem_reg) { 845 ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg, 846 reply_supported); 847 if (ret < 0) { 848 goto err; 849 } 850 } 851 852 if (nr_add_reg) { 853 ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb, 854 reply_supported, track_ramblocks); 855 if (ret < 0) { 856 goto err; 857 } 858 } 859 860 if (track_ramblocks) { 861 memcpy(u->postcopy_client_bases, shadow_pcb, 862 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 863 /* 864 * Now we've registered this with the postcopy code, we ack to the 865 * client, because now we're in the position to be able to deal with 866 * any faults it generates. 867 */ 868 /* TODO: Use this for failure cases as well with a bad value. */ 869 msg->hdr.size = sizeof(msg->payload.u64); 870 msg->payload.u64 = 0; /* OK */ 871 872 ret = vhost_user_write(dev, msg, NULL, 0); 873 if (ret < 0) { 874 return ret; 875 } 876 } 877 878 return 0; 879 880 err: 881 if (track_ramblocks) { 882 memcpy(u->postcopy_client_bases, shadow_pcb, 883 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 884 } 885 886 return ret; 887 } 888 889 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, 890 struct vhost_memory *mem, 891 bool reply_supported, 892 bool config_mem_slots) 893 { 894 struct vhost_user *u = dev->opaque; 895 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 896 size_t fd_num = 0; 897 VhostUserMsg msg_reply; 898 int region_i, msg_i; 899 int ret; 900 901 VhostUserMsg msg = { 902 .hdr.flags = VHOST_USER_VERSION, 903 }; 904 905 if (u->region_rb_len < dev->mem->nregions) { 906 u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions); 907 u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset, 908 dev->mem->nregions); 909 memset(&(u->region_rb[u->region_rb_len]), '\0', 910 sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len)); 911 memset(&(u->region_rb_offset[u->region_rb_len]), '\0', 912 sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len)); 913 u->region_rb_len = dev->mem->nregions; 914 } 915 916 if (config_mem_slots) { 917 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true); 918 if (ret < 0) { 919 return ret; 920 } 921 } else { 922 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 923 true); 924 if (ret < 0) { 925 return ret; 926 } 927 928 ret = vhost_user_write(dev, &msg, fds, fd_num); 929 if (ret < 0) { 930 return ret; 931 } 932 933 ret = vhost_user_read(dev, &msg_reply); 934 if (ret < 0) { 935 return ret; 936 } 937 938 if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { 939 error_report("%s: Received unexpected msg type." 940 "Expected %d received %d", __func__, 941 VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); 942 return -EPROTO; 943 } 944 945 /* 946 * We're using the same structure, just reusing one of the 947 * fields, so it should be the same size. 948 */ 949 if (msg_reply.hdr.size != msg.hdr.size) { 950 error_report("%s: Unexpected size for postcopy reply " 951 "%d vs %d", __func__, msg_reply.hdr.size, 952 msg.hdr.size); 953 return -EPROTO; 954 } 955 956 memset(u->postcopy_client_bases, 0, 957 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 958 959 /* 960 * They're in the same order as the regions that were sent 961 * but some of the regions were skipped (above) if they 962 * didn't have fd's 963 */ 964 for (msg_i = 0, region_i = 0; 965 region_i < dev->mem->nregions; 966 region_i++) { 967 if (msg_i < fd_num && 968 msg_reply.payload.memory.regions[msg_i].guest_phys_addr == 969 dev->mem->regions[region_i].guest_phys_addr) { 970 u->postcopy_client_bases[region_i] = 971 msg_reply.payload.memory.regions[msg_i].userspace_addr; 972 trace_vhost_user_set_mem_table_postcopy( 973 msg_reply.payload.memory.regions[msg_i].userspace_addr, 974 msg.payload.memory.regions[msg_i].userspace_addr, 975 msg_i, region_i); 976 msg_i++; 977 } 978 } 979 if (msg_i != fd_num) { 980 error_report("%s: postcopy reply not fully consumed " 981 "%d vs %zd", 982 __func__, msg_i, fd_num); 983 return -EIO; 984 } 985 986 /* 987 * Now we've registered this with the postcopy code, we ack to the 988 * client, because now we're in the position to be able to deal 989 * with any faults it generates. 990 */ 991 /* TODO: Use this for failure cases as well with a bad value. */ 992 msg.hdr.size = sizeof(msg.payload.u64); 993 msg.payload.u64 = 0; /* OK */ 994 ret = vhost_user_write(dev, &msg, NULL, 0); 995 if (ret < 0) { 996 return ret; 997 } 998 } 999 1000 return 0; 1001 } 1002 1003 static int vhost_user_set_mem_table(struct vhost_dev *dev, 1004 struct vhost_memory *mem) 1005 { 1006 struct vhost_user *u = dev->opaque; 1007 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 1008 size_t fd_num = 0; 1009 bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; 1010 bool reply_supported = virtio_has_feature(dev->protocol_features, 1011 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1012 bool config_mem_slots = 1013 virtio_has_feature(dev->protocol_features, 1014 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS); 1015 int ret; 1016 1017 if (do_postcopy) { 1018 /* 1019 * Postcopy has enough differences that it's best done in it's own 1020 * version 1021 */ 1022 return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported, 1023 config_mem_slots); 1024 } 1025 1026 VhostUserMsg msg = { 1027 .hdr.flags = VHOST_USER_VERSION, 1028 }; 1029 1030 if (reply_supported) { 1031 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1032 } 1033 1034 if (config_mem_slots) { 1035 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false); 1036 if (ret < 0) { 1037 return ret; 1038 } 1039 } else { 1040 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 1041 false); 1042 if (ret < 0) { 1043 return ret; 1044 } 1045 1046 ret = vhost_user_write(dev, &msg, fds, fd_num); 1047 if (ret < 0) { 1048 return ret; 1049 } 1050 1051 if (reply_supported) { 1052 return process_message_reply(dev, &msg); 1053 } 1054 } 1055 1056 return 0; 1057 } 1058 1059 static int vhost_user_set_vring_endian(struct vhost_dev *dev, 1060 struct vhost_vring_state *ring) 1061 { 1062 bool cross_endian = virtio_has_feature(dev->protocol_features, 1063 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN); 1064 VhostUserMsg msg = { 1065 .hdr.request = VHOST_USER_SET_VRING_ENDIAN, 1066 .hdr.flags = VHOST_USER_VERSION, 1067 .payload.state = *ring, 1068 .hdr.size = sizeof(msg.payload.state), 1069 }; 1070 1071 if (!cross_endian) { 1072 error_report("vhost-user trying to send unhandled ioctl"); 1073 return -ENOTSUP; 1074 } 1075 1076 return vhost_user_write(dev, &msg, NULL, 0); 1077 } 1078 1079 static int vhost_set_vring(struct vhost_dev *dev, 1080 unsigned long int request, 1081 struct vhost_vring_state *ring) 1082 { 1083 VhostUserMsg msg = { 1084 .hdr.request = request, 1085 .hdr.flags = VHOST_USER_VERSION, 1086 .payload.state = *ring, 1087 .hdr.size = sizeof(msg.payload.state), 1088 }; 1089 1090 return vhost_user_write(dev, &msg, NULL, 0); 1091 } 1092 1093 static int vhost_user_set_vring_num(struct vhost_dev *dev, 1094 struct vhost_vring_state *ring) 1095 { 1096 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); 1097 } 1098 1099 static void vhost_user_host_notifier_free(VhostUserHostNotifier *n) 1100 { 1101 assert(n && n->unmap_addr); 1102 munmap(n->unmap_addr, qemu_real_host_page_size()); 1103 n->unmap_addr = NULL; 1104 } 1105 1106 /* 1107 * clean-up function for notifier, will finally free the structure 1108 * under rcu. 1109 */ 1110 static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n, 1111 VirtIODevice *vdev) 1112 { 1113 if (n->addr) { 1114 if (vdev) { 1115 virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false); 1116 } 1117 assert(!n->unmap_addr); 1118 n->unmap_addr = n->addr; 1119 n->addr = NULL; 1120 call_rcu(n, vhost_user_host_notifier_free, rcu); 1121 } 1122 } 1123 1124 static int vhost_user_set_vring_base(struct vhost_dev *dev, 1125 struct vhost_vring_state *ring) 1126 { 1127 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); 1128 } 1129 1130 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) 1131 { 1132 int i; 1133 1134 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { 1135 return -EINVAL; 1136 } 1137 1138 for (i = 0; i < dev->nvqs; ++i) { 1139 int ret; 1140 struct vhost_vring_state state = { 1141 .index = dev->vq_index + i, 1142 .num = enable, 1143 }; 1144 1145 ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); 1146 if (ret < 0) { 1147 /* 1148 * Restoring the previous state is likely infeasible, as well as 1149 * proceeding regardless the error, so just bail out and hope for 1150 * the device-level recovery. 1151 */ 1152 return ret; 1153 } 1154 } 1155 1156 return 0; 1157 } 1158 1159 static VhostUserHostNotifier *fetch_notifier(VhostUserState *u, 1160 int idx) 1161 { 1162 if (idx >= u->notifiers->len) { 1163 return NULL; 1164 } 1165 return g_ptr_array_index(u->notifiers, idx); 1166 } 1167 1168 static int vhost_user_get_vring_base(struct vhost_dev *dev, 1169 struct vhost_vring_state *ring) 1170 { 1171 int ret; 1172 VhostUserMsg msg = { 1173 .hdr.request = VHOST_USER_GET_VRING_BASE, 1174 .hdr.flags = VHOST_USER_VERSION, 1175 .payload.state = *ring, 1176 .hdr.size = sizeof(msg.payload.state), 1177 }; 1178 struct vhost_user *u = dev->opaque; 1179 1180 VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index); 1181 if (n) { 1182 vhost_user_host_notifier_remove(n, dev->vdev); 1183 } 1184 1185 ret = vhost_user_write(dev, &msg, NULL, 0); 1186 if (ret < 0) { 1187 return ret; 1188 } 1189 1190 ret = vhost_user_read(dev, &msg); 1191 if (ret < 0) { 1192 return ret; 1193 } 1194 1195 if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) { 1196 error_report("Received unexpected msg type. Expected %d received %d", 1197 VHOST_USER_GET_VRING_BASE, msg.hdr.request); 1198 return -EPROTO; 1199 } 1200 1201 if (msg.hdr.size != sizeof(msg.payload.state)) { 1202 error_report("Received bad msg size."); 1203 return -EPROTO; 1204 } 1205 1206 *ring = msg.payload.state; 1207 1208 return 0; 1209 } 1210 1211 static int vhost_set_vring_file(struct vhost_dev *dev, 1212 VhostUserRequest request, 1213 struct vhost_vring_file *file) 1214 { 1215 int fds[VHOST_USER_MAX_RAM_SLOTS]; 1216 size_t fd_num = 0; 1217 VhostUserMsg msg = { 1218 .hdr.request = request, 1219 .hdr.flags = VHOST_USER_VERSION, 1220 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, 1221 .hdr.size = sizeof(msg.payload.u64), 1222 }; 1223 1224 if (ioeventfd_enabled() && file->fd > 0) { 1225 fds[fd_num++] = file->fd; 1226 } else { 1227 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 1228 } 1229 1230 return vhost_user_write(dev, &msg, fds, fd_num); 1231 } 1232 1233 static int vhost_user_set_vring_kick(struct vhost_dev *dev, 1234 struct vhost_vring_file *file) 1235 { 1236 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); 1237 } 1238 1239 static int vhost_user_set_vring_call(struct vhost_dev *dev, 1240 struct vhost_vring_file *file) 1241 { 1242 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); 1243 } 1244 1245 static int vhost_user_set_vring_err(struct vhost_dev *dev, 1246 struct vhost_vring_file *file) 1247 { 1248 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_ERR, file); 1249 } 1250 1251 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) 1252 { 1253 int ret; 1254 VhostUserMsg msg = { 1255 .hdr.request = request, 1256 .hdr.flags = VHOST_USER_VERSION, 1257 }; 1258 1259 if (vhost_user_one_time_request(request) && dev->vq_index != 0) { 1260 return 0; 1261 } 1262 1263 ret = vhost_user_write(dev, &msg, NULL, 0); 1264 if (ret < 0) { 1265 return ret; 1266 } 1267 1268 ret = vhost_user_read(dev, &msg); 1269 if (ret < 0) { 1270 return ret; 1271 } 1272 1273 if (msg.hdr.request != request) { 1274 error_report("Received unexpected msg type. Expected %d received %d", 1275 request, msg.hdr.request); 1276 return -EPROTO; 1277 } 1278 1279 if (msg.hdr.size != sizeof(msg.payload.u64)) { 1280 error_report("Received bad msg size."); 1281 return -EPROTO; 1282 } 1283 1284 *u64 = msg.payload.u64; 1285 1286 return 0; 1287 } 1288 1289 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) 1290 { 1291 if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) { 1292 return -EPROTO; 1293 } 1294 1295 return 0; 1296 } 1297 1298 static int enforce_reply(struct vhost_dev *dev, 1299 const VhostUserMsg *msg) 1300 { 1301 uint64_t dummy; 1302 1303 if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1304 return process_message_reply(dev, msg); 1305 } 1306 1307 /* 1308 * We need to wait for a reply but the backend does not 1309 * support replies for the command we just sent. 1310 * Send VHOST_USER_GET_FEATURES which makes all backends 1311 * send a reply. 1312 */ 1313 return vhost_user_get_features(dev, &dummy); 1314 } 1315 1316 static int vhost_user_set_vring_addr(struct vhost_dev *dev, 1317 struct vhost_vring_addr *addr) 1318 { 1319 int ret; 1320 VhostUserMsg msg = { 1321 .hdr.request = VHOST_USER_SET_VRING_ADDR, 1322 .hdr.flags = VHOST_USER_VERSION, 1323 .payload.addr = *addr, 1324 .hdr.size = sizeof(msg.payload.addr), 1325 }; 1326 1327 bool reply_supported = virtio_has_feature(dev->protocol_features, 1328 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1329 1330 /* 1331 * wait for a reply if logging is enabled to make sure 1332 * backend is actually logging changes 1333 */ 1334 bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG); 1335 1336 if (reply_supported && wait_for_reply) { 1337 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1338 } 1339 1340 ret = vhost_user_write(dev, &msg, NULL, 0); 1341 if (ret < 0) { 1342 return ret; 1343 } 1344 1345 if (wait_for_reply) { 1346 return enforce_reply(dev, &msg); 1347 } 1348 1349 return 0; 1350 } 1351 1352 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64, 1353 bool wait_for_reply) 1354 { 1355 VhostUserMsg msg = { 1356 .hdr.request = request, 1357 .hdr.flags = VHOST_USER_VERSION, 1358 .payload.u64 = u64, 1359 .hdr.size = sizeof(msg.payload.u64), 1360 }; 1361 int ret; 1362 1363 if (wait_for_reply) { 1364 bool reply_supported = virtio_has_feature(dev->protocol_features, 1365 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1366 if (reply_supported) { 1367 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1368 } 1369 } 1370 1371 ret = vhost_user_write(dev, &msg, NULL, 0); 1372 if (ret < 0) { 1373 return ret; 1374 } 1375 1376 if (wait_for_reply) { 1377 return enforce_reply(dev, &msg); 1378 } 1379 1380 return 0; 1381 } 1382 1383 static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status) 1384 { 1385 return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status, false); 1386 } 1387 1388 static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status) 1389 { 1390 uint64_t value; 1391 int ret; 1392 1393 ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &value); 1394 if (ret < 0) { 1395 return ret; 1396 } 1397 *status = value; 1398 1399 return 0; 1400 } 1401 1402 static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status) 1403 { 1404 uint8_t s; 1405 int ret; 1406 1407 ret = vhost_user_get_status(dev, &s); 1408 if (ret < 0) { 1409 return ret; 1410 } 1411 1412 if ((s & status) == status) { 1413 return 0; 1414 } 1415 s |= status; 1416 1417 return vhost_user_set_status(dev, s); 1418 } 1419 1420 static int vhost_user_set_features(struct vhost_dev *dev, 1421 uint64_t features) 1422 { 1423 /* 1424 * wait for a reply if logging is enabled to make sure 1425 * backend is actually logging changes 1426 */ 1427 bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL); 1428 int ret; 1429 1430 /* 1431 * We need to include any extra backend only feature bits that 1432 * might be needed by our device. Currently this includes the 1433 * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol 1434 * features. 1435 */ 1436 ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, 1437 features | dev->backend_features, 1438 log_enabled); 1439 1440 if (virtio_has_feature(dev->protocol_features, 1441 VHOST_USER_PROTOCOL_F_STATUS)) { 1442 if (!ret) { 1443 return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK); 1444 } 1445 } 1446 1447 return ret; 1448 } 1449 1450 static int vhost_user_set_protocol_features(struct vhost_dev *dev, 1451 uint64_t features) 1452 { 1453 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features, 1454 false); 1455 } 1456 1457 static int vhost_user_set_owner(struct vhost_dev *dev) 1458 { 1459 VhostUserMsg msg = { 1460 .hdr.request = VHOST_USER_SET_OWNER, 1461 .hdr.flags = VHOST_USER_VERSION, 1462 }; 1463 1464 return vhost_user_write(dev, &msg, NULL, 0); 1465 } 1466 1467 static int vhost_user_get_max_memslots(struct vhost_dev *dev, 1468 uint64_t *max_memslots) 1469 { 1470 uint64_t backend_max_memslots; 1471 int err; 1472 1473 err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS, 1474 &backend_max_memslots); 1475 if (err < 0) { 1476 return err; 1477 } 1478 1479 *max_memslots = backend_max_memslots; 1480 1481 return 0; 1482 } 1483 1484 static int vhost_user_reset_device(struct vhost_dev *dev) 1485 { 1486 VhostUserMsg msg = { 1487 .hdr.flags = VHOST_USER_VERSION, 1488 }; 1489 1490 msg.hdr.request = virtio_has_feature(dev->protocol_features, 1491 VHOST_USER_PROTOCOL_F_RESET_DEVICE) 1492 ? VHOST_USER_RESET_DEVICE 1493 : VHOST_USER_RESET_OWNER; 1494 1495 return vhost_user_write(dev, &msg, NULL, 0); 1496 } 1497 1498 static int vhost_user_backend_handle_config_change(struct vhost_dev *dev) 1499 { 1500 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { 1501 return -ENOSYS; 1502 } 1503 1504 return dev->config_ops->vhost_dev_config_notifier(dev); 1505 } 1506 1507 /* 1508 * Fetch or create the notifier for a given idx. Newly created 1509 * notifiers are added to the pointer array that tracks them. 1510 */ 1511 static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u, 1512 int idx) 1513 { 1514 VhostUserHostNotifier *n = NULL; 1515 if (idx >= u->notifiers->len) { 1516 g_ptr_array_set_size(u->notifiers, idx + 1); 1517 } 1518 1519 n = g_ptr_array_index(u->notifiers, idx); 1520 if (!n) { 1521 /* 1522 * In case notification arrive out-of-order, 1523 * make room for current index. 1524 */ 1525 g_ptr_array_remove_index(u->notifiers, idx); 1526 n = g_new0(VhostUserHostNotifier, 1); 1527 n->idx = idx; 1528 g_ptr_array_insert(u->notifiers, idx, n); 1529 trace_vhost_user_create_notifier(idx, n); 1530 } 1531 1532 return n; 1533 } 1534 1535 static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev, 1536 VhostUserVringArea *area, 1537 int fd) 1538 { 1539 int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK; 1540 size_t page_size = qemu_real_host_page_size(); 1541 struct vhost_user *u = dev->opaque; 1542 VhostUserState *user = u->user; 1543 VirtIODevice *vdev = dev->vdev; 1544 VhostUserHostNotifier *n; 1545 void *addr; 1546 char *name; 1547 1548 if (!virtio_has_feature(dev->protocol_features, 1549 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) || 1550 vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) { 1551 return -EINVAL; 1552 } 1553 1554 /* 1555 * Fetch notifier and invalidate any old data before setting up 1556 * new mapped address. 1557 */ 1558 n = fetch_or_create_notifier(user, queue_idx); 1559 vhost_user_host_notifier_remove(n, vdev); 1560 1561 if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { 1562 return 0; 1563 } 1564 1565 /* Sanity check. */ 1566 if (area->size != page_size) { 1567 return -EINVAL; 1568 } 1569 1570 addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, 1571 fd, area->offset); 1572 if (addr == MAP_FAILED) { 1573 return -EFAULT; 1574 } 1575 1576 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]", 1577 user, queue_idx); 1578 if (!n->mr.ram) { /* Don't init again after suspend. */ 1579 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, 1580 page_size, addr); 1581 } else { 1582 n->mr.ram_block->host = addr; 1583 } 1584 g_free(name); 1585 1586 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) { 1587 object_unparent(OBJECT(&n->mr)); 1588 munmap(addr, page_size); 1589 return -ENXIO; 1590 } 1591 1592 n->addr = addr; 1593 1594 return 0; 1595 } 1596 1597 static void close_backend_channel(struct vhost_user *u) 1598 { 1599 g_source_destroy(u->backend_src); 1600 g_source_unref(u->backend_src); 1601 u->backend_src = NULL; 1602 object_unref(OBJECT(u->backend_ioc)); 1603 u->backend_ioc = NULL; 1604 } 1605 1606 static gboolean backend_read(QIOChannel *ioc, GIOCondition condition, 1607 gpointer opaque) 1608 { 1609 struct vhost_dev *dev = opaque; 1610 struct vhost_user *u = dev->opaque; 1611 VhostUserHeader hdr = { 0, }; 1612 VhostUserPayload payload = { 0, }; 1613 Error *local_err = NULL; 1614 gboolean rc = G_SOURCE_CONTINUE; 1615 int ret = 0; 1616 struct iovec iov; 1617 g_autofree int *fd = NULL; 1618 size_t fdsize = 0; 1619 int i; 1620 1621 /* Read header */ 1622 iov.iov_base = &hdr; 1623 iov.iov_len = VHOST_USER_HDR_SIZE; 1624 1625 if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) { 1626 error_report_err(local_err); 1627 goto err; 1628 } 1629 1630 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) { 1631 error_report("Failed to read msg header." 1632 " Size %d exceeds the maximum %zu.", hdr.size, 1633 VHOST_USER_PAYLOAD_SIZE); 1634 goto err; 1635 } 1636 1637 /* Read payload */ 1638 if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) { 1639 error_report_err(local_err); 1640 goto err; 1641 } 1642 1643 switch (hdr.request) { 1644 case VHOST_USER_BACKEND_IOTLB_MSG: 1645 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); 1646 break; 1647 case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG: 1648 ret = vhost_user_backend_handle_config_change(dev); 1649 break; 1650 case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG: 1651 ret = vhost_user_backend_handle_vring_host_notifier(dev, &payload.area, 1652 fd ? fd[0] : -1); 1653 break; 1654 default: 1655 error_report("Received unexpected msg type: %d.", hdr.request); 1656 ret = -EINVAL; 1657 } 1658 1659 /* 1660 * REPLY_ACK feature handling. Other reply types has to be managed 1661 * directly in their request handlers. 1662 */ 1663 if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1664 struct iovec iovec[2]; 1665 1666 1667 hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 1668 hdr.flags |= VHOST_USER_REPLY_MASK; 1669 1670 payload.u64 = !!ret; 1671 hdr.size = sizeof(payload.u64); 1672 1673 iovec[0].iov_base = &hdr; 1674 iovec[0].iov_len = VHOST_USER_HDR_SIZE; 1675 iovec[1].iov_base = &payload; 1676 iovec[1].iov_len = hdr.size; 1677 1678 if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) { 1679 error_report_err(local_err); 1680 goto err; 1681 } 1682 } 1683 1684 goto fdcleanup; 1685 1686 err: 1687 close_backend_channel(u); 1688 rc = G_SOURCE_REMOVE; 1689 1690 fdcleanup: 1691 if (fd) { 1692 for (i = 0; i < fdsize; i++) { 1693 close(fd[i]); 1694 } 1695 } 1696 return rc; 1697 } 1698 1699 static int vhost_setup_backend_channel(struct vhost_dev *dev) 1700 { 1701 VhostUserMsg msg = { 1702 .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD, 1703 .hdr.flags = VHOST_USER_VERSION, 1704 }; 1705 struct vhost_user *u = dev->opaque; 1706 int sv[2], ret = 0; 1707 bool reply_supported = virtio_has_feature(dev->protocol_features, 1708 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1709 Error *local_err = NULL; 1710 QIOChannel *ioc; 1711 1712 if (!virtio_has_feature(dev->protocol_features, 1713 VHOST_USER_PROTOCOL_F_BACKEND_REQ)) { 1714 return 0; 1715 } 1716 1717 if (qemu_socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { 1718 int saved_errno = errno; 1719 error_report("socketpair() failed"); 1720 return -saved_errno; 1721 } 1722 1723 ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err)); 1724 if (!ioc) { 1725 error_report_err(local_err); 1726 return -ECONNREFUSED; 1727 } 1728 u->backend_ioc = ioc; 1729 u->backend_src = qio_channel_add_watch_source(u->backend_ioc, 1730 G_IO_IN | G_IO_HUP, 1731 backend_read, dev, NULL, NULL); 1732 1733 if (reply_supported) { 1734 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1735 } 1736 1737 ret = vhost_user_write(dev, &msg, &sv[1], 1); 1738 if (ret) { 1739 goto out; 1740 } 1741 1742 if (reply_supported) { 1743 ret = process_message_reply(dev, &msg); 1744 } 1745 1746 out: 1747 close(sv[1]); 1748 if (ret) { 1749 close_backend_channel(u); 1750 } 1751 1752 return ret; 1753 } 1754 1755 #ifdef CONFIG_LINUX 1756 /* 1757 * Called back from the postcopy fault thread when a fault is received on our 1758 * ufd. 1759 * TODO: This is Linux specific 1760 */ 1761 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd, 1762 void *ufd) 1763 { 1764 struct vhost_dev *dev = pcfd->data; 1765 struct vhost_user *u = dev->opaque; 1766 struct uffd_msg *msg = ufd; 1767 uint64_t faultaddr = msg->arg.pagefault.address; 1768 RAMBlock *rb = NULL; 1769 uint64_t rb_offset; 1770 int i; 1771 1772 trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr, 1773 dev->mem->nregions); 1774 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1775 trace_vhost_user_postcopy_fault_handler_loop(i, 1776 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size); 1777 if (faultaddr >= u->postcopy_client_bases[i]) { 1778 /* Ofset of the fault address in the vhost region */ 1779 uint64_t region_offset = faultaddr - u->postcopy_client_bases[i]; 1780 if (region_offset < dev->mem->regions[i].memory_size) { 1781 rb_offset = region_offset + u->region_rb_offset[i]; 1782 trace_vhost_user_postcopy_fault_handler_found(i, 1783 region_offset, rb_offset); 1784 rb = u->region_rb[i]; 1785 return postcopy_request_shared_page(pcfd, rb, faultaddr, 1786 rb_offset); 1787 } 1788 } 1789 } 1790 error_report("%s: Failed to find region for fault %" PRIx64, 1791 __func__, faultaddr); 1792 return -1; 1793 } 1794 1795 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb, 1796 uint64_t offset) 1797 { 1798 struct vhost_dev *dev = pcfd->data; 1799 struct vhost_user *u = dev->opaque; 1800 int i; 1801 1802 trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset); 1803 1804 if (!u) { 1805 return 0; 1806 } 1807 /* Translate the offset into an address in the clients address space */ 1808 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1809 if (u->region_rb[i] == rb && 1810 offset >= u->region_rb_offset[i] && 1811 offset < (u->region_rb_offset[i] + 1812 dev->mem->regions[i].memory_size)) { 1813 uint64_t client_addr = (offset - u->region_rb_offset[i]) + 1814 u->postcopy_client_bases[i]; 1815 trace_vhost_user_postcopy_waker_found(client_addr); 1816 return postcopy_wake_shared(pcfd, client_addr, rb); 1817 } 1818 } 1819 1820 trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset); 1821 return 0; 1822 } 1823 #endif 1824 1825 /* 1826 * Called at the start of an inbound postcopy on reception of the 1827 * 'advise' command. 1828 */ 1829 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) 1830 { 1831 #ifdef CONFIG_LINUX 1832 struct vhost_user *u = dev->opaque; 1833 CharBackend *chr = u->user->chr; 1834 int ufd; 1835 int ret; 1836 VhostUserMsg msg = { 1837 .hdr.request = VHOST_USER_POSTCOPY_ADVISE, 1838 .hdr.flags = VHOST_USER_VERSION, 1839 }; 1840 1841 ret = vhost_user_write(dev, &msg, NULL, 0); 1842 if (ret < 0) { 1843 error_setg(errp, "Failed to send postcopy_advise to vhost"); 1844 return ret; 1845 } 1846 1847 ret = vhost_user_read(dev, &msg); 1848 if (ret < 0) { 1849 error_setg(errp, "Failed to get postcopy_advise reply from vhost"); 1850 return ret; 1851 } 1852 1853 if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) { 1854 error_setg(errp, "Unexpected msg type. Expected %d received %d", 1855 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request); 1856 return -EPROTO; 1857 } 1858 1859 if (msg.hdr.size) { 1860 error_setg(errp, "Received bad msg size."); 1861 return -EPROTO; 1862 } 1863 ufd = qemu_chr_fe_get_msgfd(chr); 1864 if (ufd < 0) { 1865 error_setg(errp, "%s: Failed to get ufd", __func__); 1866 return -EIO; 1867 } 1868 qemu_socket_set_nonblock(ufd); 1869 1870 /* register ufd with userfault thread */ 1871 u->postcopy_fd.fd = ufd; 1872 u->postcopy_fd.data = dev; 1873 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler; 1874 u->postcopy_fd.waker = vhost_user_postcopy_waker; 1875 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */ 1876 postcopy_register_shared_ufd(&u->postcopy_fd); 1877 return 0; 1878 #else 1879 error_setg(errp, "Postcopy not supported on non-Linux systems"); 1880 return -ENOSYS; 1881 #endif 1882 } 1883 1884 /* 1885 * Called at the switch to postcopy on reception of the 'listen' command. 1886 */ 1887 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) 1888 { 1889 struct vhost_user *u = dev->opaque; 1890 int ret; 1891 VhostUserMsg msg = { 1892 .hdr.request = VHOST_USER_POSTCOPY_LISTEN, 1893 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1894 }; 1895 u->postcopy_listen = true; 1896 1897 trace_vhost_user_postcopy_listen(); 1898 1899 ret = vhost_user_write(dev, &msg, NULL, 0); 1900 if (ret < 0) { 1901 error_setg(errp, "Failed to send postcopy_listen to vhost"); 1902 return ret; 1903 } 1904 1905 ret = process_message_reply(dev, &msg); 1906 if (ret) { 1907 error_setg(errp, "Failed to receive reply to postcopy_listen"); 1908 return ret; 1909 } 1910 1911 return 0; 1912 } 1913 1914 /* 1915 * Called at the end of postcopy 1916 */ 1917 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp) 1918 { 1919 VhostUserMsg msg = { 1920 .hdr.request = VHOST_USER_POSTCOPY_END, 1921 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1922 }; 1923 int ret; 1924 struct vhost_user *u = dev->opaque; 1925 1926 trace_vhost_user_postcopy_end_entry(); 1927 1928 ret = vhost_user_write(dev, &msg, NULL, 0); 1929 if (ret < 0) { 1930 error_setg(errp, "Failed to send postcopy_end to vhost"); 1931 return ret; 1932 } 1933 1934 ret = process_message_reply(dev, &msg); 1935 if (ret) { 1936 error_setg(errp, "Failed to receive reply to postcopy_end"); 1937 return ret; 1938 } 1939 postcopy_unregister_shared_ufd(&u->postcopy_fd); 1940 close(u->postcopy_fd.fd); 1941 u->postcopy_fd.handler = NULL; 1942 1943 trace_vhost_user_postcopy_end_exit(); 1944 1945 return 0; 1946 } 1947 1948 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, 1949 void *opaque) 1950 { 1951 struct PostcopyNotifyData *pnd = opaque; 1952 struct vhost_user *u = container_of(notifier, struct vhost_user, 1953 postcopy_notifier); 1954 struct vhost_dev *dev = u->dev; 1955 1956 switch (pnd->reason) { 1957 case POSTCOPY_NOTIFY_PROBE: 1958 if (!virtio_has_feature(dev->protocol_features, 1959 VHOST_USER_PROTOCOL_F_PAGEFAULT)) { 1960 /* TODO: Get the device name into this error somehow */ 1961 error_setg(pnd->errp, 1962 "vhost-user backend not capable of postcopy"); 1963 return -ENOENT; 1964 } 1965 break; 1966 1967 case POSTCOPY_NOTIFY_INBOUND_ADVISE: 1968 return vhost_user_postcopy_advise(dev, pnd->errp); 1969 1970 case POSTCOPY_NOTIFY_INBOUND_LISTEN: 1971 return vhost_user_postcopy_listen(dev, pnd->errp); 1972 1973 case POSTCOPY_NOTIFY_INBOUND_END: 1974 return vhost_user_postcopy_end(dev, pnd->errp); 1975 1976 default: 1977 /* We ignore notifications we don't know */ 1978 break; 1979 } 1980 1981 return 0; 1982 } 1983 1984 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, 1985 Error **errp) 1986 { 1987 uint64_t features, ram_slots; 1988 struct vhost_user *u; 1989 VhostUserState *vus = (VhostUserState *) opaque; 1990 int err; 1991 1992 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1993 1994 u = g_new0(struct vhost_user, 1); 1995 u->user = vus; 1996 u->dev = dev; 1997 dev->opaque = u; 1998 1999 err = vhost_user_get_features(dev, &features); 2000 if (err < 0) { 2001 error_setg_errno(errp, -err, "vhost_backend_init failed"); 2002 return err; 2003 } 2004 2005 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 2006 bool supports_f_config = vus->supports_config || 2007 (dev->config_ops && dev->config_ops->vhost_dev_config_notifier); 2008 uint64_t protocol_features; 2009 2010 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 2011 2012 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, 2013 &protocol_features); 2014 if (err < 0) { 2015 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2016 return -EPROTO; 2017 } 2018 2019 /* 2020 * We will use all the protocol features we support - although 2021 * we suppress F_CONFIG if we know QEMUs internal code can not support 2022 * it. 2023 */ 2024 protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK; 2025 2026 if (supports_f_config) { 2027 if (!virtio_has_feature(protocol_features, 2028 VHOST_USER_PROTOCOL_F_CONFIG)) { 2029 error_setg(errp, "vhost-user device expecting " 2030 "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user backend does " 2031 "not support it."); 2032 return -EPROTO; 2033 } 2034 } else { 2035 if (virtio_has_feature(protocol_features, 2036 VHOST_USER_PROTOCOL_F_CONFIG)) { 2037 warn_report("vhost-user backend supports " 2038 "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not."); 2039 protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); 2040 } 2041 } 2042 2043 /* final set of protocol features */ 2044 dev->protocol_features = protocol_features; 2045 err = vhost_user_set_protocol_features(dev, dev->protocol_features); 2046 if (err < 0) { 2047 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2048 return -EPROTO; 2049 } 2050 2051 /* query the max queues we support if backend supports Multiple Queue */ 2052 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 2053 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, 2054 &dev->max_queues); 2055 if (err < 0) { 2056 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2057 return -EPROTO; 2058 } 2059 } else { 2060 dev->max_queues = 1; 2061 } 2062 2063 if (dev->num_queues && dev->max_queues < dev->num_queues) { 2064 error_setg(errp, "The maximum number of queues supported by the " 2065 "backend is %" PRIu64, dev->max_queues); 2066 return -EINVAL; 2067 } 2068 2069 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && 2070 !(virtio_has_feature(dev->protocol_features, 2071 VHOST_USER_PROTOCOL_F_BACKEND_REQ) && 2072 virtio_has_feature(dev->protocol_features, 2073 VHOST_USER_PROTOCOL_F_REPLY_ACK))) { 2074 error_setg(errp, "IOMMU support requires reply-ack and " 2075 "backend-req protocol features."); 2076 return -EINVAL; 2077 } 2078 2079 /* get max memory regions if backend supports configurable RAM slots */ 2080 if (!virtio_has_feature(dev->protocol_features, 2081 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) { 2082 u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS; 2083 } else { 2084 err = vhost_user_get_max_memslots(dev, &ram_slots); 2085 if (err < 0) { 2086 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2087 return -EPROTO; 2088 } 2089 2090 if (ram_slots < u->user->memory_slots) { 2091 error_setg(errp, "The backend specified a max ram slots limit " 2092 "of %" PRIu64", when the prior validated limit was " 2093 "%d. This limit should never decrease.", ram_slots, 2094 u->user->memory_slots); 2095 return -EINVAL; 2096 } 2097 2098 u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS); 2099 } 2100 } 2101 2102 if (dev->migration_blocker == NULL && 2103 !virtio_has_feature(dev->protocol_features, 2104 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { 2105 error_setg(&dev->migration_blocker, 2106 "Migration disabled: vhost-user backend lacks " 2107 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); 2108 } 2109 2110 if (dev->vq_index == 0) { 2111 err = vhost_setup_backend_channel(dev); 2112 if (err < 0) { 2113 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2114 return -EPROTO; 2115 } 2116 } 2117 2118 u->postcopy_notifier.notify = vhost_user_postcopy_notifier; 2119 postcopy_add_notifier(&u->postcopy_notifier); 2120 2121 return 0; 2122 } 2123 2124 static int vhost_user_backend_cleanup(struct vhost_dev *dev) 2125 { 2126 struct vhost_user *u; 2127 2128 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2129 2130 u = dev->opaque; 2131 if (u->postcopy_notifier.notify) { 2132 postcopy_remove_notifier(&u->postcopy_notifier); 2133 u->postcopy_notifier.notify = NULL; 2134 } 2135 u->postcopy_listen = false; 2136 if (u->postcopy_fd.handler) { 2137 postcopy_unregister_shared_ufd(&u->postcopy_fd); 2138 close(u->postcopy_fd.fd); 2139 u->postcopy_fd.handler = NULL; 2140 } 2141 if (u->backend_ioc) { 2142 close_backend_channel(u); 2143 } 2144 g_free(u->region_rb); 2145 u->region_rb = NULL; 2146 g_free(u->region_rb_offset); 2147 u->region_rb_offset = NULL; 2148 u->region_rb_len = 0; 2149 g_free(u); 2150 dev->opaque = 0; 2151 2152 return 0; 2153 } 2154 2155 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 2156 { 2157 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 2158 2159 return idx; 2160 } 2161 2162 static int vhost_user_memslots_limit(struct vhost_dev *dev) 2163 { 2164 struct vhost_user *u = dev->opaque; 2165 2166 return u->user->memory_slots; 2167 } 2168 2169 static bool vhost_user_requires_shm_log(struct vhost_dev *dev) 2170 { 2171 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2172 2173 return virtio_has_feature(dev->protocol_features, 2174 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 2175 } 2176 2177 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) 2178 { 2179 VhostUserMsg msg = { }; 2180 2181 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2182 2183 /* If guest supports GUEST_ANNOUNCE do nothing */ 2184 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { 2185 return 0; 2186 } 2187 2188 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ 2189 if (virtio_has_feature(dev->protocol_features, 2190 VHOST_USER_PROTOCOL_F_RARP)) { 2191 msg.hdr.request = VHOST_USER_SEND_RARP; 2192 msg.hdr.flags = VHOST_USER_VERSION; 2193 memcpy((char *)&msg.payload.u64, mac_addr, 6); 2194 msg.hdr.size = sizeof(msg.payload.u64); 2195 2196 return vhost_user_write(dev, &msg, NULL, 0); 2197 } 2198 return -ENOTSUP; 2199 } 2200 2201 static bool vhost_user_can_merge(struct vhost_dev *dev, 2202 uint64_t start1, uint64_t size1, 2203 uint64_t start2, uint64_t size2) 2204 { 2205 ram_addr_t offset; 2206 int mfd, rfd; 2207 2208 (void)vhost_user_get_mr_data(start1, &offset, &mfd); 2209 (void)vhost_user_get_mr_data(start2, &offset, &rfd); 2210 2211 return mfd == rfd; 2212 } 2213 2214 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) 2215 { 2216 VhostUserMsg msg; 2217 bool reply_supported = virtio_has_feature(dev->protocol_features, 2218 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2219 int ret; 2220 2221 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) { 2222 return 0; 2223 } 2224 2225 msg.hdr.request = VHOST_USER_NET_SET_MTU; 2226 msg.payload.u64 = mtu; 2227 msg.hdr.size = sizeof(msg.payload.u64); 2228 msg.hdr.flags = VHOST_USER_VERSION; 2229 if (reply_supported) { 2230 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2231 } 2232 2233 ret = vhost_user_write(dev, &msg, NULL, 0); 2234 if (ret < 0) { 2235 return ret; 2236 } 2237 2238 /* If reply_ack supported, backend has to ack specified MTU is valid */ 2239 if (reply_supported) { 2240 return process_message_reply(dev, &msg); 2241 } 2242 2243 return 0; 2244 } 2245 2246 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev, 2247 struct vhost_iotlb_msg *imsg) 2248 { 2249 int ret; 2250 VhostUserMsg msg = { 2251 .hdr.request = VHOST_USER_IOTLB_MSG, 2252 .hdr.size = sizeof(msg.payload.iotlb), 2253 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 2254 .payload.iotlb = *imsg, 2255 }; 2256 2257 ret = vhost_user_write(dev, &msg, NULL, 0); 2258 if (ret < 0) { 2259 return ret; 2260 } 2261 2262 return process_message_reply(dev, &msg); 2263 } 2264 2265 2266 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled) 2267 { 2268 /* No-op as the receive channel is not dedicated to IOTLB messages. */ 2269 } 2270 2271 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, 2272 uint32_t config_len, Error **errp) 2273 { 2274 int ret; 2275 VhostUserMsg msg = { 2276 .hdr.request = VHOST_USER_GET_CONFIG, 2277 .hdr.flags = VHOST_USER_VERSION, 2278 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len, 2279 }; 2280 2281 if (!virtio_has_feature(dev->protocol_features, 2282 VHOST_USER_PROTOCOL_F_CONFIG)) { 2283 error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported"); 2284 return -EINVAL; 2285 } 2286 2287 assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE); 2288 2289 msg.payload.config.offset = 0; 2290 msg.payload.config.size = config_len; 2291 ret = vhost_user_write(dev, &msg, NULL, 0); 2292 if (ret < 0) { 2293 error_setg_errno(errp, -ret, "vhost_get_config failed"); 2294 return ret; 2295 } 2296 2297 ret = vhost_user_read(dev, &msg); 2298 if (ret < 0) { 2299 error_setg_errno(errp, -ret, "vhost_get_config failed"); 2300 return ret; 2301 } 2302 2303 if (msg.hdr.request != VHOST_USER_GET_CONFIG) { 2304 error_setg(errp, 2305 "Received unexpected msg type. Expected %d received %d", 2306 VHOST_USER_GET_CONFIG, msg.hdr.request); 2307 return -EPROTO; 2308 } 2309 2310 if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) { 2311 error_setg(errp, "Received bad msg size."); 2312 return -EPROTO; 2313 } 2314 2315 memcpy(config, msg.payload.config.region, config_len); 2316 2317 return 0; 2318 } 2319 2320 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data, 2321 uint32_t offset, uint32_t size, uint32_t flags) 2322 { 2323 int ret; 2324 uint8_t *p; 2325 bool reply_supported = virtio_has_feature(dev->protocol_features, 2326 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2327 2328 VhostUserMsg msg = { 2329 .hdr.request = VHOST_USER_SET_CONFIG, 2330 .hdr.flags = VHOST_USER_VERSION, 2331 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size, 2332 }; 2333 2334 if (!virtio_has_feature(dev->protocol_features, 2335 VHOST_USER_PROTOCOL_F_CONFIG)) { 2336 return -ENOTSUP; 2337 } 2338 2339 if (reply_supported) { 2340 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2341 } 2342 2343 if (size > VHOST_USER_MAX_CONFIG_SIZE) { 2344 return -EINVAL; 2345 } 2346 2347 msg.payload.config.offset = offset, 2348 msg.payload.config.size = size, 2349 msg.payload.config.flags = flags, 2350 p = msg.payload.config.region; 2351 memcpy(p, data, size); 2352 2353 ret = vhost_user_write(dev, &msg, NULL, 0); 2354 if (ret < 0) { 2355 return ret; 2356 } 2357 2358 if (reply_supported) { 2359 return process_message_reply(dev, &msg); 2360 } 2361 2362 return 0; 2363 } 2364 2365 static int vhost_user_crypto_create_session(struct vhost_dev *dev, 2366 void *session_info, 2367 uint64_t *session_id) 2368 { 2369 int ret; 2370 bool crypto_session = virtio_has_feature(dev->protocol_features, 2371 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2372 CryptoDevBackendSessionInfo *backend_info = session_info; 2373 VhostUserMsg msg = { 2374 .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION, 2375 .hdr.flags = VHOST_USER_VERSION, 2376 .hdr.size = sizeof(msg.payload.session), 2377 }; 2378 2379 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2380 2381 if (!crypto_session) { 2382 error_report("vhost-user trying to send unhandled ioctl"); 2383 return -ENOTSUP; 2384 } 2385 2386 if (backend_info->op_code == VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION) { 2387 CryptoDevBackendAsymSessionInfo *sess = &backend_info->u.asym_sess_info; 2388 size_t keylen; 2389 2390 memcpy(&msg.payload.session.u.asym.session_setup_data, sess, 2391 sizeof(CryptoDevBackendAsymSessionInfo)); 2392 if (sess->keylen) { 2393 keylen = sizeof(msg.payload.session.u.asym.key); 2394 if (sess->keylen > keylen) { 2395 error_report("Unsupported asymmetric key size"); 2396 return -ENOTSUP; 2397 } 2398 2399 memcpy(&msg.payload.session.u.asym.key, sess->key, 2400 sess->keylen); 2401 } 2402 } else { 2403 CryptoDevBackendSymSessionInfo *sess = &backend_info->u.sym_sess_info; 2404 size_t keylen; 2405 2406 memcpy(&msg.payload.session.u.sym.session_setup_data, sess, 2407 sizeof(CryptoDevBackendSymSessionInfo)); 2408 if (sess->key_len) { 2409 keylen = sizeof(msg.payload.session.u.sym.key); 2410 if (sess->key_len > keylen) { 2411 error_report("Unsupported cipher key size"); 2412 return -ENOTSUP; 2413 } 2414 2415 memcpy(&msg.payload.session.u.sym.key, sess->cipher_key, 2416 sess->key_len); 2417 } 2418 2419 if (sess->auth_key_len > 0) { 2420 keylen = sizeof(msg.payload.session.u.sym.auth_key); 2421 if (sess->auth_key_len > keylen) { 2422 error_report("Unsupported auth key size"); 2423 return -ENOTSUP; 2424 } 2425 2426 memcpy(&msg.payload.session.u.sym.auth_key, sess->auth_key, 2427 sess->auth_key_len); 2428 } 2429 } 2430 2431 msg.payload.session.op_code = backend_info->op_code; 2432 msg.payload.session.session_id = backend_info->session_id; 2433 ret = vhost_user_write(dev, &msg, NULL, 0); 2434 if (ret < 0) { 2435 error_report("vhost_user_write() return %d, create session failed", 2436 ret); 2437 return ret; 2438 } 2439 2440 ret = vhost_user_read(dev, &msg); 2441 if (ret < 0) { 2442 error_report("vhost_user_read() return %d, create session failed", 2443 ret); 2444 return ret; 2445 } 2446 2447 if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) { 2448 error_report("Received unexpected msg type. Expected %d received %d", 2449 VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request); 2450 return -EPROTO; 2451 } 2452 2453 if (msg.hdr.size != sizeof(msg.payload.session)) { 2454 error_report("Received bad msg size."); 2455 return -EPROTO; 2456 } 2457 2458 if (msg.payload.session.session_id < 0) { 2459 error_report("Bad session id: %" PRId64 "", 2460 msg.payload.session.session_id); 2461 return -EINVAL; 2462 } 2463 *session_id = msg.payload.session.session_id; 2464 2465 return 0; 2466 } 2467 2468 static int 2469 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id) 2470 { 2471 int ret; 2472 bool crypto_session = virtio_has_feature(dev->protocol_features, 2473 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2474 VhostUserMsg msg = { 2475 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION, 2476 .hdr.flags = VHOST_USER_VERSION, 2477 .hdr.size = sizeof(msg.payload.u64), 2478 }; 2479 msg.payload.u64 = session_id; 2480 2481 if (!crypto_session) { 2482 error_report("vhost-user trying to send unhandled ioctl"); 2483 return -ENOTSUP; 2484 } 2485 2486 ret = vhost_user_write(dev, &msg, NULL, 0); 2487 if (ret < 0) { 2488 error_report("vhost_user_write() return %d, close session failed", 2489 ret); 2490 return ret; 2491 } 2492 2493 return 0; 2494 } 2495 2496 static bool vhost_user_mem_section_filter(struct vhost_dev *dev, 2497 MemoryRegionSection *section) 2498 { 2499 return memory_region_get_fd(section->mr) >= 0; 2500 } 2501 2502 static int vhost_user_get_inflight_fd(struct vhost_dev *dev, 2503 uint16_t queue_size, 2504 struct vhost_inflight *inflight) 2505 { 2506 void *addr; 2507 int fd; 2508 int ret; 2509 struct vhost_user *u = dev->opaque; 2510 CharBackend *chr = u->user->chr; 2511 VhostUserMsg msg = { 2512 .hdr.request = VHOST_USER_GET_INFLIGHT_FD, 2513 .hdr.flags = VHOST_USER_VERSION, 2514 .payload.inflight.num_queues = dev->nvqs, 2515 .payload.inflight.queue_size = queue_size, 2516 .hdr.size = sizeof(msg.payload.inflight), 2517 }; 2518 2519 if (!virtio_has_feature(dev->protocol_features, 2520 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2521 return 0; 2522 } 2523 2524 ret = vhost_user_write(dev, &msg, NULL, 0); 2525 if (ret < 0) { 2526 return ret; 2527 } 2528 2529 ret = vhost_user_read(dev, &msg); 2530 if (ret < 0) { 2531 return ret; 2532 } 2533 2534 if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) { 2535 error_report("Received unexpected msg type. " 2536 "Expected %d received %d", 2537 VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request); 2538 return -EPROTO; 2539 } 2540 2541 if (msg.hdr.size != sizeof(msg.payload.inflight)) { 2542 error_report("Received bad msg size."); 2543 return -EPROTO; 2544 } 2545 2546 if (!msg.payload.inflight.mmap_size) { 2547 return 0; 2548 } 2549 2550 fd = qemu_chr_fe_get_msgfd(chr); 2551 if (fd < 0) { 2552 error_report("Failed to get mem fd"); 2553 return -EIO; 2554 } 2555 2556 addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE, 2557 MAP_SHARED, fd, msg.payload.inflight.mmap_offset); 2558 2559 if (addr == MAP_FAILED) { 2560 error_report("Failed to mmap mem fd"); 2561 close(fd); 2562 return -EFAULT; 2563 } 2564 2565 inflight->addr = addr; 2566 inflight->fd = fd; 2567 inflight->size = msg.payload.inflight.mmap_size; 2568 inflight->offset = msg.payload.inflight.mmap_offset; 2569 inflight->queue_size = queue_size; 2570 2571 return 0; 2572 } 2573 2574 static int vhost_user_set_inflight_fd(struct vhost_dev *dev, 2575 struct vhost_inflight *inflight) 2576 { 2577 VhostUserMsg msg = { 2578 .hdr.request = VHOST_USER_SET_INFLIGHT_FD, 2579 .hdr.flags = VHOST_USER_VERSION, 2580 .payload.inflight.mmap_size = inflight->size, 2581 .payload.inflight.mmap_offset = inflight->offset, 2582 .payload.inflight.num_queues = dev->nvqs, 2583 .payload.inflight.queue_size = inflight->queue_size, 2584 .hdr.size = sizeof(msg.payload.inflight), 2585 }; 2586 2587 if (!virtio_has_feature(dev->protocol_features, 2588 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2589 return 0; 2590 } 2591 2592 return vhost_user_write(dev, &msg, &inflight->fd, 1); 2593 } 2594 2595 static void vhost_user_state_destroy(gpointer data) 2596 { 2597 VhostUserHostNotifier *n = (VhostUserHostNotifier *) data; 2598 if (n) { 2599 vhost_user_host_notifier_remove(n, NULL); 2600 object_unparent(OBJECT(&n->mr)); 2601 /* 2602 * We can't free until vhost_user_host_notifier_remove has 2603 * done it's thing so schedule the free with RCU. 2604 */ 2605 g_free_rcu(n, rcu); 2606 } 2607 } 2608 2609 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) 2610 { 2611 if (user->chr) { 2612 error_setg(errp, "Cannot initialize vhost-user state"); 2613 return false; 2614 } 2615 user->chr = chr; 2616 user->memory_slots = 0; 2617 user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4, 2618 &vhost_user_state_destroy); 2619 return true; 2620 } 2621 2622 void vhost_user_cleanup(VhostUserState *user) 2623 { 2624 if (!user->chr) { 2625 return; 2626 } 2627 memory_region_transaction_begin(); 2628 user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true); 2629 memory_region_transaction_commit(); 2630 user->chr = NULL; 2631 } 2632 2633 2634 typedef struct { 2635 vu_async_close_fn cb; 2636 DeviceState *dev; 2637 CharBackend *cd; 2638 struct vhost_dev *vhost; 2639 } VhostAsyncCallback; 2640 2641 static void vhost_user_async_close_bh(void *opaque) 2642 { 2643 VhostAsyncCallback *data = opaque; 2644 struct vhost_dev *vhost = data->vhost; 2645 2646 /* 2647 * If the vhost_dev has been cleared in the meantime there is 2648 * nothing left to do as some other path has completed the 2649 * cleanup. 2650 */ 2651 if (vhost->vdev) { 2652 data->cb(data->dev); 2653 } 2654 2655 g_free(data); 2656 } 2657 2658 /* 2659 * We only schedule the work if the machine is running. If suspended 2660 * we want to keep all the in-flight data as is for migration 2661 * purposes. 2662 */ 2663 void vhost_user_async_close(DeviceState *d, 2664 CharBackend *chardev, struct vhost_dev *vhost, 2665 vu_async_close_fn cb) 2666 { 2667 if (!runstate_check(RUN_STATE_SHUTDOWN)) { 2668 /* 2669 * A close event may happen during a read/write, but vhost 2670 * code assumes the vhost_dev remains setup, so delay the 2671 * stop & clear. 2672 */ 2673 AioContext *ctx = qemu_get_current_aio_context(); 2674 VhostAsyncCallback *data = g_new0(VhostAsyncCallback, 1); 2675 2676 /* Save data for the callback */ 2677 data->cb = cb; 2678 data->dev = d; 2679 data->cd = chardev; 2680 data->vhost = vhost; 2681 2682 /* Disable any further notifications on the chardev */ 2683 qemu_chr_fe_set_handlers(chardev, 2684 NULL, NULL, NULL, NULL, NULL, NULL, 2685 false); 2686 2687 aio_bh_schedule_oneshot(ctx, vhost_user_async_close_bh, data); 2688 2689 /* 2690 * Move vhost device to the stopped state. The vhost-user device 2691 * will be clean up and disconnected in BH. This can be useful in 2692 * the vhost migration code. If disconnect was caught there is an 2693 * option for the general vhost code to get the dev state without 2694 * knowing its type (in this case vhost-user). 2695 * 2696 * Note if the vhost device is fully cleared by the time we 2697 * execute the bottom half we won't continue with the cleanup. 2698 */ 2699 vhost->started = false; 2700 } 2701 } 2702 2703 static int vhost_user_dev_start(struct vhost_dev *dev, bool started) 2704 { 2705 if (!virtio_has_feature(dev->protocol_features, 2706 VHOST_USER_PROTOCOL_F_STATUS)) { 2707 return 0; 2708 } 2709 2710 /* Set device status only for last queue pair */ 2711 if (dev->vq_index + dev->nvqs != dev->vq_index_end) { 2712 return 0; 2713 } 2714 2715 if (started) { 2716 return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | 2717 VIRTIO_CONFIG_S_DRIVER | 2718 VIRTIO_CONFIG_S_DRIVER_OK); 2719 } else { 2720 return 0; 2721 } 2722 } 2723 2724 static void vhost_user_reset_status(struct vhost_dev *dev) 2725 { 2726 /* Set device status only for last queue pair */ 2727 if (dev->vq_index + dev->nvqs != dev->vq_index_end) { 2728 return; 2729 } 2730 2731 if (virtio_has_feature(dev->protocol_features, 2732 VHOST_USER_PROTOCOL_F_STATUS)) { 2733 vhost_user_set_status(dev, 0); 2734 } 2735 } 2736 2737 const VhostOps user_ops = { 2738 .backend_type = VHOST_BACKEND_TYPE_USER, 2739 .vhost_backend_init = vhost_user_backend_init, 2740 .vhost_backend_cleanup = vhost_user_backend_cleanup, 2741 .vhost_backend_memslots_limit = vhost_user_memslots_limit, 2742 .vhost_set_log_base = vhost_user_set_log_base, 2743 .vhost_set_mem_table = vhost_user_set_mem_table, 2744 .vhost_set_vring_addr = vhost_user_set_vring_addr, 2745 .vhost_set_vring_endian = vhost_user_set_vring_endian, 2746 .vhost_set_vring_num = vhost_user_set_vring_num, 2747 .vhost_set_vring_base = vhost_user_set_vring_base, 2748 .vhost_get_vring_base = vhost_user_get_vring_base, 2749 .vhost_set_vring_kick = vhost_user_set_vring_kick, 2750 .vhost_set_vring_call = vhost_user_set_vring_call, 2751 .vhost_set_vring_err = vhost_user_set_vring_err, 2752 .vhost_set_features = vhost_user_set_features, 2753 .vhost_get_features = vhost_user_get_features, 2754 .vhost_set_owner = vhost_user_set_owner, 2755 .vhost_reset_device = vhost_user_reset_device, 2756 .vhost_get_vq_index = vhost_user_get_vq_index, 2757 .vhost_set_vring_enable = vhost_user_set_vring_enable, 2758 .vhost_requires_shm_log = vhost_user_requires_shm_log, 2759 .vhost_migration_done = vhost_user_migration_done, 2760 .vhost_backend_can_merge = vhost_user_can_merge, 2761 .vhost_net_set_mtu = vhost_user_net_set_mtu, 2762 .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback, 2763 .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg, 2764 .vhost_get_config = vhost_user_get_config, 2765 .vhost_set_config = vhost_user_set_config, 2766 .vhost_crypto_create_session = vhost_user_crypto_create_session, 2767 .vhost_crypto_close_session = vhost_user_crypto_close_session, 2768 .vhost_backend_mem_section_filter = vhost_user_mem_section_filter, 2769 .vhost_get_inflight_fd = vhost_user_get_inflight_fd, 2770 .vhost_set_inflight_fd = vhost_user_set_inflight_fd, 2771 .vhost_dev_start = vhost_user_dev_start, 2772 .vhost_reset_status = vhost_user_reset_status, 2773 }; 2774