/*
 * vhost-user
 *
 * Copyright (c) 2013 Virtual Open Systems Sarl.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-user.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-net.h"
#include "chardev/char-fe.h"
#include "io/channel-socket.h"
#include "sysemu/kvm.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "qemu/sockets.h"
#include "sysemu/runstate.h"
#include "sysemu/cryptodev.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "trace.h"
#include "exec/ramblock.h"

#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/un.h>

#include "standard-headers/linux/vhost_types.h"

#ifdef CONFIG_LINUX
#include <linux/userfaultfd.h>
#endif

#define VHOST_MEMORY_BASELINE_NREGIONS 8
#define VHOST_USER_F_PROTOCOL_FEATURES 30
#define VHOST_USER_SLAVE_MAX_FDS 8

/*
 * Set maximum number of RAM slots supported to
 * the maximum number supported by the target
 * hardware platform.
 */
#if defined(TARGET_X86) || defined(TARGET_X86_64) || \
    defined(TARGET_ARM) || defined(TARGET_ARM_64)
#include "hw/acpi/acpi.h"
#define VHOST_USER_MAX_RAM_SLOTS ACPI_MAX_RAM_SLOTS

#elif defined(TARGET_PPC) || defined(TARGET_PPC64)
#include "hw/ppc/spapr.h"
#define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS

#else
#define VHOST_USER_MAX_RAM_SLOTS 512
#endif

/*
 * Maximum size of virtio device config space
 */
#define VHOST_USER_MAX_CONFIG_SIZE 256

enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,
    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
    VHOST_USER_PROTOCOL_F_NET_MTU = 4,
    VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
    VHOST_USER_PROTOCOL_F_CONFIG = 9,
    VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
    VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
    VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
    /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS.
     */
    VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
    VHOST_USER_PROTOCOL_F_STATUS = 16,
    VHOST_USER_PROTOCOL_F_MAX
};

#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)

typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_NET_SET_MTU = 20,
    VHOST_USER_SET_SLAVE_REQ_FD = 21,
    VHOST_USER_IOTLB_MSG = 22,
    VHOST_USER_SET_VRING_ENDIAN = 23,
    VHOST_USER_GET_CONFIG = 24,
    VHOST_USER_SET_CONFIG = 25,
    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
    VHOST_USER_POSTCOPY_ADVISE = 28,
    VHOST_USER_POSTCOPY_LISTEN = 29,
    VHOST_USER_POSTCOPY_END = 30,
    VHOST_USER_GET_INFLIGHT_FD = 31,
    VHOST_USER_SET_INFLIGHT_FD = 32,
    VHOST_USER_GPU_SET_SOCKET = 33,
    VHOST_USER_RESET_DEVICE = 34,
    /* Message number 35 reserved for VHOST_USER_VRING_KICK. */
    VHOST_USER_GET_MAX_MEM_SLOTS = 36,
    VHOST_USER_ADD_MEM_REG = 37,
    VHOST_USER_REM_MEM_REG = 38,
    VHOST_USER_SET_STATUS = 39,
    VHOST_USER_GET_STATUS = 40,
    VHOST_USER_MAX
} VhostUserRequest;

typedef enum VhostUserSlaveRequest {
    VHOST_USER_SLAVE_NONE = 0,
    VHOST_USER_SLAVE_IOTLB_MSG = 1,
    VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
    VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
    VHOST_USER_SLAVE_MAX
} VhostUserSlaveRequest;

typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;
} VhostUserMemoryRegion;

typedef struct VhostUserMemory {
    uint32_t nregions;
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS];
} VhostUserMemory;

typedef struct VhostUserMemRegMsg {
    uint64_t padding;
    VhostUserMemoryRegion region;
} VhostUserMemRegMsg;

typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;

typedef struct VhostUserConfig {
    uint32_t offset;
    uint32_t size;
    uint32_t flags;
    uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
} VhostUserConfig;

#define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512
#define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64

typedef struct VhostUserCryptoSession {
    /* session id for success, -1 on errors */
    int64_t session_id;
    CryptoDevBackendSymSessionInfo session_setup_data;
    uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
    uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
} VhostUserCryptoSession;

static VhostUserConfig c __attribute__ ((unused));
#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
                                    + sizeof(c.size) \
                                    + sizeof(c.flags))

typedef struct VhostUserVringArea {
    uint64_t u64;
    uint64_t size;
    uint64_t offset;
} VhostUserVringArea;

typedef struct VhostUserInflight {
    uint64_t mmap_size;
    uint64_t mmap_offset;
    uint16_t num_queues;
    uint16_t queue_size;
} VhostUserInflight;
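
/*
 * Framing overview (informative sketch only; the vhost-user specification is
 * the normative reference): on the wire every message is the packed
 * VhostUserHeader defined below, followed by hdr.size bytes of the payload
 * union, and any file descriptors travel as SCM_RIGHTS ancillary data on the
 * same socket write.  A payload-less request such as VHOST_USER_GET_FEATURES
 * is therefore built and sent roughly as:
 *
 *     VhostUserMsg msg = {
 *         .hdr.request = VHOST_USER_GET_FEATURES,
 *         .hdr.flags = VHOST_USER_VERSION,
 *     };
 *     vhost_user_write(dev, &msg, NULL, 0);    (no fds accompany this one)
 */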

typedef struct {
    VhostUserRequest request;

#define VHOST_USER_VERSION_MASK    (0x3)
#define VHOST_USER_REPLY_MASK      (0x1 << 2)
#define VHOST_USER_NEED_REPLY_MASK (0x1 << 3)
    uint32_t flags;
    uint32_t size; /* the following payload size */
} QEMU_PACKED VhostUserHeader;

typedef union {
#define VHOST_USER_VRING_IDX_MASK  (0xff)
#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8)
    uint64_t u64;
    struct vhost_vring_state state;
    struct vhost_vring_addr addr;
    VhostUserMemory memory;
    VhostUserMemRegMsg mem_reg;
    VhostUserLog log;
    struct vhost_iotlb_msg iotlb;
    VhostUserConfig config;
    VhostUserCryptoSession session;
    VhostUserVringArea area;
    VhostUserInflight inflight;
} VhostUserPayload;

typedef struct VhostUserMsg {
    VhostUserHeader hdr;
    VhostUserPayload payload;
} QEMU_PACKED VhostUserMsg;

static VhostUserMsg m __attribute__ ((unused));
#define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))

#define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))

/* The version of the protocol we support */
#define VHOST_USER_VERSION (0x1)

struct vhost_user {
    struct vhost_dev *dev;
    /* Shared between vhost devs of the same virtio device */
    VhostUserState *user;
    QIOChannel *slave_ioc;
    GSource *slave_src;
    NotifierWithReturn postcopy_notifier;
    struct PostCopyFD postcopy_fd;
    uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS];
    /* Length of the region_rb and region_rb_offset arrays */
    size_t region_rb_len;
    /* RAMBlock associated with a given region */
    RAMBlock **region_rb;
    /*
     * The offset from the start of the RAMBlock to the start of the
     * vhost region.
     */
    ram_addr_t *region_rb_offset;

    /* True once we've entered postcopy_listen */
    bool postcopy_listen;

    /* Our current regions */
    int num_shadow_regions;
    struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS];
};

struct scrub_regions {
    struct vhost_memory_region *region;
    int reg_idx;
    int fd_idx;
};

static bool ioeventfd_enabled(void)
{
    return !kvm_enabled() || kvm_eventfds_enabled();
}

static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    uint8_t *p = (uint8_t *) msg;
    int r, size = VHOST_USER_HDR_SIZE;

    r = qemu_chr_fe_read_all(chr, p, size);
    if (r != size) {
        int saved_errno = errno;
        error_report("Failed to read msg header. Read %d instead of %d."
                     " Original request %d.", r, size, msg->hdr.request);
        return r < 0 ? -saved_errno : -EIO;
    }

    /* validate received flags */
    if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
        error_report("Failed to read msg header."
                     " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
                     VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
        return -EPROTO;
    }

    trace_vhost_user_read(msg->hdr.request, msg->hdr.flags);

    return 0;
}

struct vhost_user_read_cb_data {
    struct vhost_dev *dev;
    VhostUserMsg *msg;
    GMainLoop *loop;
    int ret;
};

static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
                                   gpointer opaque)
{
    struct vhost_user_read_cb_data *data = opaque;
    struct vhost_dev *dev = data->dev;
    VhostUserMsg *msg = data->msg;
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    uint8_t *p = (uint8_t *) msg;
    int r, size;

    r = vhost_user_read_header(dev, msg);
    if (r < 0) {
        data->ret = r;
        goto end;
    }

    /* validate message size is sane */
    if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
        error_report("Failed to read msg header."
                     " Size %d exceeds the maximum %zu.", msg->hdr.size,
                     VHOST_USER_PAYLOAD_SIZE);
        data->ret = -EPROTO;
        goto end;
    }

    if (msg->hdr.size) {
        p += VHOST_USER_HDR_SIZE;
        size = msg->hdr.size;
        r = qemu_chr_fe_read_all(chr, p, size);
        if (r != size) {
            int saved_errno = errno;
            error_report("Failed to read msg payload."
                         " Read %d instead of %d.", r, msg->hdr.size);
            data->ret = r < 0 ? -saved_errno : -EIO;
            goto end;
        }
    }

end:
    g_main_loop_quit(data->loop);
    return G_SOURCE_REMOVE;
}

static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
                           gpointer opaque);

/*
 * This updates the read handler to use a new event loop context.
 * Event sources are removed from the previous context: this ensures
 * that events detected in the previous context are purged. They will
 * be re-detected and processed in the new context.
 */
static void slave_update_read_handler(struct vhost_dev *dev,
                                      GMainContext *ctxt)
{
    struct vhost_user *u = dev->opaque;

    if (!u->slave_ioc) {
        return;
    }

    if (u->slave_src) {
        g_source_destroy(u->slave_src);
        g_source_unref(u->slave_src);
    }

    u->slave_src = qio_channel_add_watch_source(u->slave_ioc,
                                                G_IO_IN | G_IO_HUP,
                                                slave_read, dev, NULL,
                                                ctxt);
}

static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    GMainContext *prev_ctxt = chr->chr->gcontext;
    GMainContext *ctxt = g_main_context_new();
    GMainLoop *loop = g_main_loop_new(ctxt, FALSE);
    struct vhost_user_read_cb_data data = {
        .dev = dev,
        .loop = loop,
        .msg = msg,
        .ret = 0
    };

    /*
     * We want to be able to monitor the slave channel fd while waiting
     * for chr I/O. This requires an event loop, but we can't nest the
     * one to which chr is currently attached: its fd handlers might not
     * be prepared for re-entrancy. So we create a new one and switch chr
     * to use it.
     */
    slave_update_read_handler(dev, ctxt);
    qemu_chr_be_update_read_handlers(chr->chr, ctxt);
    qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data);

    g_main_loop_run(loop);

    /*
     * Restore the previous event loop context. This also destroys/recreates
     * event sources: this guarantees that all pending events in the original
     * context that have been processed by the nested loop are purged.
     */
    qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt);
    slave_update_read_handler(dev, NULL);

    g_main_loop_unref(loop);
    g_main_context_unref(ctxt);

    return data.ret;
}

static int process_message_reply(struct vhost_dev *dev,
                                 const VhostUserMsg *msg)
{
    int ret;
    VhostUserMsg msg_reply;

    if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
        return 0;
    }

    ret = vhost_user_read(dev, &msg_reply);
    if (ret < 0) {
        return ret;
    }

    if (msg_reply.hdr.request != msg->hdr.request) {
        error_report("Received unexpected msg type. "
                     "Expected %d received %d",
                     msg->hdr.request, msg_reply.hdr.request);
        return -EPROTO;
    }

    return msg_reply.payload.u64 ? -EIO : 0;
}

static bool vhost_user_one_time_request(VhostUserRequest request)
{
    switch (request) {
    case VHOST_USER_SET_OWNER:
    case VHOST_USER_RESET_OWNER:
    case VHOST_USER_SET_MEM_TABLE:
    case VHOST_USER_GET_QUEUE_NUM:
    case VHOST_USER_NET_SET_MTU:
        return true;
    default:
        return false;
    }
}

/* most non-init callers ignore the error */
static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
                            int *fds, int fd_num)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;

    /*
     * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
     * we only need to send it once, the first time. Later occurrences of
     * such a request are simply ignored.
     */
    if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) {
        msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
        return 0;
    }

    if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
        error_report("Failed to set msg fds.");
        return -EINVAL;
    }

    ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
    if (ret != size) {
        int saved_errno = errno;
        error_report("Failed to write msg."
                     " Wrote %d instead of %d.", ret, size);
        return ret < 0 ? -saved_errno : -EIO;
    }

    trace_vhost_user_write(msg->hdr.request, msg->hdr.flags);

    return 0;
}

int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GPU_SET_SOCKET,
        .hdr.flags = VHOST_USER_VERSION,
    };

    return vhost_user_write(dev, &msg, &fd, 1);
}

static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    int fds[VHOST_USER_MAX_RAM_SLOTS];
    size_t fd_num = 0;
    bool shmfd = virtio_has_feature(dev->protocol_features,
                                    VHOST_USER_PROTOCOL_F_LOG_SHMFD);
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_LOG_BASE,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.log.mmap_size = log->size * sizeof(*(log->log)),
        .payload.log.mmap_offset = 0,
        .hdr.size = sizeof(msg.payload.log),
    };

    if (shmfd && log->fd != -1) {
        fds[fd_num++] = log->fd;
    }

    ret = vhost_user_write(dev, &msg, fds, fd_num);
    if (ret < 0) {
        return ret;
    }

    if (shmfd) {
        msg.hdr.size = 0;
        ret = vhost_user_read(dev, &msg);
        if (ret < 0) {
            return ret;
        }

        if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
            error_report("Received unexpected msg type. "
" 548 "Expected %d received %d", 549 VHOST_USER_SET_LOG_BASE, msg.hdr.request); 550 return -EPROTO; 551 } 552 } 553 554 return 0; 555 } 556 557 static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset, 558 int *fd) 559 { 560 MemoryRegion *mr; 561 562 assert((uintptr_t)addr == addr); 563 mr = memory_region_from_host((void *)(uintptr_t)addr, offset); 564 *fd = memory_region_get_fd(mr); 565 566 return mr; 567 } 568 569 static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst, 570 struct vhost_memory_region *src, 571 uint64_t mmap_offset) 572 { 573 assert(src != NULL && dst != NULL); 574 dst->userspace_addr = src->userspace_addr; 575 dst->memory_size = src->memory_size; 576 dst->guest_phys_addr = src->guest_phys_addr; 577 dst->mmap_offset = mmap_offset; 578 } 579 580 static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, 581 struct vhost_dev *dev, 582 VhostUserMsg *msg, 583 int *fds, size_t *fd_num, 584 bool track_ramblocks) 585 { 586 int i, fd; 587 ram_addr_t offset; 588 MemoryRegion *mr; 589 struct vhost_memory_region *reg; 590 VhostUserMemoryRegion region_buffer; 591 592 msg->hdr.request = VHOST_USER_SET_MEM_TABLE; 593 594 for (i = 0; i < dev->mem->nregions; ++i) { 595 reg = dev->mem->regions + i; 596 597 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 598 if (fd > 0) { 599 if (track_ramblocks) { 600 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS); 601 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name, 602 reg->memory_size, 603 reg->guest_phys_addr, 604 reg->userspace_addr, 605 offset); 606 u->region_rb_offset[i] = offset; 607 u->region_rb[i] = mr->ram_block; 608 } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) { 609 error_report("Failed preparing vhost-user memory table msg"); 610 return -ENOBUFS; 611 } 612 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 613 msg->payload.memory.regions[*fd_num] = region_buffer; 614 fds[(*fd_num)++] = fd; 615 } else if (track_ramblocks) { 616 u->region_rb_offset[i] = 0; 617 u->region_rb[i] = NULL; 618 } 619 } 620 621 msg->payload.memory.nregions = *fd_num; 622 623 if (!*fd_num) { 624 error_report("Failed initializing vhost-user memory map, " 625 "consider using -object memory-backend-file share=on"); 626 return -EINVAL; 627 } 628 629 msg->hdr.size = sizeof(msg->payload.memory.nregions); 630 msg->hdr.size += sizeof(msg->payload.memory.padding); 631 msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion); 632 633 return 0; 634 } 635 636 static inline bool reg_equal(struct vhost_memory_region *shadow_reg, 637 struct vhost_memory_region *vdev_reg) 638 { 639 return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr && 640 shadow_reg->userspace_addr == vdev_reg->userspace_addr && 641 shadow_reg->memory_size == vdev_reg->memory_size; 642 } 643 644 static void scrub_shadow_regions(struct vhost_dev *dev, 645 struct scrub_regions *add_reg, 646 int *nr_add_reg, 647 struct scrub_regions *rem_reg, 648 int *nr_rem_reg, uint64_t *shadow_pcb, 649 bool track_ramblocks) 650 { 651 struct vhost_user *u = dev->opaque; 652 bool found[VHOST_USER_MAX_RAM_SLOTS] = {}; 653 struct vhost_memory_region *reg, *shadow_reg; 654 int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0; 655 ram_addr_t offset; 656 MemoryRegion *mr; 657 bool matching; 658 659 /* 660 * Find memory regions present in our shadow state which are not in 661 * the device's current memory state. 662 * 663 * Mark regions in both the shadow and device state as "found". 
     */
    for (i = 0; i < u->num_shadow_regions; i++) {
        shadow_reg = &u->shadow_regions[i];
        matching = false;

        for (j = 0; j < dev->mem->nregions; j++) {
            reg = &dev->mem->regions[j];

            mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);

            if (reg_equal(shadow_reg, reg)) {
                matching = true;
                found[j] = true;
                if (track_ramblocks) {
                    /*
                     * Reset postcopy client bases, region_rb, and
                     * region_rb_offset in case regions are removed.
                     */
                    if (fd > 0) {
                        u->region_rb_offset[j] = offset;
                        u->region_rb[j] = mr->ram_block;
                        shadow_pcb[j] = u->postcopy_client_bases[i];
                    } else {
                        u->region_rb_offset[j] = 0;
                        u->region_rb[j] = NULL;
                    }
                }
                break;
            }
        }

        /*
         * If the region was not found in the current device memory state
         * create an entry for it in the removed list.
         */
        if (!matching) {
            rem_reg[rm_idx].region = shadow_reg;
            rem_reg[rm_idx++].reg_idx = i;
        }
    }

    /*
     * For regions not marked "found", create entries in the added list.
     *
     * Note their indexes in the device memory state and the indexes of their
     * file descriptors.
     */
    for (i = 0; i < dev->mem->nregions; i++) {
        reg = &dev->mem->regions[i];
        vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
        if (fd > 0) {
            ++fd_num;
        }

        /*
         * If the region was in both the shadow and device state we don't
         * need to send a VHOST_USER_ADD_MEM_REG message for it.
         */
        if (found[i]) {
            continue;
        }

        add_reg[add_idx].region = reg;
        add_reg[add_idx].reg_idx = i;
        add_reg[add_idx++].fd_idx = fd_num;
    }
    *nr_rem_reg = rm_idx;
    *nr_add_reg = add_idx;

    return;
}

static int send_remove_regions(struct vhost_dev *dev,
                               struct scrub_regions *remove_reg,
                               int nr_rem_reg, VhostUserMsg *msg,
                               bool reply_supported)
{
    struct vhost_user *u = dev->opaque;
    struct vhost_memory_region *shadow_reg;
    int i, fd, shadow_reg_idx, ret;
    ram_addr_t offset;
    VhostUserMemoryRegion region_buffer;

    /*
     * The regions in remove_reg appear in the same order they do in the
     * shadow table. Therefore we can minimize memory copies by iterating
     * through remove_reg backwards.
     */
    for (i = nr_rem_reg - 1; i >= 0; i--) {
        shadow_reg = remove_reg[i].region;
        shadow_reg_idx = remove_reg[i].reg_idx;

        vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd);

        if (fd > 0) {
            msg->hdr.request = VHOST_USER_REM_MEM_REG;
            vhost_user_fill_msg_region(&region_buffer, shadow_reg, 0);
            msg->payload.mem_reg.region = region_buffer;

            ret = vhost_user_write(dev, msg, NULL, 0);
            if (ret < 0) {
                return ret;
            }

            if (reply_supported) {
                ret = process_message_reply(dev, msg);
                if (ret) {
                    return ret;
                }
            }
        }

        /*
         * At this point we know the backend has unmapped the region. It is now
         * safe to remove it from the shadow table.
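         *
         * (Informative: the memmove() below simply compacts the shadow array,
         * shifting every entry after the removed slot down by one.)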
         */
        memmove(&u->shadow_regions[shadow_reg_idx],
                &u->shadow_regions[shadow_reg_idx + 1],
                sizeof(struct vhost_memory_region) *
                (u->num_shadow_regions - shadow_reg_idx - 1));
        u->num_shadow_regions--;
    }

    return 0;
}

static int send_add_regions(struct vhost_dev *dev,
                            struct scrub_regions *add_reg, int nr_add_reg,
                            VhostUserMsg *msg, uint64_t *shadow_pcb,
                            bool reply_supported, bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    int i, fd, ret, reg_idx, reg_fd_idx;
    struct vhost_memory_region *reg;
    MemoryRegion *mr;
    ram_addr_t offset;
    VhostUserMsg msg_reply;
    VhostUserMemoryRegion region_buffer;

    for (i = 0; i < nr_add_reg; i++) {
        reg = add_reg[i].region;
        reg_idx = add_reg[i].reg_idx;
        reg_fd_idx = add_reg[i].fd_idx;

        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);

        if (fd > 0) {
            if (track_ramblocks) {
                trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name,
                                                      reg->memory_size,
                                                      reg->guest_phys_addr,
                                                      reg->userspace_addr,
                                                      offset);
                u->region_rb_offset[reg_idx] = offset;
                u->region_rb[reg_idx] = mr->ram_block;
            }
            msg->hdr.request = VHOST_USER_ADD_MEM_REG;
            vhost_user_fill_msg_region(&region_buffer, reg, offset);
            msg->payload.mem_reg.region = region_buffer;

            ret = vhost_user_write(dev, msg, &fd, 1);
            if (ret < 0) {
                return ret;
            }

            if (track_ramblocks) {
                uint64_t reply_gpa;

                ret = vhost_user_read(dev, &msg_reply);
                if (ret < 0) {
                    return ret;
                }

                reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr;

                if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) {
                    error_report("%s: Received unexpected msg type. "
                                 "Expected %d received %d", __func__,
                                 VHOST_USER_ADD_MEM_REG,
                                 msg_reply.hdr.request);
                    return -EPROTO;
                }

                /*
                 * We're using the same structure, just reusing one of the
                 * fields, so it should be the same size.
                 */
                if (msg_reply.hdr.size != msg->hdr.size) {
                    error_report("%s: Unexpected size for postcopy reply "
                                 "%d vs %d", __func__, msg_reply.hdr.size,
                                 msg->hdr.size);
                    return -EPROTO;
                }

                /* Get the postcopy client base from the backend's reply. */
                if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) {
                    shadow_pcb[reg_idx] =
                        msg_reply.payload.mem_reg.region.userspace_addr;
                    trace_vhost_user_set_mem_table_postcopy(
                        msg_reply.payload.mem_reg.region.userspace_addr,
                        msg->payload.mem_reg.region.userspace_addr,
                        reg_fd_idx, reg_idx);
                } else {
                    error_report("%s: invalid postcopy reply for region. "
                                 "Got guest physical address %" PRIX64 ", expected "
                                 "%" PRIX64, __func__, reply_gpa,
                                 dev->mem->regions[reg_idx].guest_phys_addr);
                    return -EPROTO;
                }
            } else if (reply_supported) {
                ret = process_message_reply(dev, msg);
                if (ret) {
                    return ret;
                }
            }
        } else if (track_ramblocks) {
            u->region_rb_offset[reg_idx] = 0;
            u->region_rb[reg_idx] = NULL;
        }

        /*
         * At this point, we know the backend has mapped in the new
         * region, if the region has a valid file descriptor.
         *
         * The region should now be added to the shadow table.
         */
        u->shadow_regions[u->num_shadow_regions].guest_phys_addr =
            reg->guest_phys_addr;
        u->shadow_regions[u->num_shadow_regions].userspace_addr =
            reg->userspace_addr;
        u->shadow_regions[u->num_shadow_regions].memory_size =
            reg->memory_size;
        u->num_shadow_regions++;
    }

    return 0;
}

static int vhost_user_add_remove_regions(struct vhost_dev *dev,
                                         VhostUserMsg *msg,
                                         bool reply_supported,
                                         bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS];
    struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS];
    uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {};
    int nr_add_reg, nr_rem_reg;
    int ret;

    msg->hdr.size = sizeof(msg->payload.mem_reg);

    /* Find the regions which need to be removed or added. */
    scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg,
                         shadow_pcb, track_ramblocks);

    if (nr_rem_reg) {
        ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg,
                                  reply_supported);
        if (ret < 0) {
            goto err;
        }
    }

    if (nr_add_reg) {
        ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb,
                               reply_supported, track_ramblocks);
        if (ret < 0) {
            goto err;
        }
    }

    if (track_ramblocks) {
        memcpy(u->postcopy_client_bases, shadow_pcb,
               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
        /*
         * Now we've registered this with the postcopy code, we ack to the
         * client, because now we're in the position to be able to deal with
         * any faults it generates.
         */
        /* TODO: Use this for failure cases as well with a bad value. */
        msg->hdr.size = sizeof(msg->payload.u64);
        msg->payload.u64 = 0; /* OK */

        ret = vhost_user_write(dev, msg, NULL, 0);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;

err:
    if (track_ramblocks) {
        memcpy(u->postcopy_client_bases, shadow_pcb,
               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
    }

    return ret;
}

static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
                                             struct vhost_memory *mem,
                                             bool reply_supported,
                                             bool config_mem_slots)
{
    struct vhost_user *u = dev->opaque;
    int fds[VHOST_MEMORY_BASELINE_NREGIONS];
    size_t fd_num = 0;
    VhostUserMsg msg_reply;
    int region_i, msg_i;
    int ret;

    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (u->region_rb_len < dev->mem->nregions) {
        u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
        u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
                                      dev->mem->nregions);
        memset(&(u->region_rb[u->region_rb_len]), '\0',
               sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
        memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
               sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
        u->region_rb_len = dev->mem->nregions;
    }

    if (config_mem_slots) {
        ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true);
        if (ret < 0) {
            return ret;
        }
    } else {
        ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
                                                true);
        if (ret < 0) {
            return ret;
        }

        ret = vhost_user_write(dev, &msg, fds, fd_num);
        if (ret < 0) {
            return ret;
        }

        ret = vhost_user_read(dev, &msg_reply);
        if (ret < 0) {
            return ret;
        }

        if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
            error_report("%s: Received unexpected msg type. "
                         "Expected %d received %d", __func__,
                         VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
            return -EPROTO;
        }

        /*
         * We're using the same structure, just reusing one of the
         * fields, so it should be the same size.
         */
        if (msg_reply.hdr.size != msg.hdr.size) {
            error_report("%s: Unexpected size for postcopy reply "
                         "%d vs %d", __func__, msg_reply.hdr.size,
                         msg.hdr.size);
            return -EPROTO;
        }

        memset(u->postcopy_client_bases, 0,
               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);

        /*
         * They're in the same order as the regions that were sent
         * but some of the regions were skipped (above) if they
         * didn't have fds.
         */
        for (msg_i = 0, region_i = 0;
             region_i < dev->mem->nregions;
             region_i++) {
            if (msg_i < fd_num &&
                msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
                dev->mem->regions[region_i].guest_phys_addr) {
                u->postcopy_client_bases[region_i] =
                    msg_reply.payload.memory.regions[msg_i].userspace_addr;
                trace_vhost_user_set_mem_table_postcopy(
                    msg_reply.payload.memory.regions[msg_i].userspace_addr,
                    msg.payload.memory.regions[msg_i].userspace_addr,
                    msg_i, region_i);
                msg_i++;
            }
        }
        if (msg_i != fd_num) {
            error_report("%s: postcopy reply not fully consumed "
                         "%d vs %zd",
                         __func__, msg_i, fd_num);
            return -EIO;
        }

        /*
         * Now we've registered this with the postcopy code, we ack to the
         * client, because now we're in the position to be able to deal
         * with any faults it generates.
         */
        /* TODO: Use this for failure cases as well with a bad value. */
        msg.hdr.size = sizeof(msg.payload.u64);
        msg.payload.u64 = 0; /* OK */
        ret = vhost_user_write(dev, &msg, NULL, 0);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

static int vhost_user_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    struct vhost_user *u = dev->opaque;
    int fds[VHOST_MEMORY_BASELINE_NREGIONS];
    size_t fd_num = 0;
    bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
    bool config_mem_slots =
        virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS);
    int ret;

    if (do_postcopy) {
        /*
         * Postcopy has enough differences that it's best done in its own
         * version
         */
        return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported,
                                                 config_mem_slots);
    }

    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    if (config_mem_slots) {
        ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false);
        if (ret < 0) {
            return ret;
        }
    } else {
        ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
                                                false);
        if (ret < 0) {
            return ret;
        }

        ret = vhost_user_write(dev, &msg, fds, fd_num);
        if (ret < 0) {
            return ret;
        }

        if (reply_supported) {
            return process_message_reply(dev, &msg);
        }
    }

    return 0;
}

static int vhost_user_set_vring_endian(struct vhost_dev *dev,
                                       struct vhost_vring_state *ring)
{
    bool cross_endian = virtio_has_feature(dev->protocol_features,
                                           VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };

    if (!cross_endian) {
        error_report("vhost-user trying to send unhandled ioctl");
        return -ENOTSUP;
    }

    return vhost_user_write(dev, &msg, NULL, 0);
}

static int vhost_set_vring(struct vhost_dev *dev,
                           unsigned long int request,
                           struct vhost_vring_state *ring)
{
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };

    return vhost_user_write(dev, &msg, NULL, 0);
}

static int vhost_user_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
}

static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
{
    assert(n && n->unmap_addr);
    munmap(n->unmap_addr, qemu_real_host_page_size());
    n->unmap_addr = NULL;
}

/*
 * clean-up function for notifier, will finally free the structure
 * under rcu.
 */
static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
                                            VirtIODevice *vdev)
{
    if (n->addr) {
        if (vdev) {
            virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false);
        }
        assert(!n->unmap_addr);
        n->unmap_addr = n->addr;
        n->addr = NULL;
        call_rcu(n, vhost_user_host_notifier_free, rcu);
    }
}

static int vhost_user_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
}

static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
{
    int i;

    if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
        return -EINVAL;
    }

    for (i = 0; i < dev->nvqs; ++i) {
        int ret;
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = enable,
        };

        ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
        if (ret < 0) {
            /*
             * Restoring the previous state is likely infeasible, as is
             * proceeding regardless of the error, so just bail out and hope
             * for device-level recovery.
             */
            return ret;
        }
    }

    return 0;
}

static VhostUserHostNotifier *fetch_notifier(VhostUserState *u,
                                             int idx)
{
    if (idx >= u->notifiers->len) {
        return NULL;
    }
    return g_ptr_array_index(u->notifiers, idx);
}

static int vhost_user_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_VRING_BASE,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };
    struct vhost_user *u = dev->opaque;

    VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
    if (n) {
        vhost_user_host_notifier_remove(n, dev->vdev);
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_GET_VRING_BASE, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != sizeof(msg.payload.state)) {
        error_report("Received bad msg size.");
        return -EPROTO;
    }

    *ring = msg.payload.state;

    return 0;
}

static int vhost_set_vring_file(struct vhost_dev *dev,
                                VhostUserRequest request,
                                struct vhost_vring_file *file)
{
    int fds[VHOST_USER_MAX_RAM_SLOTS];
    size_t fd_num = 0;
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
        .hdr.size = sizeof(msg.payload.u64),
    };

    if (ioeventfd_enabled() && file->fd > 0) {
        fds[fd_num++] = file->fd;
    } else {
        msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
    }

    return vhost_user_write(dev, &msg, fds, fd_num);
}

static int vhost_user_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
}

static int vhost_user_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
}

static int vhost_user_set_vring_err(struct vhost_dev *dev,
                                    struct vhost_vring_file *file)
{
    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_ERR, file);
}

static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
        return 0;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        return ret;
    }

    if (msg.hdr.request != request) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     request, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != sizeof(msg.payload.u64)) {
        error_report("Received bad msg size.");
        return -EPROTO;
    }

    *u64 = msg.payload.u64;

    return 0;
}

static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
{
    if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) {
        return -EPROTO;
    }

    return 0;
}

static int enforce_reply(struct vhost_dev *dev,
                         const VhostUserMsg *msg)
{
    uint64_t dummy;

    if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
        return process_message_reply(dev, msg);
    }

    /*
     * We need to wait for a reply but the backend does not
     * support replies for the command we just sent.
     * Send VHOST_USER_GET_FEATURES which makes all backends
     * send a reply.
     */
    return vhost_user_get_features(dev, &dummy);
}

static int vhost_user_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_VRING_ADDR,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.addr = *addr,
        .hdr.size = sizeof(msg.payload.addr),
    };

    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);

    /*
     * wait for a reply if logging is enabled to make sure
     * backend is actually logging changes
     */
    bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG);

    if (reply_supported && wait_for_reply) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    if (wait_for_reply) {
        return enforce_reply(dev, &msg);
    }

    return 0;
}

static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64,
                              bool wait_for_reply)
{
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.u64 = u64,
        .hdr.size = sizeof(msg.payload.u64),
    };
    int ret;

    if (wait_for_reply) {
        bool reply_supported = virtio_has_feature(dev->protocol_features,
                                          VHOST_USER_PROTOCOL_F_REPLY_ACK);
        if (reply_supported) {
            msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
        }
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    if (wait_for_reply) {
        return enforce_reply(dev, &msg);
    }

    return 0;
}

static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status)
{
    return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status, false);
}

static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status)
{
    uint64_t value;
    int ret;

    ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &value);
    if (ret < 0) {
        return ret;
    }
    *status = value;

    return 0;
}

static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;
    int ret;

    ret = vhost_user_get_status(dev, &s);
    if (ret < 0) {
        return ret;
    }

    if ((s & status) == status) {
        return 0;
    }
    s |= status;

    return vhost_user_set_status(dev, s);
}
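
/*
 * Informative sketch of the negotiation this file drives (the vhost-user
 * specification is the normative reference):
 *
 *   VHOST_USER_GET_FEATURES           -> check VHOST_USER_F_PROTOCOL_FEATURES
 *   VHOST_USER_GET_PROTOCOL_FEATURES  -> masked with
 *                                        VHOST_USER_PROTOCOL_FEATURE_MASK
 *   VHOST_USER_SET_PROTOCOL_FEATURES  -> final protocol feature set
 *   VHOST_USER_SET_FEATURES           -> virtio feature bits, plus any
 *                                        backend-only bits such as
 *                                        VHOST_USER_F_PROTOCOL_FEATURES
 */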

static int vhost_user_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    /*
     * wait for a reply if logging is enabled to make sure
     * backend is actually logging changes
     */
    bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL);
    int ret;

    /*
     * We need to include any extra backend-only feature bits that
     * might be needed by our device. Currently this includes the
     * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol
     * features.
     */
    ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES,
                             features | dev->backend_features,
                             log_enabled);

    if (virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_STATUS)) {
        if (!ret) {
            return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
        }
    }

    return ret;
}

static int vhost_user_set_protocol_features(struct vhost_dev *dev,
                                            uint64_t features)
{
    return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features,
                              false);
}

static int vhost_user_set_owner(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_OWNER,
        .hdr.flags = VHOST_USER_VERSION,
    };

    return vhost_user_write(dev, &msg, NULL, 0);
}

static int vhost_user_get_max_memslots(struct vhost_dev *dev,
                                       uint64_t *max_memslots)
{
    uint64_t backend_max_memslots;
    int err;

    err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS,
                             &backend_max_memslots);
    if (err < 0) {
        return err;
    }

    *max_memslots = backend_max_memslots;

    return 0;
}

static int vhost_user_reset_device(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
    };

    msg.hdr.request = virtio_has_feature(dev->protocol_features,
                                         VHOST_USER_PROTOCOL_F_RESET_DEVICE)
        ? VHOST_USER_RESET_DEVICE
        : VHOST_USER_RESET_OWNER;

    return vhost_user_write(dev, &msg, NULL, 0);
}

static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
{
    if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
        return -ENOSYS;
    }

    return dev->config_ops->vhost_dev_config_notifier(dev);
}

/*
 * Fetch or create the notifier for a given idx. Newly created
 * notifiers are added to the pointer array that tracks them.
 */
static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u,
                                                       int idx)
{
    VhostUserHostNotifier *n = NULL;
    if (idx >= u->notifiers->len) {
        g_ptr_array_set_size(u->notifiers, idx + 1);
    }

    n = g_ptr_array_index(u->notifiers, idx);
    if (!n) {
        /*
         * In case notifications arrive out of order,
         * make room for the current index.
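         *
         * (g_ptr_array_set_size() pads any newly added slots with NULL, so
         * drop the NULL placeholder at idx before inserting the new notifier
         * in its place.)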
         */
        g_ptr_array_remove_index(u->notifiers, idx);
        n = g_new0(VhostUserHostNotifier, 1);
        n->idx = idx;
        g_ptr_array_insert(u->notifiers, idx, n);
        trace_vhost_user_create_notifier(idx, n);
    }

    return n;
}

static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
                                                       VhostUserVringArea *area,
                                                       int fd)
{
    int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
    size_t page_size = qemu_real_host_page_size();
    struct vhost_user *u = dev->opaque;
    VhostUserState *user = u->user;
    VirtIODevice *vdev = dev->vdev;
    VhostUserHostNotifier *n;
    void *addr;
    char *name;

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
        vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
        return -EINVAL;
    }

    /*
     * Fetch notifier and invalidate any old data before setting up
     * new mapped address.
     */
    n = fetch_or_create_notifier(user, queue_idx);
    vhost_user_host_notifier_remove(n, vdev);

    if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
        return 0;
    }

    /* Sanity check. */
    if (area->size != page_size) {
        return -EINVAL;
    }

    addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                fd, area->offset);
    if (addr == MAP_FAILED) {
        return -EFAULT;
    }

    name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
                           user, queue_idx);
    if (!n->mr.ram) { /* Don't init again after suspend. */
        memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
                                          page_size, addr);
    } else {
        n->mr.ram_block->host = addr;
    }
    g_free(name);

    if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
        object_unparent(OBJECT(&n->mr));
        munmap(addr, page_size);
        return -ENXIO;
    }

    n->addr = addr;

    return 0;
}

static void close_slave_channel(struct vhost_user *u)
{
    g_source_destroy(u->slave_src);
    g_source_unref(u->slave_src);
    u->slave_src = NULL;
    object_unref(OBJECT(u->slave_ioc));
    u->slave_ioc = NULL;
}

static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
                           gpointer opaque)
{
    struct vhost_dev *dev = opaque;
    struct vhost_user *u = dev->opaque;
    VhostUserHeader hdr = { 0, };
    VhostUserPayload payload = { 0, };
    Error *local_err = NULL;
    gboolean rc = G_SOURCE_CONTINUE;
    int ret = 0;
    struct iovec iov;
    g_autofree int *fd = NULL;
    size_t fdsize = 0;
    int i;

    /* Read header */
    iov.iov_base = &hdr;
    iov.iov_len = VHOST_USER_HDR_SIZE;

    if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) {
        error_report_err(local_err);
        goto err;
    }

    if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
        error_report("Failed to read msg header."
                     " Size %d exceeds the maximum %zu.", hdr.size,
                     VHOST_USER_PAYLOAD_SIZE);
        goto err;
    }

    /* Read payload */
    if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) {
        error_report_err(local_err);
        goto err;
    }

    switch (hdr.request) {
    case VHOST_USER_SLAVE_IOTLB_MSG:
        ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
        break;
    case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
        ret = vhost_user_slave_handle_config_change(dev);
        break;
    case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
        ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area,
                                                          fd ? fd[0] : -1);
        break;
    default:
        error_report("Received unexpected msg type: %d.", hdr.request);
        ret = -EINVAL;
    }

    /*
     * REPLY_ACK feature handling. Other reply types have to be managed
     * directly in their request handlers.
     */
    if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
        struct iovec iovec[2];

        hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
        hdr.flags |= VHOST_USER_REPLY_MASK;

        payload.u64 = !!ret;
        hdr.size = sizeof(payload.u64);

        iovec[0].iov_base = &hdr;
        iovec[0].iov_len = VHOST_USER_HDR_SIZE;
        iovec[1].iov_base = &payload;
        iovec[1].iov_len = hdr.size;

        if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) {
            error_report_err(local_err);
            goto err;
        }
    }

    goto fdcleanup;

err:
    close_slave_channel(u);
    rc = G_SOURCE_REMOVE;

fdcleanup:
    if (fd) {
        for (i = 0; i < fdsize; i++) {
            close(fd[i]);
        }
    }
    return rc;
}

static int vhost_setup_slave_channel(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD,
        .hdr.flags = VHOST_USER_VERSION,
    };
    struct vhost_user *u = dev->opaque;
    int sv[2], ret = 0;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
    Error *local_err = NULL;
    QIOChannel *ioc;

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
        return 0;
    }

    if (qemu_socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
        int saved_errno = errno;
        error_report("socketpair() failed");
        return -saved_errno;
    }

    ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err));
    if (!ioc) {
        error_report_err(local_err);
        return -ECONNREFUSED;
    }
    u->slave_ioc = ioc;
    slave_update_read_handler(dev, NULL);

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    ret = vhost_user_write(dev, &msg, &sv[1], 1);
    if (ret) {
        goto out;
    }

    if (reply_supported) {
        ret = process_message_reply(dev, &msg);
    }

out:
    close(sv[1]);
    if (ret) {
        close_slave_channel(u);
    }

    return ret;
}

#ifdef CONFIG_LINUX
/*
 * Called back from the postcopy fault thread when a fault is received on our
 * ufd.
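 * The faulting address is in the backend's (client's) address space; it is
 * translated back to a RAMBlock and offset using the postcopy_client_bases[]
 * and region_rb[]/region_rb_offset[] tables recorded at SET_MEM_TABLE time,
 * and the postcopy code is then asked to supply that page.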
 * TODO: This is Linux specific
 */
static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
                                             void *ufd)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    struct uffd_msg *msg = ufd;
    uint64_t faultaddr = msg->arg.pagefault.address;
    RAMBlock *rb = NULL;
    uint64_t rb_offset;
    int i;

    trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
                                            dev->mem->nregions);
    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
        trace_vhost_user_postcopy_fault_handler_loop(i,
                u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
        if (faultaddr >= u->postcopy_client_bases[i]) {
            /* Offset of the fault address in the vhost region */
            uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
            if (region_offset < dev->mem->regions[i].memory_size) {
                rb_offset = region_offset + u->region_rb_offset[i];
                trace_vhost_user_postcopy_fault_handler_found(i,
                        region_offset, rb_offset);
                rb = u->region_rb[i];
                return postcopy_request_shared_page(pcfd, rb, faultaddr,
                                                    rb_offset);
            }
        }
    }
    error_report("%s: Failed to find region for fault %" PRIx64,
                 __func__, faultaddr);
    return -1;
}

static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
                                     uint64_t offset)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    int i;

    trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);

    if (!u) {
        return 0;
    }
    /* Translate the offset into an address in the client's address space */
    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
        if (u->region_rb[i] == rb &&
            offset >= u->region_rb_offset[i] &&
            offset < (u->region_rb_offset[i] +
                      dev->mem->regions[i].memory_size)) {
            uint64_t client_addr = (offset - u->region_rb_offset[i]) +
                                   u->postcopy_client_bases[i];
            trace_vhost_user_postcopy_waker_found(client_addr);
            return postcopy_wake_shared(pcfd, client_addr, rb);
        }
    }

    trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
    return 0;
}
#endif

/*
 * Called at the start of an inbound postcopy on reception of the
 * 'advise' command.
 */
static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
{
#ifdef CONFIG_LINUX
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    int ufd;
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
        .hdr.flags = VHOST_USER_VERSION,
    };

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_setg(errp, "Failed to send postcopy_advise to vhost");
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        error_setg(errp, "Failed to get postcopy_advise reply from vhost");
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
        error_setg(errp, "Unexpected msg type. Expected %d received %d",
                   VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size) {
        error_setg(errp, "Received bad msg size.");
        return -EPROTO;
    }
    ufd = qemu_chr_fe_get_msgfd(chr);
    if (ufd < 0) {
        error_setg(errp, "%s: Failed to get ufd", __func__);
        return -EIO;
    }
    qemu_socket_set_nonblock(ufd);

    /* register ufd with userfault thread */
    u->postcopy_fd.fd = ufd;
    u->postcopy_fd.data = dev;
    u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
    u->postcopy_fd.waker = vhost_user_postcopy_waker;
    u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
    postcopy_register_shared_ufd(&u->postcopy_fd);
    return 0;
#else
    error_setg(errp, "Postcopy not supported on non-Linux systems");
    return -ENOSYS;
#endif
}

/*
 * Called at the switch to postcopy on reception of the 'listen' command.
 */
static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
{
    struct vhost_user *u = dev->opaque;
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
    };
    u->postcopy_listen = true;

    trace_vhost_user_postcopy_listen();

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_setg(errp, "Failed to send postcopy_listen to vhost");
        return ret;
    }

    ret = process_message_reply(dev, &msg);
    if (ret) {
        error_setg(errp, "Failed to receive reply to postcopy_listen");
        return ret;
    }

    return 0;
}

/*
 * Called at the end of postcopy
 */
static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_END,
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
    };
    int ret;
    struct vhost_user *u = dev->opaque;

    trace_vhost_user_postcopy_end_entry();

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_setg(errp, "Failed to send postcopy_end to vhost");
        return ret;
    }

    ret = process_message_reply(dev, &msg);
    if (ret) {
        error_setg(errp, "Failed to receive reply to postcopy_end");
        return ret;
    }
    postcopy_unregister_shared_ufd(&u->postcopy_fd);
    close(u->postcopy_fd.fd);
    u->postcopy_fd.handler = NULL;

    trace_vhost_user_postcopy_end_exit();

    return 0;
}

static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
                                        void *opaque)
{
    struct PostcopyNotifyData *pnd = opaque;
    struct vhost_user *u = container_of(notifier, struct vhost_user,
                                        postcopy_notifier);
    struct vhost_dev *dev = u->dev;

    switch (pnd->reason) {
    case POSTCOPY_NOTIFY_PROBE:
        if (!virtio_has_feature(dev->protocol_features,
                                VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
            /* TODO: Get the device name into this error somehow */
            error_setg(pnd->errp,
                       "vhost-user backend not capable of postcopy");
            return -ENOENT;
        }
        break;

    case POSTCOPY_NOTIFY_INBOUND_ADVISE:
        return vhost_user_postcopy_advise(dev, pnd->errp);

    case POSTCOPY_NOTIFY_INBOUND_LISTEN:
        return vhost_user_postcopy_listen(dev, pnd->errp);

    case POSTCOPY_NOTIFY_INBOUND_END:
        return vhost_user_postcopy_end(dev, pnd->errp);

    default:
2051 /* We ignore notifications we don't know */ 2052 break; 2053 } 2054 2055 return 0; 2056 } 2057 2058 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, 2059 Error **errp) 2060 { 2061 uint64_t features, ram_slots; 2062 struct vhost_user *u; 2063 VhostUserState *vus = (VhostUserState *) opaque; 2064 int err; 2065 2066 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2067 2068 u = g_new0(struct vhost_user, 1); 2069 u->user = vus; 2070 u->dev = dev; 2071 dev->opaque = u; 2072 2073 err = vhost_user_get_features(dev, &features); 2074 if (err < 0) { 2075 error_setg_errno(errp, -err, "vhost_backend_init failed"); 2076 return err; 2077 } 2078 2079 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 2080 bool supports_f_config = vus->supports_config || 2081 (dev->config_ops && dev->config_ops->vhost_dev_config_notifier); 2082 uint64_t protocol_features; 2083 2084 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 2085 2086 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, 2087 &protocol_features); 2088 if (err < 0) { 2089 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2090 return -EPROTO; 2091 } 2092 2093 /* 2094 * We will use all the protocol features we support - although 2095 * we suppress F_CONFIG if we know QEMU's internal code cannot support 2096 * it. 2097 */ 2098 protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK; 2099 2100 if (supports_f_config) { 2101 if (!virtio_has_feature(protocol_features, 2102 VHOST_USER_PROTOCOL_F_CONFIG)) { 2103 error_setg(errp, "vhost-user device expecting " 2104 "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user backend does " 2105 "not support it."); 2106 return -EPROTO; 2107 } 2108 } else { 2109 if (virtio_has_feature(protocol_features, 2110 VHOST_USER_PROTOCOL_F_CONFIG)) { 2111 warn_report("vhost-user backend supports " 2112 "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not."); 2113 protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); 2114 } 2115 } 2116 2117 /* final set of protocol features */ 2118 dev->protocol_features = protocol_features; 2119 err = vhost_user_set_protocol_features(dev, dev->protocol_features); 2120 if (err < 0) { 2121 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2122 return -EPROTO; 2123 } 2124 2125 /* query the max queues we support if the backend supports multiple queues */ 2126 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 2127 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, 2128 &dev->max_queues); 2129 if (err < 0) { 2130 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2131 return -EPROTO; 2132 } 2133 } else { 2134 dev->max_queues = 1; 2135 } 2136 2137 if (dev->num_queues && dev->max_queues < dev->num_queues) { 2138 error_setg(errp, "The maximum number of queues supported by the " 2139 "backend is %" PRIu64, dev->max_queues); 2140 return -EINVAL; 2141 } 2142 2143 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && 2144 !(virtio_has_feature(dev->protocol_features, 2145 VHOST_USER_PROTOCOL_F_SLAVE_REQ) && 2146 virtio_has_feature(dev->protocol_features, 2147 VHOST_USER_PROTOCOL_F_REPLY_ACK))) { 2148 error_setg(errp, "IOMMU support requires reply-ack and " 2149 "slave-req protocol features."); 2150 return -EINVAL; 2151 } 2152 2153 /* get max memory regions if backend supports configurable RAM slots */ 2154 if (!virtio_has_feature(dev->protocol_features, 2155 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) { 2156 u->user->memory_slots = 
VHOST_MEMORY_BASELINE_NREGIONS; 2157 } else { 2158 err = vhost_user_get_max_memslots(dev, &ram_slots); 2159 if (err < 0) { 2160 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2161 return -EPROTO; 2162 } 2163 2164 if (ram_slots < u->user->memory_slots) { 2165 error_setg(errp, "The backend specified a max ram slots limit " 2166 "of %" PRIu64", when the prior validated limit was " 2167 "%d. This limit should never decrease.", ram_slots, 2168 u->user->memory_slots); 2169 return -EINVAL; 2170 } 2171 2172 u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS); 2173 } 2174 } 2175 2176 if (dev->migration_blocker == NULL && 2177 !virtio_has_feature(dev->protocol_features, 2178 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { 2179 error_setg(&dev->migration_blocker, 2180 "Migration disabled: vhost-user backend lacks " 2181 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); 2182 } 2183 2184 if (dev->vq_index == 0) { 2185 err = vhost_setup_slave_channel(dev); 2186 if (err < 0) { 2187 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2188 return -EPROTO; 2189 } 2190 } 2191 2192 u->postcopy_notifier.notify = vhost_user_postcopy_notifier; 2193 postcopy_add_notifier(&u->postcopy_notifier); 2194 2195 return 0; 2196 } 2197 2198 static int vhost_user_backend_cleanup(struct vhost_dev *dev) 2199 { 2200 struct vhost_user *u; 2201 2202 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2203 2204 u = dev->opaque; 2205 if (u->postcopy_notifier.notify) { 2206 postcopy_remove_notifier(&u->postcopy_notifier); 2207 u->postcopy_notifier.notify = NULL; 2208 } 2209 u->postcopy_listen = false; 2210 if (u->postcopy_fd.handler) { 2211 postcopy_unregister_shared_ufd(&u->postcopy_fd); 2212 close(u->postcopy_fd.fd); 2213 u->postcopy_fd.handler = NULL; 2214 } 2215 if (u->slave_ioc) { 2216 close_slave_channel(u); 2217 } 2218 g_free(u->region_rb); 2219 u->region_rb = NULL; 2220 g_free(u->region_rb_offset); 2221 u->region_rb_offset = NULL; 2222 u->region_rb_len = 0; 2223 g_free(u); 2224 dev->opaque = 0; 2225 2226 return 0; 2227 } 2228 2229 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 2230 { 2231 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 2232 2233 return idx; 2234 } 2235 2236 static int vhost_user_memslots_limit(struct vhost_dev *dev) 2237 { 2238 struct vhost_user *u = dev->opaque; 2239 2240 return u->user->memory_slots; 2241 } 2242 2243 static bool vhost_user_requires_shm_log(struct vhost_dev *dev) 2244 { 2245 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2246 2247 return virtio_has_feature(dev->protocol_features, 2248 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 2249 } 2250 2251 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) 2252 { 2253 VhostUserMsg msg = { }; 2254 2255 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2256 2257 /* If guest supports GUEST_ANNOUNCE do nothing */ 2258 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { 2259 return 0; 2260 } 2261 2262 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ 2263 if (virtio_has_feature(dev->protocol_features, 2264 VHOST_USER_PROTOCOL_F_RARP)) { 2265 msg.hdr.request = VHOST_USER_SEND_RARP; 2266 msg.hdr.flags = VHOST_USER_VERSION; 2267 memcpy((char *)&msg.payload.u64, mac_addr, 6); 2268 msg.hdr.size = sizeof(msg.payload.u64); 2269 2270 return vhost_user_write(dev, &msg, NULL, 0); 2271 } 2272 return -ENOTSUP; 2273 } 2274 2275 static bool vhost_user_can_merge(struct vhost_dev *dev, 2276 uint64_t start1, 
uint64_t size1, 2277 uint64_t start2, uint64_t size2) 2278 { 2279 ram_addr_t offset; 2280 int mfd, rfd; 2281 2282 (void)vhost_user_get_mr_data(start1, &offset, &mfd); 2283 (void)vhost_user_get_mr_data(start2, &offset, &rfd); 2284 2285 return mfd == rfd; 2286 } 2287 2288 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) 2289 { 2290 VhostUserMsg msg; 2291 bool reply_supported = virtio_has_feature(dev->protocol_features, 2292 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2293 int ret; 2294 2295 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) { 2296 return 0; 2297 } 2298 2299 msg.hdr.request = VHOST_USER_NET_SET_MTU; 2300 msg.payload.u64 = mtu; 2301 msg.hdr.size = sizeof(msg.payload.u64); 2302 msg.hdr.flags = VHOST_USER_VERSION; 2303 if (reply_supported) { 2304 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2305 } 2306 2307 ret = vhost_user_write(dev, &msg, NULL, 0); 2308 if (ret < 0) { 2309 return ret; 2310 } 2311 2312 /* If reply_ack supported, slave has to ack specified MTU is valid */ 2313 if (reply_supported) { 2314 return process_message_reply(dev, &msg); 2315 } 2316 2317 return 0; 2318 } 2319 2320 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev, 2321 struct vhost_iotlb_msg *imsg) 2322 { 2323 int ret; 2324 VhostUserMsg msg = { 2325 .hdr.request = VHOST_USER_IOTLB_MSG, 2326 .hdr.size = sizeof(msg.payload.iotlb), 2327 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 2328 .payload.iotlb = *imsg, 2329 }; 2330 2331 ret = vhost_user_write(dev, &msg, NULL, 0); 2332 if (ret < 0) { 2333 return ret; 2334 } 2335 2336 return process_message_reply(dev, &msg); 2337 } 2338 2339 2340 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled) 2341 { 2342 /* No-op as the receive channel is not dedicated to IOTLB messages. */ 2343 } 2344 2345 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, 2346 uint32_t config_len, Error **errp) 2347 { 2348 int ret; 2349 VhostUserMsg msg = { 2350 .hdr.request = VHOST_USER_GET_CONFIG, 2351 .hdr.flags = VHOST_USER_VERSION, 2352 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len, 2353 }; 2354 2355 if (!virtio_has_feature(dev->protocol_features, 2356 VHOST_USER_PROTOCOL_F_CONFIG)) { 2357 error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported"); 2358 return -EINVAL; 2359 } 2360 2361 assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE); 2362 2363 msg.payload.config.offset = 0; 2364 msg.payload.config.size = config_len; 2365 ret = vhost_user_write(dev, &msg, NULL, 0); 2366 if (ret < 0) { 2367 error_setg_errno(errp, -ret, "vhost_get_config failed"); 2368 return ret; 2369 } 2370 2371 ret = vhost_user_read(dev, &msg); 2372 if (ret < 0) { 2373 error_setg_errno(errp, -ret, "vhost_get_config failed"); 2374 return ret; 2375 } 2376 2377 if (msg.hdr.request != VHOST_USER_GET_CONFIG) { 2378 error_setg(errp, 2379 "Received unexpected msg type. 
Expected %d received %d", 2380 VHOST_USER_GET_CONFIG, msg.hdr.request); 2381 return -EPROTO; 2382 } 2383 2384 if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) { 2385 error_setg(errp, "Received bad msg size."); 2386 return -EPROTO; 2387 } 2388 2389 memcpy(config, msg.payload.config.region, config_len); 2390 2391 return 0; 2392 } 2393 2394 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data, 2395 uint32_t offset, uint32_t size, uint32_t flags) 2396 { 2397 int ret; 2398 uint8_t *p; 2399 bool reply_supported = virtio_has_feature(dev->protocol_features, 2400 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2401 2402 VhostUserMsg msg = { 2403 .hdr.request = VHOST_USER_SET_CONFIG, 2404 .hdr.flags = VHOST_USER_VERSION, 2405 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size, 2406 }; 2407 2408 if (!virtio_has_feature(dev->protocol_features, 2409 VHOST_USER_PROTOCOL_F_CONFIG)) { 2410 return -ENOTSUP; 2411 } 2412 2413 if (reply_supported) { 2414 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2415 } 2416 2417 if (size > VHOST_USER_MAX_CONFIG_SIZE) { 2418 return -EINVAL; 2419 } 2420 2421 msg.payload.config.offset = offset, 2422 msg.payload.config.size = size, 2423 msg.payload.config.flags = flags, 2424 p = msg.payload.config.region; 2425 memcpy(p, data, size); 2426 2427 ret = vhost_user_write(dev, &msg, NULL, 0); 2428 if (ret < 0) { 2429 return ret; 2430 } 2431 2432 if (reply_supported) { 2433 return process_message_reply(dev, &msg); 2434 } 2435 2436 return 0; 2437 } 2438 2439 static int vhost_user_crypto_create_session(struct vhost_dev *dev, 2440 void *session_info, 2441 uint64_t *session_id) 2442 { 2443 int ret; 2444 bool crypto_session = virtio_has_feature(dev->protocol_features, 2445 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2446 CryptoDevBackendSymSessionInfo *sess_info = session_info; 2447 VhostUserMsg msg = { 2448 .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION, 2449 .hdr.flags = VHOST_USER_VERSION, 2450 .hdr.size = sizeof(msg.payload.session), 2451 }; 2452 2453 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2454 2455 if (!crypto_session) { 2456 error_report("vhost-user trying to send unhandled ioctl"); 2457 return -ENOTSUP; 2458 } 2459 2460 memcpy(&msg.payload.session.session_setup_data, sess_info, 2461 sizeof(CryptoDevBackendSymSessionInfo)); 2462 if (sess_info->key_len) { 2463 memcpy(&msg.payload.session.key, sess_info->cipher_key, 2464 sess_info->key_len); 2465 } 2466 if (sess_info->auth_key_len > 0) { 2467 memcpy(&msg.payload.session.auth_key, sess_info->auth_key, 2468 sess_info->auth_key_len); 2469 } 2470 ret = vhost_user_write(dev, &msg, NULL, 0); 2471 if (ret < 0) { 2472 error_report("vhost_user_write() return %d, create session failed", 2473 ret); 2474 return ret; 2475 } 2476 2477 ret = vhost_user_read(dev, &msg); 2478 if (ret < 0) { 2479 error_report("vhost_user_read() return %d, create session failed", 2480 ret); 2481 return ret; 2482 } 2483 2484 if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) { 2485 error_report("Received unexpected msg type. 
Expected %d received %d", 2486 VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request); 2487 return -EPROTO; 2488 } 2489 2490 if (msg.hdr.size != sizeof(msg.payload.session)) { 2491 error_report("Received bad msg size."); 2492 return -EPROTO; 2493 } 2494 2495 if (msg.payload.session.session_id < 0) { 2496 error_report("Bad session id: %" PRId64 "", 2497 msg.payload.session.session_id); 2498 return -EINVAL; 2499 } 2500 *session_id = msg.payload.session.session_id; 2501 2502 return 0; 2503 } 2504 2505 static int 2506 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id) 2507 { 2508 int ret; 2509 bool crypto_session = virtio_has_feature(dev->protocol_features, 2510 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2511 VhostUserMsg msg = { 2512 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION, 2513 .hdr.flags = VHOST_USER_VERSION, 2514 .hdr.size = sizeof(msg.payload.u64), 2515 }; 2516 msg.payload.u64 = session_id; 2517 2518 if (!crypto_session) { 2519 error_report("vhost-user trying to send unhandled ioctl"); 2520 return -ENOTSUP; 2521 } 2522 2523 ret = vhost_user_write(dev, &msg, NULL, 0); 2524 if (ret < 0) { 2525 error_report("vhost_user_write() return %d, close session failed", 2526 ret); 2527 return ret; 2528 } 2529 2530 return 0; 2531 } 2532 2533 static bool vhost_user_mem_section_filter(struct vhost_dev *dev, 2534 MemoryRegionSection *section) 2535 { 2536 return memory_region_get_fd(section->mr) >= 0; 2537 } 2538 2539 static int vhost_user_get_inflight_fd(struct vhost_dev *dev, 2540 uint16_t queue_size, 2541 struct vhost_inflight *inflight) 2542 { 2543 void *addr; 2544 int fd; 2545 int ret; 2546 struct vhost_user *u = dev->opaque; 2547 CharBackend *chr = u->user->chr; 2548 VhostUserMsg msg = { 2549 .hdr.request = VHOST_USER_GET_INFLIGHT_FD, 2550 .hdr.flags = VHOST_USER_VERSION, 2551 .payload.inflight.num_queues = dev->nvqs, 2552 .payload.inflight.queue_size = queue_size, 2553 .hdr.size = sizeof(msg.payload.inflight), 2554 }; 2555 2556 if (!virtio_has_feature(dev->protocol_features, 2557 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2558 return 0; 2559 } 2560 2561 ret = vhost_user_write(dev, &msg, NULL, 0); 2562 if (ret < 0) { 2563 return ret; 2564 } 2565 2566 ret = vhost_user_read(dev, &msg); 2567 if (ret < 0) { 2568 return ret; 2569 } 2570 2571 if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) { 2572 error_report("Received unexpected msg type. 
" 2573 "Expected %d received %d", 2574 VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request); 2575 return -EPROTO; 2576 } 2577 2578 if (msg.hdr.size != sizeof(msg.payload.inflight)) { 2579 error_report("Received bad msg size."); 2580 return -EPROTO; 2581 } 2582 2583 if (!msg.payload.inflight.mmap_size) { 2584 return 0; 2585 } 2586 2587 fd = qemu_chr_fe_get_msgfd(chr); 2588 if (fd < 0) { 2589 error_report("Failed to get mem fd"); 2590 return -EIO; 2591 } 2592 2593 addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE, 2594 MAP_SHARED, fd, msg.payload.inflight.mmap_offset); 2595 2596 if (addr == MAP_FAILED) { 2597 error_report("Failed to mmap mem fd"); 2598 close(fd); 2599 return -EFAULT; 2600 } 2601 2602 inflight->addr = addr; 2603 inflight->fd = fd; 2604 inflight->size = msg.payload.inflight.mmap_size; 2605 inflight->offset = msg.payload.inflight.mmap_offset; 2606 inflight->queue_size = queue_size; 2607 2608 return 0; 2609 } 2610 2611 static int vhost_user_set_inflight_fd(struct vhost_dev *dev, 2612 struct vhost_inflight *inflight) 2613 { 2614 VhostUserMsg msg = { 2615 .hdr.request = VHOST_USER_SET_INFLIGHT_FD, 2616 .hdr.flags = VHOST_USER_VERSION, 2617 .payload.inflight.mmap_size = inflight->size, 2618 .payload.inflight.mmap_offset = inflight->offset, 2619 .payload.inflight.num_queues = dev->nvqs, 2620 .payload.inflight.queue_size = inflight->queue_size, 2621 .hdr.size = sizeof(msg.payload.inflight), 2622 }; 2623 2624 if (!virtio_has_feature(dev->protocol_features, 2625 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2626 return 0; 2627 } 2628 2629 return vhost_user_write(dev, &msg, &inflight->fd, 1); 2630 } 2631 2632 static void vhost_user_state_destroy(gpointer data) 2633 { 2634 VhostUserHostNotifier *n = (VhostUserHostNotifier *) data; 2635 if (n) { 2636 vhost_user_host_notifier_remove(n, NULL); 2637 object_unparent(OBJECT(&n->mr)); 2638 /* 2639 * We can't free until vhost_user_host_notifier_remove has 2640 * done it's thing so schedule the free with RCU. 2641 */ 2642 g_free_rcu(n, rcu); 2643 } 2644 } 2645 2646 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) 2647 { 2648 if (user->chr) { 2649 error_setg(errp, "Cannot initialize vhost-user state"); 2650 return false; 2651 } 2652 user->chr = chr; 2653 user->memory_slots = 0; 2654 user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4, 2655 &vhost_user_state_destroy); 2656 return true; 2657 } 2658 2659 void vhost_user_cleanup(VhostUserState *user) 2660 { 2661 if (!user->chr) { 2662 return; 2663 } 2664 memory_region_transaction_begin(); 2665 user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true); 2666 memory_region_transaction_commit(); 2667 user->chr = NULL; 2668 } 2669 2670 2671 typedef struct { 2672 vu_async_close_fn cb; 2673 DeviceState *dev; 2674 CharBackend *cd; 2675 struct vhost_dev *vhost; 2676 } VhostAsyncCallback; 2677 2678 static void vhost_user_async_close_bh(void *opaque) 2679 { 2680 VhostAsyncCallback *data = opaque; 2681 struct vhost_dev *vhost = data->vhost; 2682 2683 /* 2684 * If the vhost_dev has been cleared in the meantime there is 2685 * nothing left to do as some other path has completed the 2686 * cleanup. 2687 */ 2688 if (vhost->vdev) { 2689 data->cb(data->dev); 2690 } 2691 2692 g_free(data); 2693 } 2694 2695 /* 2696 * We only schedule the work if the machine is running. If suspended 2697 * we want to keep all the in-flight data as is for migration 2698 * purposes. 
2699 */ 2700 void vhost_user_async_close(DeviceState *d, 2701 CharBackend *chardev, struct vhost_dev *vhost, 2702 vu_async_close_fn cb) 2703 { 2704 if (!runstate_check(RUN_STATE_SHUTDOWN)) { 2705 /* 2706 * A close event may happen during a read/write, but vhost 2707 * code assumes the vhost_dev remains set up, so delay the 2708 * stop & clear. 2709 */ 2710 AioContext *ctx = qemu_get_current_aio_context(); 2711 VhostAsyncCallback *data = g_new0(VhostAsyncCallback, 1); 2712 2713 /* Save data for the callback */ 2714 data->cb = cb; 2715 data->dev = d; 2716 data->cd = chardev; 2717 data->vhost = vhost; 2718 2719 /* Disable any further notifications on the chardev */ 2720 qemu_chr_fe_set_handlers(chardev, 2721 NULL, NULL, NULL, NULL, NULL, NULL, 2722 false); 2723 2724 aio_bh_schedule_oneshot(ctx, vhost_user_async_close_bh, data); 2725 2726 /* 2727 * Move the vhost device to the stopped state. The vhost-user device 2728 * will be cleaned up and disconnected in the BH. This is useful to 2729 * the vhost migration code: if a disconnect is caught there, the 2730 * general vhost code can get the device state without knowing its 2731 * type (in this case vhost-user). 2732 * 2733 * Note if the vhost device is fully cleared by the time we 2734 * execute the bottom half we won't continue with the cleanup. 2735 */ 2736 vhost->started = false; 2737 } 2738 } 2739 2740 static int vhost_user_dev_start(struct vhost_dev *dev, bool started) 2741 { 2742 if (!virtio_has_feature(dev->protocol_features, 2743 VHOST_USER_PROTOCOL_F_STATUS)) { 2744 return 0; 2745 } 2746 2747 /* Set device status only for the last queue pair */ 2748 if (dev->vq_index + dev->nvqs != dev->vq_index_end) { 2749 return 0; 2750 } 2751 2752 if (started) { 2753 return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | 2754 VIRTIO_CONFIG_S_DRIVER | 2755 VIRTIO_CONFIG_S_DRIVER_OK); 2756 } else { 2757 return vhost_user_set_status(dev, 0); 2758 } 2759 } 2760 2761 const VhostOps user_ops = { 2762 .backend_type = VHOST_BACKEND_TYPE_USER, 2763 .vhost_backend_init = vhost_user_backend_init, 2764 .vhost_backend_cleanup = vhost_user_backend_cleanup, 2765 .vhost_backend_memslots_limit = vhost_user_memslots_limit, 2766 .vhost_set_log_base = vhost_user_set_log_base, 2767 .vhost_set_mem_table = vhost_user_set_mem_table, 2768 .vhost_set_vring_addr = vhost_user_set_vring_addr, 2769 .vhost_set_vring_endian = vhost_user_set_vring_endian, 2770 .vhost_set_vring_num = vhost_user_set_vring_num, 2771 .vhost_set_vring_base = vhost_user_set_vring_base, 2772 .vhost_get_vring_base = vhost_user_get_vring_base, 2773 .vhost_set_vring_kick = vhost_user_set_vring_kick, 2774 .vhost_set_vring_call = vhost_user_set_vring_call, 2775 .vhost_set_vring_err = vhost_user_set_vring_err, 2776 .vhost_set_features = vhost_user_set_features, 2777 .vhost_get_features = vhost_user_get_features, 2778 .vhost_set_owner = vhost_user_set_owner, 2779 .vhost_reset_device = vhost_user_reset_device, 2780 .vhost_get_vq_index = vhost_user_get_vq_index, 2781 .vhost_set_vring_enable = vhost_user_set_vring_enable, 2782 .vhost_requires_shm_log = vhost_user_requires_shm_log, 2783 .vhost_migration_done = vhost_user_migration_done, 2784 .vhost_backend_can_merge = vhost_user_can_merge, 2785 .vhost_net_set_mtu = vhost_user_net_set_mtu, 2786 .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback, 2787 .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg, 2788 .vhost_get_config = vhost_user_get_config, 2789 .vhost_set_config = vhost_user_set_config, 2790 .vhost_crypto_create_session = 
vhost_user_crypto_create_session, 2791 .vhost_crypto_close_session = vhost_user_crypto_close_session, 2792 .vhost_backend_mem_section_filter = vhost_user_mem_section_filter, 2793 .vhost_get_inflight_fd = vhost_user_get_inflight_fd, 2794 .vhost_set_inflight_fd = vhost_user_set_inflight_fd, 2795 .vhost_dev_start = vhost_user_dev_start, 2796 }; 2797
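/*
 * Usage sketch (illustrative only, not part of the build): a rough outline
 * of how a virtio frontend wires a chardev up to this backend.  The device
 * type "ExampleDev" and its fields are placeholders, not a real QEMU device.
 *
 *     static int example_vhost_user_connect(ExampleDev *s, Error **errp)
 *     {
 *         if (!vhost_user_init(&s->vhost_user, &s->chardev, errp)) {
 *             return -1;
 *         }
 *         s->vhost_dev.nvqs = s->num_queues;
 *         s->vhost_dev.vqs = g_new0(struct vhost_virtqueue, s->vhost_dev.nvqs);
 *         return vhost_dev_init(&s->vhost_dev, &s->vhost_user,
 *                               VHOST_BACKEND_TYPE_USER, 0, errp);
 *     }
 *
 * vhost_dev_init() ends up in vhost_user_backend_init() above via user_ops;
 * on unrealize the frontend undoes this with vhost_dev_cleanup() followed by
 * vhost_user_cleanup().
 */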