1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "qemu/osdep.h" 12 #include "qapi/error.h" 13 #include "hw/virtio/vhost.h" 14 #include "hw/virtio/vhost-user.h" 15 #include "hw/virtio/vhost-backend.h" 16 #include "hw/virtio/virtio.h" 17 #include "hw/virtio/virtio-net.h" 18 #include "chardev/char-fe.h" 19 #include "io/channel-socket.h" 20 #include "sysemu/kvm.h" 21 #include "qemu/error-report.h" 22 #include "qemu/main-loop.h" 23 #include "qemu/sockets.h" 24 #include "sysemu/runstate.h" 25 #include "sysemu/cryptodev.h" 26 #include "migration/migration.h" 27 #include "migration/postcopy-ram.h" 28 #include "trace.h" 29 #include "exec/ramblock.h" 30 31 #include <sys/ioctl.h> 32 #include <sys/socket.h> 33 #include <sys/un.h> 34 35 #include "standard-headers/linux/vhost_types.h" 36 37 #ifdef CONFIG_LINUX 38 #include <linux/userfaultfd.h> 39 #endif 40 41 #define VHOST_MEMORY_BASELINE_NREGIONS 8 42 #define VHOST_USER_F_PROTOCOL_FEATURES 30 43 #define VHOST_USER_SLAVE_MAX_FDS 8 44 45 /* 46 * Set maximum number of RAM slots supported to 47 * the maximum number supported by the target 48 * hardware plaform. 49 */ 50 #if defined(TARGET_X86) || defined(TARGET_X86_64) || \ 51 defined(TARGET_ARM) || defined(TARGET_ARM_64) 52 #include "hw/acpi/acpi.h" 53 #define VHOST_USER_MAX_RAM_SLOTS ACPI_MAX_RAM_SLOTS 54 55 #elif defined(TARGET_PPC) || defined(TARGET_PPC64) 56 #include "hw/ppc/spapr.h" 57 #define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS 58 59 #else 60 #define VHOST_USER_MAX_RAM_SLOTS 512 61 #endif 62 63 /* 64 * Maximum size of virtio device config space 65 */ 66 #define VHOST_USER_MAX_CONFIG_SIZE 256 67 68 enum VhostUserProtocolFeature { 69 VHOST_USER_PROTOCOL_F_MQ = 0, 70 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, 71 VHOST_USER_PROTOCOL_F_RARP = 2, 72 VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, 73 VHOST_USER_PROTOCOL_F_NET_MTU = 4, 74 VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, 75 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, 76 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, 77 VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, 78 VHOST_USER_PROTOCOL_F_CONFIG = 9, 79 VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, 80 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, 81 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, 82 VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, 83 /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. 
*/ 84 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, 85 VHOST_USER_PROTOCOL_F_STATUS = 16, 86 VHOST_USER_PROTOCOL_F_MAX 87 }; 88 89 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) 90 91 typedef enum VhostUserRequest { 92 VHOST_USER_NONE = 0, 93 VHOST_USER_GET_FEATURES = 1, 94 VHOST_USER_SET_FEATURES = 2, 95 VHOST_USER_SET_OWNER = 3, 96 VHOST_USER_RESET_OWNER = 4, 97 VHOST_USER_SET_MEM_TABLE = 5, 98 VHOST_USER_SET_LOG_BASE = 6, 99 VHOST_USER_SET_LOG_FD = 7, 100 VHOST_USER_SET_VRING_NUM = 8, 101 VHOST_USER_SET_VRING_ADDR = 9, 102 VHOST_USER_SET_VRING_BASE = 10, 103 VHOST_USER_GET_VRING_BASE = 11, 104 VHOST_USER_SET_VRING_KICK = 12, 105 VHOST_USER_SET_VRING_CALL = 13, 106 VHOST_USER_SET_VRING_ERR = 14, 107 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 108 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 109 VHOST_USER_GET_QUEUE_NUM = 17, 110 VHOST_USER_SET_VRING_ENABLE = 18, 111 VHOST_USER_SEND_RARP = 19, 112 VHOST_USER_NET_SET_MTU = 20, 113 VHOST_USER_SET_SLAVE_REQ_FD = 21, 114 VHOST_USER_IOTLB_MSG = 22, 115 VHOST_USER_SET_VRING_ENDIAN = 23, 116 VHOST_USER_GET_CONFIG = 24, 117 VHOST_USER_SET_CONFIG = 25, 118 VHOST_USER_CREATE_CRYPTO_SESSION = 26, 119 VHOST_USER_CLOSE_CRYPTO_SESSION = 27, 120 VHOST_USER_POSTCOPY_ADVISE = 28, 121 VHOST_USER_POSTCOPY_LISTEN = 29, 122 VHOST_USER_POSTCOPY_END = 30, 123 VHOST_USER_GET_INFLIGHT_FD = 31, 124 VHOST_USER_SET_INFLIGHT_FD = 32, 125 VHOST_USER_GPU_SET_SOCKET = 33, 126 VHOST_USER_RESET_DEVICE = 34, 127 /* Message number 35 reserved for VHOST_USER_VRING_KICK. */ 128 VHOST_USER_GET_MAX_MEM_SLOTS = 36, 129 VHOST_USER_ADD_MEM_REG = 37, 130 VHOST_USER_REM_MEM_REG = 38, 131 VHOST_USER_SET_STATUS = 39, 132 VHOST_USER_GET_STATUS = 40, 133 VHOST_USER_MAX 134 } VhostUserRequest; 135 136 typedef enum VhostUserSlaveRequest { 137 VHOST_USER_SLAVE_NONE = 0, 138 VHOST_USER_SLAVE_IOTLB_MSG = 1, 139 VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, 140 VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, 141 VHOST_USER_SLAVE_MAX 142 } VhostUserSlaveRequest; 143 144 typedef struct VhostUserMemoryRegion { 145 uint64_t guest_phys_addr; 146 uint64_t memory_size; 147 uint64_t userspace_addr; 148 uint64_t mmap_offset; 149 } VhostUserMemoryRegion; 150 151 typedef struct VhostUserMemory { 152 uint32_t nregions; 153 uint32_t padding; 154 VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS]; 155 } VhostUserMemory; 156 157 typedef struct VhostUserMemRegMsg { 158 uint64_t padding; 159 VhostUserMemoryRegion region; 160 } VhostUserMemRegMsg; 161 162 typedef struct VhostUserLog { 163 uint64_t mmap_size; 164 uint64_t mmap_offset; 165 } VhostUserLog; 166 167 typedef struct VhostUserConfig { 168 uint32_t offset; 169 uint32_t size; 170 uint32_t flags; 171 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; 172 } VhostUserConfig; 173 174 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512 175 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64 176 177 typedef struct VhostUserCryptoSession { 178 /* session id for success, -1 on errors */ 179 int64_t session_id; 180 CryptoDevBackendSymSessionInfo session_setup_data; 181 uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN]; 182 uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN]; 183 } VhostUserCryptoSession; 184 185 static VhostUserConfig c __attribute__ ((unused)); 186 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \ 187 + sizeof(c.size) \ 188 + sizeof(c.flags)) 189 190 typedef struct VhostUserVringArea { 191 uint64_t u64; 192 uint64_t size; 193 uint64_t offset; 194 } VhostUserVringArea; 195 196 typedef struct VhostUserInflight { 197 uint64_t 
mmap_size; 198 uint64_t mmap_offset; 199 uint16_t num_queues; 200 uint16_t queue_size; 201 } VhostUserInflight; 202 203 typedef struct { 204 VhostUserRequest request; 205 206 #define VHOST_USER_VERSION_MASK (0x3) 207 #define VHOST_USER_REPLY_MASK (0x1 << 2) 208 #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) 209 uint32_t flags; 210 uint32_t size; /* the following payload size */ 211 } QEMU_PACKED VhostUserHeader; 212 213 typedef union { 214 #define VHOST_USER_VRING_IDX_MASK (0xff) 215 #define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) 216 uint64_t u64; 217 struct vhost_vring_state state; 218 struct vhost_vring_addr addr; 219 VhostUserMemory memory; 220 VhostUserMemRegMsg mem_reg; 221 VhostUserLog log; 222 struct vhost_iotlb_msg iotlb; 223 VhostUserConfig config; 224 VhostUserCryptoSession session; 225 VhostUserVringArea area; 226 VhostUserInflight inflight; 227 } VhostUserPayload; 228 229 typedef struct VhostUserMsg { 230 VhostUserHeader hdr; 231 VhostUserPayload payload; 232 } QEMU_PACKED VhostUserMsg; 233 234 static VhostUserMsg m __attribute__ ((unused)); 235 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader)) 236 237 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload)) 238 239 /* The version of the protocol we support */ 240 #define VHOST_USER_VERSION (0x1) 241 242 struct vhost_user { 243 struct vhost_dev *dev; 244 /* Shared between vhost devs of the same virtio device */ 245 VhostUserState *user; 246 QIOChannel *slave_ioc; 247 GSource *slave_src; 248 NotifierWithReturn postcopy_notifier; 249 struct PostCopyFD postcopy_fd; 250 uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS]; 251 /* Length of the region_rb and region_rb_offset arrays */ 252 size_t region_rb_len; 253 /* RAMBlock associated with a given region */ 254 RAMBlock **region_rb; 255 /* 256 * The offset from the start of the RAMBlock to the start of the 257 * vhost region. 258 */ 259 ram_addr_t *region_rb_offset; 260 261 /* True once we've entered postcopy_listen */ 262 bool postcopy_listen; 263 264 /* Our current regions */ 265 int num_shadow_regions; 266 struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS]; 267 }; 268 269 struct scrub_regions { 270 struct vhost_memory_region *region; 271 int reg_idx; 272 int fd_idx; 273 }; 274 275 static bool ioeventfd_enabled(void) 276 { 277 return !kvm_enabled() || kvm_eventfds_enabled(); 278 } 279 280 static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg) 281 { 282 struct vhost_user *u = dev->opaque; 283 CharBackend *chr = u->user->chr; 284 uint8_t *p = (uint8_t *) msg; 285 int r, size = VHOST_USER_HDR_SIZE; 286 287 r = qemu_chr_fe_read_all(chr, p, size); 288 if (r != size) { 289 int saved_errno = errno; 290 error_report("Failed to read msg header. Read %d instead of %d." 291 " Original request %d.", r, size, msg->hdr.request); 292 return r < 0 ? -saved_errno : -EIO; 293 } 294 295 /* validate received flags */ 296 if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 297 error_report("Failed to read msg header." 
298 " Flags 0x%x instead of 0x%x.", msg->hdr.flags, 299 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 300 return -EPROTO; 301 } 302 303 trace_vhost_user_read(msg->hdr.request, msg->hdr.flags); 304 305 return 0; 306 } 307 308 struct vhost_user_read_cb_data { 309 struct vhost_dev *dev; 310 VhostUserMsg *msg; 311 GMainLoop *loop; 312 int ret; 313 }; 314 315 static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, 316 gpointer opaque) 317 { 318 struct vhost_user_read_cb_data *data = opaque; 319 struct vhost_dev *dev = data->dev; 320 VhostUserMsg *msg = data->msg; 321 struct vhost_user *u = dev->opaque; 322 CharBackend *chr = u->user->chr; 323 uint8_t *p = (uint8_t *) msg; 324 int r, size; 325 326 r = vhost_user_read_header(dev, msg); 327 if (r < 0) { 328 data->ret = r; 329 goto end; 330 } 331 332 /* validate message size is sane */ 333 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) { 334 error_report("Failed to read msg header." 335 " Size %d exceeds the maximum %zu.", msg->hdr.size, 336 VHOST_USER_PAYLOAD_SIZE); 337 data->ret = -EPROTO; 338 goto end; 339 } 340 341 if (msg->hdr.size) { 342 p += VHOST_USER_HDR_SIZE; 343 size = msg->hdr.size; 344 r = qemu_chr_fe_read_all(chr, p, size); 345 if (r != size) { 346 int saved_errno = errno; 347 error_report("Failed to read msg payload." 348 " Read %d instead of %d.", r, msg->hdr.size); 349 data->ret = r < 0 ? -saved_errno : -EIO; 350 goto end; 351 } 352 } 353 354 end: 355 g_main_loop_quit(data->loop); 356 return G_SOURCE_REMOVE; 357 } 358 359 static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, 360 gpointer opaque); 361 362 /* 363 * This updates the read handler to use a new event loop context. 364 * Event sources are removed from the previous context : this ensures 365 * that events detected in the previous context are purged. They will 366 * be re-detected and processed in the new context. 367 */ 368 static void slave_update_read_handler(struct vhost_dev *dev, 369 GMainContext *ctxt) 370 { 371 struct vhost_user *u = dev->opaque; 372 373 if (!u->slave_ioc) { 374 return; 375 } 376 377 if (u->slave_src) { 378 g_source_destroy(u->slave_src); 379 g_source_unref(u->slave_src); 380 } 381 382 u->slave_src = qio_channel_add_watch_source(u->slave_ioc, 383 G_IO_IN | G_IO_HUP, 384 slave_read, dev, NULL, 385 ctxt); 386 } 387 388 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 389 { 390 struct vhost_user *u = dev->opaque; 391 CharBackend *chr = u->user->chr; 392 GMainContext *prev_ctxt = chr->chr->gcontext; 393 GMainContext *ctxt = g_main_context_new(); 394 GMainLoop *loop = g_main_loop_new(ctxt, FALSE); 395 struct vhost_user_read_cb_data data = { 396 .dev = dev, 397 .loop = loop, 398 .msg = msg, 399 .ret = 0 400 }; 401 402 /* 403 * We want to be able to monitor the slave channel fd while waiting 404 * for chr I/O. This requires an event loop, but we can't nest the 405 * one to which chr is currently attached : its fd handlers might not 406 * be prepared for re-entrancy. So we create a new one and switch chr 407 * to use it. 408 */ 409 slave_update_read_handler(dev, ctxt); 410 qemu_chr_be_update_read_handlers(chr->chr, ctxt); 411 qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data); 412 413 g_main_loop_run(loop); 414 415 /* 416 * Restore the previous event loop context. This also destroys/recreates 417 * event sources : this guarantees that all pending events in the original 418 * context that have been processed by the nested loop are purged. 
419 */ 420 qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt); 421 slave_update_read_handler(dev, NULL); 422 423 g_main_loop_unref(loop); 424 g_main_context_unref(ctxt); 425 426 return data.ret; 427 } 428 429 static int process_message_reply(struct vhost_dev *dev, 430 const VhostUserMsg *msg) 431 { 432 int ret; 433 VhostUserMsg msg_reply; 434 435 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { 436 return 0; 437 } 438 439 ret = vhost_user_read(dev, &msg_reply); 440 if (ret < 0) { 441 return ret; 442 } 443 444 if (msg_reply.hdr.request != msg->hdr.request) { 445 error_report("Received unexpected msg type. " 446 "Expected %d received %d", 447 msg->hdr.request, msg_reply.hdr.request); 448 return -EPROTO; 449 } 450 451 return msg_reply.payload.u64 ? -EIO : 0; 452 } 453 454 static bool vhost_user_one_time_request(VhostUserRequest request) 455 { 456 switch (request) { 457 case VHOST_USER_SET_OWNER: 458 case VHOST_USER_RESET_OWNER: 459 case VHOST_USER_SET_MEM_TABLE: 460 case VHOST_USER_GET_QUEUE_NUM: 461 case VHOST_USER_NET_SET_MTU: 462 return true; 463 default: 464 return false; 465 } 466 } 467 468 /* most non-init callers ignore the error */ 469 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 470 int *fds, int fd_num) 471 { 472 struct vhost_user *u = dev->opaque; 473 CharBackend *chr = u->user->chr; 474 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size; 475 476 /* 477 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, 478 * we just need send it once in the first time. For later such 479 * request, we just ignore it. 480 */ 481 if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) { 482 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 483 return 0; 484 } 485 486 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) { 487 error_report("Failed to set msg fds."); 488 return -EINVAL; 489 } 490 491 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size); 492 if (ret != size) { 493 int saved_errno = errno; 494 error_report("Failed to write msg." 495 " Wrote %d instead of %d.", ret, size); 496 return ret < 0 ? -saved_errno : -EIO; 497 } 498 499 trace_vhost_user_write(msg->hdr.request, msg->hdr.flags); 500 501 return 0; 502 } 503 504 int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd) 505 { 506 VhostUserMsg msg = { 507 .hdr.request = VHOST_USER_GPU_SET_SOCKET, 508 .hdr.flags = VHOST_USER_VERSION, 509 }; 510 511 return vhost_user_write(dev, &msg, &fd, 1); 512 } 513 514 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, 515 struct vhost_log *log) 516 { 517 int fds[VHOST_USER_MAX_RAM_SLOTS]; 518 size_t fd_num = 0; 519 bool shmfd = virtio_has_feature(dev->protocol_features, 520 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 521 int ret; 522 VhostUserMsg msg = { 523 .hdr.request = VHOST_USER_SET_LOG_BASE, 524 .hdr.flags = VHOST_USER_VERSION, 525 .payload.log.mmap_size = log->size * sizeof(*(log->log)), 526 .payload.log.mmap_offset = 0, 527 .hdr.size = sizeof(msg.payload.log), 528 }; 529 530 /* Send only once with first queue pair */ 531 if (dev->vq_index != 0) { 532 return 0; 533 } 534 535 if (shmfd && log->fd != -1) { 536 fds[fd_num++] = log->fd; 537 } 538 539 ret = vhost_user_write(dev, &msg, fds, fd_num); 540 if (ret < 0) { 541 return ret; 542 } 543 544 if (shmfd) { 545 msg.hdr.size = 0; 546 ret = vhost_user_read(dev, &msg); 547 if (ret < 0) { 548 return ret; 549 } 550 551 if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) { 552 error_report("Received unexpected msg type. 
" 553 "Expected %d received %d", 554 VHOST_USER_SET_LOG_BASE, msg.hdr.request); 555 return -EPROTO; 556 } 557 } 558 559 return 0; 560 } 561 562 static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset, 563 int *fd) 564 { 565 MemoryRegion *mr; 566 567 assert((uintptr_t)addr == addr); 568 mr = memory_region_from_host((void *)(uintptr_t)addr, offset); 569 *fd = memory_region_get_fd(mr); 570 571 return mr; 572 } 573 574 static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst, 575 struct vhost_memory_region *src, 576 uint64_t mmap_offset) 577 { 578 assert(src != NULL && dst != NULL); 579 dst->userspace_addr = src->userspace_addr; 580 dst->memory_size = src->memory_size; 581 dst->guest_phys_addr = src->guest_phys_addr; 582 dst->mmap_offset = mmap_offset; 583 } 584 585 static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, 586 struct vhost_dev *dev, 587 VhostUserMsg *msg, 588 int *fds, size_t *fd_num, 589 bool track_ramblocks) 590 { 591 int i, fd; 592 ram_addr_t offset; 593 MemoryRegion *mr; 594 struct vhost_memory_region *reg; 595 VhostUserMemoryRegion region_buffer; 596 597 msg->hdr.request = VHOST_USER_SET_MEM_TABLE; 598 599 for (i = 0; i < dev->mem->nregions; ++i) { 600 reg = dev->mem->regions + i; 601 602 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 603 if (fd > 0) { 604 if (track_ramblocks) { 605 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS); 606 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name, 607 reg->memory_size, 608 reg->guest_phys_addr, 609 reg->userspace_addr, 610 offset); 611 u->region_rb_offset[i] = offset; 612 u->region_rb[i] = mr->ram_block; 613 } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) { 614 error_report("Failed preparing vhost-user memory table msg"); 615 return -ENOBUFS; 616 } 617 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 618 msg->payload.memory.regions[*fd_num] = region_buffer; 619 fds[(*fd_num)++] = fd; 620 } else if (track_ramblocks) { 621 u->region_rb_offset[i] = 0; 622 u->region_rb[i] = NULL; 623 } 624 } 625 626 msg->payload.memory.nregions = *fd_num; 627 628 if (!*fd_num) { 629 error_report("Failed initializing vhost-user memory map, " 630 "consider using -object memory-backend-file share=on"); 631 return -EINVAL; 632 } 633 634 msg->hdr.size = sizeof(msg->payload.memory.nregions); 635 msg->hdr.size += sizeof(msg->payload.memory.padding); 636 msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion); 637 638 return 0; 639 } 640 641 static inline bool reg_equal(struct vhost_memory_region *shadow_reg, 642 struct vhost_memory_region *vdev_reg) 643 { 644 return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr && 645 shadow_reg->userspace_addr == vdev_reg->userspace_addr && 646 shadow_reg->memory_size == vdev_reg->memory_size; 647 } 648 649 static void scrub_shadow_regions(struct vhost_dev *dev, 650 struct scrub_regions *add_reg, 651 int *nr_add_reg, 652 struct scrub_regions *rem_reg, 653 int *nr_rem_reg, uint64_t *shadow_pcb, 654 bool track_ramblocks) 655 { 656 struct vhost_user *u = dev->opaque; 657 bool found[VHOST_USER_MAX_RAM_SLOTS] = {}; 658 struct vhost_memory_region *reg, *shadow_reg; 659 int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0; 660 ram_addr_t offset; 661 MemoryRegion *mr; 662 bool matching; 663 664 /* 665 * Find memory regions present in our shadow state which are not in 666 * the device's current memory state. 667 * 668 * Mark regions in both the shadow and device state as "found". 
669 */ 670 for (i = 0; i < u->num_shadow_regions; i++) { 671 shadow_reg = &u->shadow_regions[i]; 672 matching = false; 673 674 for (j = 0; j < dev->mem->nregions; j++) { 675 reg = &dev->mem->regions[j]; 676 677 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 678 679 if (reg_equal(shadow_reg, reg)) { 680 matching = true; 681 found[j] = true; 682 if (track_ramblocks) { 683 /* 684 * Reset postcopy client bases, region_rb, and 685 * region_rb_offset in case regions are removed. 686 */ 687 if (fd > 0) { 688 u->region_rb_offset[j] = offset; 689 u->region_rb[j] = mr->ram_block; 690 shadow_pcb[j] = u->postcopy_client_bases[i]; 691 } else { 692 u->region_rb_offset[j] = 0; 693 u->region_rb[j] = NULL; 694 } 695 } 696 break; 697 } 698 } 699 700 /* 701 * If the region was not found in the current device memory state 702 * create an entry for it in the removed list. 703 */ 704 if (!matching) { 705 rem_reg[rm_idx].region = shadow_reg; 706 rem_reg[rm_idx++].reg_idx = i; 707 } 708 } 709 710 /* 711 * For regions not marked "found", create entries in the added list. 712 * 713 * Note their indexes in the device memory state and the indexes of their 714 * file descriptors. 715 */ 716 for (i = 0; i < dev->mem->nregions; i++) { 717 reg = &dev->mem->regions[i]; 718 vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 719 if (fd > 0) { 720 ++fd_num; 721 } 722 723 /* 724 * If the region was in both the shadow and device state we don't 725 * need to send a VHOST_USER_ADD_MEM_REG message for it. 726 */ 727 if (found[i]) { 728 continue; 729 } 730 731 add_reg[add_idx].region = reg; 732 add_reg[add_idx].reg_idx = i; 733 add_reg[add_idx++].fd_idx = fd_num; 734 } 735 *nr_rem_reg = rm_idx; 736 *nr_add_reg = add_idx; 737 738 return; 739 } 740 741 static int send_remove_regions(struct vhost_dev *dev, 742 struct scrub_regions *remove_reg, 743 int nr_rem_reg, VhostUserMsg *msg, 744 bool reply_supported) 745 { 746 struct vhost_user *u = dev->opaque; 747 struct vhost_memory_region *shadow_reg; 748 int i, fd, shadow_reg_idx, ret; 749 ram_addr_t offset; 750 VhostUserMemoryRegion region_buffer; 751 752 /* 753 * The regions in remove_reg appear in the same order they do in the 754 * shadow table. Therefore we can minimize memory copies by iterating 755 * through remove_reg backwards. 756 */ 757 for (i = nr_rem_reg - 1; i >= 0; i--) { 758 shadow_reg = remove_reg[i].region; 759 shadow_reg_idx = remove_reg[i].reg_idx; 760 761 vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd); 762 763 if (fd > 0) { 764 msg->hdr.request = VHOST_USER_REM_MEM_REG; 765 vhost_user_fill_msg_region(®ion_buffer, shadow_reg, 0); 766 msg->payload.mem_reg.region = region_buffer; 767 768 ret = vhost_user_write(dev, msg, NULL, 0); 769 if (ret < 0) { 770 return ret; 771 } 772 773 if (reply_supported) { 774 ret = process_message_reply(dev, msg); 775 if (ret) { 776 return ret; 777 } 778 } 779 } 780 781 /* 782 * At this point we know the backend has unmapped the region. It is now 783 * safe to remove it from the shadow table. 
784 */ 785 memmove(&u->shadow_regions[shadow_reg_idx], 786 &u->shadow_regions[shadow_reg_idx + 1], 787 sizeof(struct vhost_memory_region) * 788 (u->num_shadow_regions - shadow_reg_idx - 1)); 789 u->num_shadow_regions--; 790 } 791 792 return 0; 793 } 794 795 static int send_add_regions(struct vhost_dev *dev, 796 struct scrub_regions *add_reg, int nr_add_reg, 797 VhostUserMsg *msg, uint64_t *shadow_pcb, 798 bool reply_supported, bool track_ramblocks) 799 { 800 struct vhost_user *u = dev->opaque; 801 int i, fd, ret, reg_idx, reg_fd_idx; 802 struct vhost_memory_region *reg; 803 MemoryRegion *mr; 804 ram_addr_t offset; 805 VhostUserMsg msg_reply; 806 VhostUserMemoryRegion region_buffer; 807 808 for (i = 0; i < nr_add_reg; i++) { 809 reg = add_reg[i].region; 810 reg_idx = add_reg[i].reg_idx; 811 reg_fd_idx = add_reg[i].fd_idx; 812 813 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 814 815 if (fd > 0) { 816 if (track_ramblocks) { 817 trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name, 818 reg->memory_size, 819 reg->guest_phys_addr, 820 reg->userspace_addr, 821 offset); 822 u->region_rb_offset[reg_idx] = offset; 823 u->region_rb[reg_idx] = mr->ram_block; 824 } 825 msg->hdr.request = VHOST_USER_ADD_MEM_REG; 826 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 827 msg->payload.mem_reg.region = region_buffer; 828 829 ret = vhost_user_write(dev, msg, &fd, 1); 830 if (ret < 0) { 831 return ret; 832 } 833 834 if (track_ramblocks) { 835 uint64_t reply_gpa; 836 837 ret = vhost_user_read(dev, &msg_reply); 838 if (ret < 0) { 839 return ret; 840 } 841 842 reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr; 843 844 if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) { 845 error_report("%s: Received unexpected msg type." 846 "Expected %d received %d", __func__, 847 VHOST_USER_ADD_MEM_REG, 848 msg_reply.hdr.request); 849 return -EPROTO; 850 } 851 852 /* 853 * We're using the same structure, just reusing one of the 854 * fields, so it should be the same size. 855 */ 856 if (msg_reply.hdr.size != msg->hdr.size) { 857 error_report("%s: Unexpected size for postcopy reply " 858 "%d vs %d", __func__, msg_reply.hdr.size, 859 msg->hdr.size); 860 return -EPROTO; 861 } 862 863 /* Get the postcopy client base from the backend's reply. */ 864 if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) { 865 shadow_pcb[reg_idx] = 866 msg_reply.payload.mem_reg.region.userspace_addr; 867 trace_vhost_user_set_mem_table_postcopy( 868 msg_reply.payload.mem_reg.region.userspace_addr, 869 msg->payload.mem_reg.region.userspace_addr, 870 reg_fd_idx, reg_idx); 871 } else { 872 error_report("%s: invalid postcopy reply for region. " 873 "Got guest physical address %" PRIX64 ", expected " 874 "%" PRIX64, __func__, reply_gpa, 875 dev->mem->regions[reg_idx].guest_phys_addr); 876 return -EPROTO; 877 } 878 } else if (reply_supported) { 879 ret = process_message_reply(dev, msg); 880 if (ret) { 881 return ret; 882 } 883 } 884 } else if (track_ramblocks) { 885 u->region_rb_offset[reg_idx] = 0; 886 u->region_rb[reg_idx] = NULL; 887 } 888 889 /* 890 * At this point, we know the backend has mapped in the new 891 * region, if the region has a valid file descriptor. 892 * 893 * The region should now be added to the shadow table. 
894 */ 895 u->shadow_regions[u->num_shadow_regions].guest_phys_addr = 896 reg->guest_phys_addr; 897 u->shadow_regions[u->num_shadow_regions].userspace_addr = 898 reg->userspace_addr; 899 u->shadow_regions[u->num_shadow_regions].memory_size = 900 reg->memory_size; 901 u->num_shadow_regions++; 902 } 903 904 return 0; 905 } 906 907 static int vhost_user_add_remove_regions(struct vhost_dev *dev, 908 VhostUserMsg *msg, 909 bool reply_supported, 910 bool track_ramblocks) 911 { 912 struct vhost_user *u = dev->opaque; 913 struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS]; 914 struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS]; 915 uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {}; 916 int nr_add_reg, nr_rem_reg; 917 int ret; 918 919 msg->hdr.size = sizeof(msg->payload.mem_reg); 920 921 /* Find the regions which need to be removed or added. */ 922 scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg, 923 shadow_pcb, track_ramblocks); 924 925 if (nr_rem_reg) { 926 ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg, 927 reply_supported); 928 if (ret < 0) { 929 goto err; 930 } 931 } 932 933 if (nr_add_reg) { 934 ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb, 935 reply_supported, track_ramblocks); 936 if (ret < 0) { 937 goto err; 938 } 939 } 940 941 if (track_ramblocks) { 942 memcpy(u->postcopy_client_bases, shadow_pcb, 943 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 944 /* 945 * Now we've registered this with the postcopy code, we ack to the 946 * client, because now we're in the position to be able to deal with 947 * any faults it generates. 948 */ 949 /* TODO: Use this for failure cases as well with a bad value. */ 950 msg->hdr.size = sizeof(msg->payload.u64); 951 msg->payload.u64 = 0; /* OK */ 952 953 ret = vhost_user_write(dev, msg, NULL, 0); 954 if (ret < 0) { 955 return ret; 956 } 957 } 958 959 return 0; 960 961 err: 962 if (track_ramblocks) { 963 memcpy(u->postcopy_client_bases, shadow_pcb, 964 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 965 } 966 967 return ret; 968 } 969 970 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, 971 struct vhost_memory *mem, 972 bool reply_supported, 973 bool config_mem_slots) 974 { 975 struct vhost_user *u = dev->opaque; 976 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 977 size_t fd_num = 0; 978 VhostUserMsg msg_reply; 979 int region_i, msg_i; 980 int ret; 981 982 VhostUserMsg msg = { 983 .hdr.flags = VHOST_USER_VERSION, 984 }; 985 986 if (u->region_rb_len < dev->mem->nregions) { 987 u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions); 988 u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset, 989 dev->mem->nregions); 990 memset(&(u->region_rb[u->region_rb_len]), '\0', 991 sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len)); 992 memset(&(u->region_rb_offset[u->region_rb_len]), '\0', 993 sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len)); 994 u->region_rb_len = dev->mem->nregions; 995 } 996 997 if (config_mem_slots) { 998 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true); 999 if (ret < 0) { 1000 return ret; 1001 } 1002 } else { 1003 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 1004 true); 1005 if (ret < 0) { 1006 return ret; 1007 } 1008 1009 ret = vhost_user_write(dev, &msg, fds, fd_num); 1010 if (ret < 0) { 1011 return ret; 1012 } 1013 1014 ret = vhost_user_read(dev, &msg_reply); 1015 if (ret < 0) { 1016 return ret; 1017 } 1018 1019 if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { 1020 
error_report("%s: Received unexpected msg type." 1021 "Expected %d received %d", __func__, 1022 VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); 1023 return -EPROTO; 1024 } 1025 1026 /* 1027 * We're using the same structure, just reusing one of the 1028 * fields, so it should be the same size. 1029 */ 1030 if (msg_reply.hdr.size != msg.hdr.size) { 1031 error_report("%s: Unexpected size for postcopy reply " 1032 "%d vs %d", __func__, msg_reply.hdr.size, 1033 msg.hdr.size); 1034 return -EPROTO; 1035 } 1036 1037 memset(u->postcopy_client_bases, 0, 1038 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 1039 1040 /* 1041 * They're in the same order as the regions that were sent 1042 * but some of the regions were skipped (above) if they 1043 * didn't have fd's 1044 */ 1045 for (msg_i = 0, region_i = 0; 1046 region_i < dev->mem->nregions; 1047 region_i++) { 1048 if (msg_i < fd_num && 1049 msg_reply.payload.memory.regions[msg_i].guest_phys_addr == 1050 dev->mem->regions[region_i].guest_phys_addr) { 1051 u->postcopy_client_bases[region_i] = 1052 msg_reply.payload.memory.regions[msg_i].userspace_addr; 1053 trace_vhost_user_set_mem_table_postcopy( 1054 msg_reply.payload.memory.regions[msg_i].userspace_addr, 1055 msg.payload.memory.regions[msg_i].userspace_addr, 1056 msg_i, region_i); 1057 msg_i++; 1058 } 1059 } 1060 if (msg_i != fd_num) { 1061 error_report("%s: postcopy reply not fully consumed " 1062 "%d vs %zd", 1063 __func__, msg_i, fd_num); 1064 return -EIO; 1065 } 1066 1067 /* 1068 * Now we've registered this with the postcopy code, we ack to the 1069 * client, because now we're in the position to be able to deal 1070 * with any faults it generates. 1071 */ 1072 /* TODO: Use this for failure cases as well with a bad value. */ 1073 msg.hdr.size = sizeof(msg.payload.u64); 1074 msg.payload.u64 = 0; /* OK */ 1075 ret = vhost_user_write(dev, &msg, NULL, 0); 1076 if (ret < 0) { 1077 return ret; 1078 } 1079 } 1080 1081 return 0; 1082 } 1083 1084 static int vhost_user_set_mem_table(struct vhost_dev *dev, 1085 struct vhost_memory *mem) 1086 { 1087 struct vhost_user *u = dev->opaque; 1088 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 1089 size_t fd_num = 0; 1090 bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; 1091 bool reply_supported = virtio_has_feature(dev->protocol_features, 1092 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1093 bool config_mem_slots = 1094 virtio_has_feature(dev->protocol_features, 1095 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS); 1096 int ret; 1097 1098 if (do_postcopy) { 1099 /* 1100 * Postcopy has enough differences that it's best done in it's own 1101 * version 1102 */ 1103 return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported, 1104 config_mem_slots); 1105 } 1106 1107 VhostUserMsg msg = { 1108 .hdr.flags = VHOST_USER_VERSION, 1109 }; 1110 1111 if (reply_supported) { 1112 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1113 } 1114 1115 if (config_mem_slots) { 1116 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false); 1117 if (ret < 0) { 1118 return ret; 1119 } 1120 } else { 1121 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 1122 false); 1123 if (ret < 0) { 1124 return ret; 1125 } 1126 1127 ret = vhost_user_write(dev, &msg, fds, fd_num); 1128 if (ret < 0) { 1129 return ret; 1130 } 1131 1132 if (reply_supported) { 1133 return process_message_reply(dev, &msg); 1134 } 1135 } 1136 1137 return 0; 1138 } 1139 1140 static int vhost_user_set_vring_endian(struct vhost_dev *dev, 1141 struct vhost_vring_state *ring) 1142 { 1143 bool cross_endian 
= virtio_has_feature(dev->protocol_features, 1144 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN); 1145 VhostUserMsg msg = { 1146 .hdr.request = VHOST_USER_SET_VRING_ENDIAN, 1147 .hdr.flags = VHOST_USER_VERSION, 1148 .payload.state = *ring, 1149 .hdr.size = sizeof(msg.payload.state), 1150 }; 1151 1152 if (!cross_endian) { 1153 error_report("vhost-user trying to send unhandled ioctl"); 1154 return -ENOTSUP; 1155 } 1156 1157 return vhost_user_write(dev, &msg, NULL, 0); 1158 } 1159 1160 static int vhost_set_vring(struct vhost_dev *dev, 1161 unsigned long int request, 1162 struct vhost_vring_state *ring) 1163 { 1164 VhostUserMsg msg = { 1165 .hdr.request = request, 1166 .hdr.flags = VHOST_USER_VERSION, 1167 .payload.state = *ring, 1168 .hdr.size = sizeof(msg.payload.state), 1169 }; 1170 1171 return vhost_user_write(dev, &msg, NULL, 0); 1172 } 1173 1174 static int vhost_user_set_vring_num(struct vhost_dev *dev, 1175 struct vhost_vring_state *ring) 1176 { 1177 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); 1178 } 1179 1180 static void vhost_user_host_notifier_free(VhostUserHostNotifier *n) 1181 { 1182 assert(n && n->unmap_addr); 1183 munmap(n->unmap_addr, qemu_real_host_page_size()); 1184 n->unmap_addr = NULL; 1185 } 1186 1187 /* 1188 * clean-up function for notifier, will finally free the structure 1189 * under rcu. 1190 */ 1191 static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n, 1192 VirtIODevice *vdev) 1193 { 1194 if (n->addr) { 1195 if (vdev) { 1196 virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false); 1197 } 1198 assert(!n->unmap_addr); 1199 n->unmap_addr = n->addr; 1200 n->addr = NULL; 1201 call_rcu(n, vhost_user_host_notifier_free, rcu); 1202 } 1203 } 1204 1205 static int vhost_user_set_vring_base(struct vhost_dev *dev, 1206 struct vhost_vring_state *ring) 1207 { 1208 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); 1209 } 1210 1211 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) 1212 { 1213 int i; 1214 1215 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { 1216 return -EINVAL; 1217 } 1218 1219 for (i = 0; i < dev->nvqs; ++i) { 1220 int ret; 1221 struct vhost_vring_state state = { 1222 .index = dev->vq_index + i, 1223 .num = enable, 1224 }; 1225 1226 ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); 1227 if (ret < 0) { 1228 /* 1229 * Restoring the previous state is likely infeasible, as well as 1230 * proceeding regardless the error, so just bail out and hope for 1231 * the device-level recovery. 
1232 */ 1233 return ret; 1234 } 1235 } 1236 1237 return 0; 1238 } 1239 1240 static VhostUserHostNotifier *fetch_notifier(VhostUserState *u, 1241 int idx) 1242 { 1243 if (idx >= u->notifiers->len) { 1244 return NULL; 1245 } 1246 return g_ptr_array_index(u->notifiers, idx); 1247 } 1248 1249 static int vhost_user_get_vring_base(struct vhost_dev *dev, 1250 struct vhost_vring_state *ring) 1251 { 1252 int ret; 1253 VhostUserMsg msg = { 1254 .hdr.request = VHOST_USER_GET_VRING_BASE, 1255 .hdr.flags = VHOST_USER_VERSION, 1256 .payload.state = *ring, 1257 .hdr.size = sizeof(msg.payload.state), 1258 }; 1259 struct vhost_user *u = dev->opaque; 1260 1261 VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index); 1262 if (n) { 1263 vhost_user_host_notifier_remove(n, dev->vdev); 1264 } 1265 1266 ret = vhost_user_write(dev, &msg, NULL, 0); 1267 if (ret < 0) { 1268 return ret; 1269 } 1270 1271 ret = vhost_user_read(dev, &msg); 1272 if (ret < 0) { 1273 return ret; 1274 } 1275 1276 if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) { 1277 error_report("Received unexpected msg type. Expected %d received %d", 1278 VHOST_USER_GET_VRING_BASE, msg.hdr.request); 1279 return -EPROTO; 1280 } 1281 1282 if (msg.hdr.size != sizeof(msg.payload.state)) { 1283 error_report("Received bad msg size."); 1284 return -EPROTO; 1285 } 1286 1287 *ring = msg.payload.state; 1288 1289 return 0; 1290 } 1291 1292 static int vhost_set_vring_file(struct vhost_dev *dev, 1293 VhostUserRequest request, 1294 struct vhost_vring_file *file) 1295 { 1296 int fds[VHOST_USER_MAX_RAM_SLOTS]; 1297 size_t fd_num = 0; 1298 VhostUserMsg msg = { 1299 .hdr.request = request, 1300 .hdr.flags = VHOST_USER_VERSION, 1301 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, 1302 .hdr.size = sizeof(msg.payload.u64), 1303 }; 1304 1305 if (ioeventfd_enabled() && file->fd > 0) { 1306 fds[fd_num++] = file->fd; 1307 } else { 1308 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 1309 } 1310 1311 return vhost_user_write(dev, &msg, fds, fd_num); 1312 } 1313 1314 static int vhost_user_set_vring_kick(struct vhost_dev *dev, 1315 struct vhost_vring_file *file) 1316 { 1317 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); 1318 } 1319 1320 static int vhost_user_set_vring_call(struct vhost_dev *dev, 1321 struct vhost_vring_file *file) 1322 { 1323 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); 1324 } 1325 1326 static int vhost_user_set_vring_err(struct vhost_dev *dev, 1327 struct vhost_vring_file *file) 1328 { 1329 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_ERR, file); 1330 } 1331 1332 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) 1333 { 1334 int ret; 1335 VhostUserMsg msg = { 1336 .hdr.request = request, 1337 .hdr.flags = VHOST_USER_VERSION, 1338 }; 1339 1340 if (vhost_user_one_time_request(request) && dev->vq_index != 0) { 1341 return 0; 1342 } 1343 1344 ret = vhost_user_write(dev, &msg, NULL, 0); 1345 if (ret < 0) { 1346 return ret; 1347 } 1348 1349 ret = vhost_user_read(dev, &msg); 1350 if (ret < 0) { 1351 return ret; 1352 } 1353 1354 if (msg.hdr.request != request) { 1355 error_report("Received unexpected msg type. 
Expected %d received %d", 1356 request, msg.hdr.request); 1357 return -EPROTO; 1358 } 1359 1360 if (msg.hdr.size != sizeof(msg.payload.u64)) { 1361 error_report("Received bad msg size."); 1362 return -EPROTO; 1363 } 1364 1365 *u64 = msg.payload.u64; 1366 1367 return 0; 1368 } 1369 1370 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) 1371 { 1372 if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) { 1373 return -EPROTO; 1374 } 1375 1376 return 0; 1377 } 1378 1379 static int enforce_reply(struct vhost_dev *dev, 1380 const VhostUserMsg *msg) 1381 { 1382 uint64_t dummy; 1383 1384 if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1385 return process_message_reply(dev, msg); 1386 } 1387 1388 /* 1389 * We need to wait for a reply but the backend does not 1390 * support replies for the command we just sent. 1391 * Send VHOST_USER_GET_FEATURES which makes all backends 1392 * send a reply. 1393 */ 1394 return vhost_user_get_features(dev, &dummy); 1395 } 1396 1397 static int vhost_user_set_vring_addr(struct vhost_dev *dev, 1398 struct vhost_vring_addr *addr) 1399 { 1400 int ret; 1401 VhostUserMsg msg = { 1402 .hdr.request = VHOST_USER_SET_VRING_ADDR, 1403 .hdr.flags = VHOST_USER_VERSION, 1404 .payload.addr = *addr, 1405 .hdr.size = sizeof(msg.payload.addr), 1406 }; 1407 1408 bool reply_supported = virtio_has_feature(dev->protocol_features, 1409 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1410 1411 /* 1412 * wait for a reply if logging is enabled to make sure 1413 * backend is actually logging changes 1414 */ 1415 bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG); 1416 1417 if (reply_supported && wait_for_reply) { 1418 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1419 } 1420 1421 ret = vhost_user_write(dev, &msg, NULL, 0); 1422 if (ret < 0) { 1423 return ret; 1424 } 1425 1426 if (wait_for_reply) { 1427 return enforce_reply(dev, &msg); 1428 } 1429 1430 return 0; 1431 } 1432 1433 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64, 1434 bool wait_for_reply) 1435 { 1436 VhostUserMsg msg = { 1437 .hdr.request = request, 1438 .hdr.flags = VHOST_USER_VERSION, 1439 .payload.u64 = u64, 1440 .hdr.size = sizeof(msg.payload.u64), 1441 }; 1442 int ret; 1443 1444 if (wait_for_reply) { 1445 bool reply_supported = virtio_has_feature(dev->protocol_features, 1446 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1447 if (reply_supported) { 1448 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1449 } 1450 } 1451 1452 ret = vhost_user_write(dev, &msg, NULL, 0); 1453 if (ret < 0) { 1454 return ret; 1455 } 1456 1457 if (wait_for_reply) { 1458 return enforce_reply(dev, &msg); 1459 } 1460 1461 return 0; 1462 } 1463 1464 static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status) 1465 { 1466 return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status, false); 1467 } 1468 1469 static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status) 1470 { 1471 uint64_t value; 1472 int ret; 1473 1474 ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &value); 1475 if (ret < 0) { 1476 return ret; 1477 } 1478 *status = value; 1479 1480 return 0; 1481 } 1482 1483 static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status) 1484 { 1485 uint8_t s; 1486 int ret; 1487 1488 ret = vhost_user_get_status(dev, &s); 1489 if (ret < 0) { 1490 return ret; 1491 } 1492 1493 if ((s & status) == status) { 1494 return 0; 1495 } 1496 s |= status; 1497 1498 return vhost_user_set_status(dev, s); 1499 } 1500 1501 static int vhost_user_set_features(struct vhost_dev *dev, 
1502 uint64_t features) 1503 { 1504 /* 1505 * wait for a reply if logging is enabled to make sure 1506 * backend is actually logging changes 1507 */ 1508 bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL); 1509 int ret; 1510 1511 /* 1512 * We need to include any extra backend only feature bits that 1513 * might be needed by our device. Currently this includes the 1514 * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol 1515 * features. 1516 */ 1517 ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, 1518 features | dev->backend_features, 1519 log_enabled); 1520 1521 if (virtio_has_feature(dev->protocol_features, 1522 VHOST_USER_PROTOCOL_F_STATUS)) { 1523 if (!ret) { 1524 return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK); 1525 } 1526 } 1527 1528 return ret; 1529 } 1530 1531 static int vhost_user_set_protocol_features(struct vhost_dev *dev, 1532 uint64_t features) 1533 { 1534 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features, 1535 false); 1536 } 1537 1538 static int vhost_user_set_owner(struct vhost_dev *dev) 1539 { 1540 VhostUserMsg msg = { 1541 .hdr.request = VHOST_USER_SET_OWNER, 1542 .hdr.flags = VHOST_USER_VERSION, 1543 }; 1544 1545 return vhost_user_write(dev, &msg, NULL, 0); 1546 } 1547 1548 static int vhost_user_get_max_memslots(struct vhost_dev *dev, 1549 uint64_t *max_memslots) 1550 { 1551 uint64_t backend_max_memslots; 1552 int err; 1553 1554 err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS, 1555 &backend_max_memslots); 1556 if (err < 0) { 1557 return err; 1558 } 1559 1560 *max_memslots = backend_max_memslots; 1561 1562 return 0; 1563 } 1564 1565 static int vhost_user_reset_device(struct vhost_dev *dev) 1566 { 1567 VhostUserMsg msg = { 1568 .hdr.flags = VHOST_USER_VERSION, 1569 }; 1570 1571 msg.hdr.request = virtio_has_feature(dev->protocol_features, 1572 VHOST_USER_PROTOCOL_F_RESET_DEVICE) 1573 ? VHOST_USER_RESET_DEVICE 1574 : VHOST_USER_RESET_OWNER; 1575 1576 return vhost_user_write(dev, &msg, NULL, 0); 1577 } 1578 1579 static int vhost_user_slave_handle_config_change(struct vhost_dev *dev) 1580 { 1581 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { 1582 return -ENOSYS; 1583 } 1584 1585 return dev->config_ops->vhost_dev_config_notifier(dev); 1586 } 1587 1588 /* 1589 * Fetch or create the notifier for a given idx. Newly created 1590 * notifiers are added to the pointer array that tracks them. 1591 */ 1592 static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u, 1593 int idx) 1594 { 1595 VhostUserHostNotifier *n = NULL; 1596 if (idx >= u->notifiers->len) { 1597 g_ptr_array_set_size(u->notifiers, idx + 1); 1598 } 1599 1600 n = g_ptr_array_index(u->notifiers, idx); 1601 if (!n) { 1602 /* 1603 * In case notification arrive out-of-order, 1604 * make room for current index. 
1605 */ 1606 g_ptr_array_remove_index(u->notifiers, idx); 1607 n = g_new0(VhostUserHostNotifier, 1); 1608 n->idx = idx; 1609 g_ptr_array_insert(u->notifiers, idx, n); 1610 trace_vhost_user_create_notifier(idx, n); 1611 } 1612 1613 return n; 1614 } 1615 1616 static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, 1617 VhostUserVringArea *area, 1618 int fd) 1619 { 1620 int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK; 1621 size_t page_size = qemu_real_host_page_size(); 1622 struct vhost_user *u = dev->opaque; 1623 VhostUserState *user = u->user; 1624 VirtIODevice *vdev = dev->vdev; 1625 VhostUserHostNotifier *n; 1626 void *addr; 1627 char *name; 1628 1629 if (!virtio_has_feature(dev->protocol_features, 1630 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) || 1631 vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) { 1632 return -EINVAL; 1633 } 1634 1635 /* 1636 * Fetch notifier and invalidate any old data before setting up 1637 * new mapped address. 1638 */ 1639 n = fetch_or_create_notifier(user, queue_idx); 1640 vhost_user_host_notifier_remove(n, vdev); 1641 1642 if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { 1643 return 0; 1644 } 1645 1646 /* Sanity check. */ 1647 if (area->size != page_size) { 1648 return -EINVAL; 1649 } 1650 1651 addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, 1652 fd, area->offset); 1653 if (addr == MAP_FAILED) { 1654 return -EFAULT; 1655 } 1656 1657 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]", 1658 user, queue_idx); 1659 if (!n->mr.ram) { /* Don't init again after suspend. */ 1660 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, 1661 page_size, addr); 1662 } else { 1663 n->mr.ram_block->host = addr; 1664 } 1665 g_free(name); 1666 1667 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) { 1668 object_unparent(OBJECT(&n->mr)); 1669 munmap(addr, page_size); 1670 return -ENXIO; 1671 } 1672 1673 n->addr = addr; 1674 1675 return 0; 1676 } 1677 1678 static void close_slave_channel(struct vhost_user *u) 1679 { 1680 g_source_destroy(u->slave_src); 1681 g_source_unref(u->slave_src); 1682 u->slave_src = NULL; 1683 object_unref(OBJECT(u->slave_ioc)); 1684 u->slave_ioc = NULL; 1685 } 1686 1687 static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, 1688 gpointer opaque) 1689 { 1690 struct vhost_dev *dev = opaque; 1691 struct vhost_user *u = dev->opaque; 1692 VhostUserHeader hdr = { 0, }; 1693 VhostUserPayload payload = { 0, }; 1694 Error *local_err = NULL; 1695 gboolean rc = G_SOURCE_CONTINUE; 1696 int ret = 0; 1697 struct iovec iov; 1698 g_autofree int *fd = NULL; 1699 size_t fdsize = 0; 1700 int i; 1701 1702 /* Read header */ 1703 iov.iov_base = &hdr; 1704 iov.iov_len = VHOST_USER_HDR_SIZE; 1705 1706 if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) { 1707 error_report_err(local_err); 1708 goto err; 1709 } 1710 1711 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) { 1712 error_report("Failed to read msg header." 
1713 " Size %d exceeds the maximum %zu.", hdr.size, 1714 VHOST_USER_PAYLOAD_SIZE); 1715 goto err; 1716 } 1717 1718 /* Read payload */ 1719 if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) { 1720 error_report_err(local_err); 1721 goto err; 1722 } 1723 1724 switch (hdr.request) { 1725 case VHOST_USER_SLAVE_IOTLB_MSG: 1726 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); 1727 break; 1728 case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG : 1729 ret = vhost_user_slave_handle_config_change(dev); 1730 break; 1731 case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: 1732 ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area, 1733 fd ? fd[0] : -1); 1734 break; 1735 default: 1736 error_report("Received unexpected msg type: %d.", hdr.request); 1737 ret = -EINVAL; 1738 } 1739 1740 /* 1741 * REPLY_ACK feature handling. Other reply types has to be managed 1742 * directly in their request handlers. 1743 */ 1744 if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1745 struct iovec iovec[2]; 1746 1747 1748 hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 1749 hdr.flags |= VHOST_USER_REPLY_MASK; 1750 1751 payload.u64 = !!ret; 1752 hdr.size = sizeof(payload.u64); 1753 1754 iovec[0].iov_base = &hdr; 1755 iovec[0].iov_len = VHOST_USER_HDR_SIZE; 1756 iovec[1].iov_base = &payload; 1757 iovec[1].iov_len = hdr.size; 1758 1759 if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) { 1760 error_report_err(local_err); 1761 goto err; 1762 } 1763 } 1764 1765 goto fdcleanup; 1766 1767 err: 1768 close_slave_channel(u); 1769 rc = G_SOURCE_REMOVE; 1770 1771 fdcleanup: 1772 if (fd) { 1773 for (i = 0; i < fdsize; i++) { 1774 close(fd[i]); 1775 } 1776 } 1777 return rc; 1778 } 1779 1780 static int vhost_setup_slave_channel(struct vhost_dev *dev) 1781 { 1782 VhostUserMsg msg = { 1783 .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD, 1784 .hdr.flags = VHOST_USER_VERSION, 1785 }; 1786 struct vhost_user *u = dev->opaque; 1787 int sv[2], ret = 0; 1788 bool reply_supported = virtio_has_feature(dev->protocol_features, 1789 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1790 Error *local_err = NULL; 1791 QIOChannel *ioc; 1792 1793 if (!virtio_has_feature(dev->protocol_features, 1794 VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { 1795 return 0; 1796 } 1797 1798 if (qemu_socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { 1799 int saved_errno = errno; 1800 error_report("socketpair() failed"); 1801 return -saved_errno; 1802 } 1803 1804 ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err)); 1805 if (!ioc) { 1806 error_report_err(local_err); 1807 return -ECONNREFUSED; 1808 } 1809 u->slave_ioc = ioc; 1810 slave_update_read_handler(dev, NULL); 1811 1812 if (reply_supported) { 1813 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1814 } 1815 1816 ret = vhost_user_write(dev, &msg, &sv[1], 1); 1817 if (ret) { 1818 goto out; 1819 } 1820 1821 if (reply_supported) { 1822 ret = process_message_reply(dev, &msg); 1823 } 1824 1825 out: 1826 close(sv[1]); 1827 if (ret) { 1828 close_slave_channel(u); 1829 } 1830 1831 return ret; 1832 } 1833 1834 #ifdef CONFIG_LINUX 1835 /* 1836 * Called back from the postcopy fault thread when a fault is received on our 1837 * ufd. 
1838 * TODO: This is Linux specific 1839 */ 1840 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd, 1841 void *ufd) 1842 { 1843 struct vhost_dev *dev = pcfd->data; 1844 struct vhost_user *u = dev->opaque; 1845 struct uffd_msg *msg = ufd; 1846 uint64_t faultaddr = msg->arg.pagefault.address; 1847 RAMBlock *rb = NULL; 1848 uint64_t rb_offset; 1849 int i; 1850 1851 trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr, 1852 dev->mem->nregions); 1853 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1854 trace_vhost_user_postcopy_fault_handler_loop(i, 1855 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size); 1856 if (faultaddr >= u->postcopy_client_bases[i]) { 1857 /* Ofset of the fault address in the vhost region */ 1858 uint64_t region_offset = faultaddr - u->postcopy_client_bases[i]; 1859 if (region_offset < dev->mem->regions[i].memory_size) { 1860 rb_offset = region_offset + u->region_rb_offset[i]; 1861 trace_vhost_user_postcopy_fault_handler_found(i, 1862 region_offset, rb_offset); 1863 rb = u->region_rb[i]; 1864 return postcopy_request_shared_page(pcfd, rb, faultaddr, 1865 rb_offset); 1866 } 1867 } 1868 } 1869 error_report("%s: Failed to find region for fault %" PRIx64, 1870 __func__, faultaddr); 1871 return -1; 1872 } 1873 1874 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb, 1875 uint64_t offset) 1876 { 1877 struct vhost_dev *dev = pcfd->data; 1878 struct vhost_user *u = dev->opaque; 1879 int i; 1880 1881 trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset); 1882 1883 if (!u) { 1884 return 0; 1885 } 1886 /* Translate the offset into an address in the clients address space */ 1887 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1888 if (u->region_rb[i] == rb && 1889 offset >= u->region_rb_offset[i] && 1890 offset < (u->region_rb_offset[i] + 1891 dev->mem->regions[i].memory_size)) { 1892 uint64_t client_addr = (offset - u->region_rb_offset[i]) + 1893 u->postcopy_client_bases[i]; 1894 trace_vhost_user_postcopy_waker_found(client_addr); 1895 return postcopy_wake_shared(pcfd, client_addr, rb); 1896 } 1897 } 1898 1899 trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset); 1900 return 0; 1901 } 1902 #endif 1903 1904 /* 1905 * Called at the start of an inbound postcopy on reception of the 1906 * 'advise' command. 1907 */ 1908 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) 1909 { 1910 #ifdef CONFIG_LINUX 1911 struct vhost_user *u = dev->opaque; 1912 CharBackend *chr = u->user->chr; 1913 int ufd; 1914 int ret; 1915 VhostUserMsg msg = { 1916 .hdr.request = VHOST_USER_POSTCOPY_ADVISE, 1917 .hdr.flags = VHOST_USER_VERSION, 1918 }; 1919 1920 ret = vhost_user_write(dev, &msg, NULL, 0); 1921 if (ret < 0) { 1922 error_setg(errp, "Failed to send postcopy_advise to vhost"); 1923 return ret; 1924 } 1925 1926 ret = vhost_user_read(dev, &msg); 1927 if (ret < 0) { 1928 error_setg(errp, "Failed to get postcopy_advise reply from vhost"); 1929 return ret; 1930 } 1931 1932 if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) { 1933 error_setg(errp, "Unexpected msg type. 
Expected %d received %d", 1934 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request); 1935 return -EPROTO; 1936 } 1937 1938 if (msg.hdr.size) { 1939 error_setg(errp, "Received bad msg size."); 1940 return -EPROTO; 1941 } 1942 ufd = qemu_chr_fe_get_msgfd(chr); 1943 if (ufd < 0) { 1944 error_setg(errp, "%s: Failed to get ufd", __func__); 1945 return -EIO; 1946 } 1947 qemu_socket_set_nonblock(ufd); 1948 1949 /* register ufd with userfault thread */ 1950 u->postcopy_fd.fd = ufd; 1951 u->postcopy_fd.data = dev; 1952 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler; 1953 u->postcopy_fd.waker = vhost_user_postcopy_waker; 1954 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */ 1955 postcopy_register_shared_ufd(&u->postcopy_fd); 1956 return 0; 1957 #else 1958 error_setg(errp, "Postcopy not supported on non-Linux systems"); 1959 return -ENOSYS; 1960 #endif 1961 } 1962 1963 /* 1964 * Called at the switch to postcopy on reception of the 'listen' command. 1965 */ 1966 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) 1967 { 1968 struct vhost_user *u = dev->opaque; 1969 int ret; 1970 VhostUserMsg msg = { 1971 .hdr.request = VHOST_USER_POSTCOPY_LISTEN, 1972 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1973 }; 1974 u->postcopy_listen = true; 1975 1976 trace_vhost_user_postcopy_listen(); 1977 1978 ret = vhost_user_write(dev, &msg, NULL, 0); 1979 if (ret < 0) { 1980 error_setg(errp, "Failed to send postcopy_listen to vhost"); 1981 return ret; 1982 } 1983 1984 ret = process_message_reply(dev, &msg); 1985 if (ret) { 1986 error_setg(errp, "Failed to receive reply to postcopy_listen"); 1987 return ret; 1988 } 1989 1990 return 0; 1991 } 1992 1993 /* 1994 * Called at the end of postcopy 1995 */ 1996 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp) 1997 { 1998 VhostUserMsg msg = { 1999 .hdr.request = VHOST_USER_POSTCOPY_END, 2000 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 2001 }; 2002 int ret; 2003 struct vhost_user *u = dev->opaque; 2004 2005 trace_vhost_user_postcopy_end_entry(); 2006 2007 ret = vhost_user_write(dev, &msg, NULL, 0); 2008 if (ret < 0) { 2009 error_setg(errp, "Failed to send postcopy_end to vhost"); 2010 return ret; 2011 } 2012 2013 ret = process_message_reply(dev, &msg); 2014 if (ret) { 2015 error_setg(errp, "Failed to receive reply to postcopy_end"); 2016 return ret; 2017 } 2018 postcopy_unregister_shared_ufd(&u->postcopy_fd); 2019 close(u->postcopy_fd.fd); 2020 u->postcopy_fd.handler = NULL; 2021 2022 trace_vhost_user_postcopy_end_exit(); 2023 2024 return 0; 2025 } 2026 2027 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, 2028 void *opaque) 2029 { 2030 struct PostcopyNotifyData *pnd = opaque; 2031 struct vhost_user *u = container_of(notifier, struct vhost_user, 2032 postcopy_notifier); 2033 struct vhost_dev *dev = u->dev; 2034 2035 switch (pnd->reason) { 2036 case POSTCOPY_NOTIFY_PROBE: 2037 if (!virtio_has_feature(dev->protocol_features, 2038 VHOST_USER_PROTOCOL_F_PAGEFAULT)) { 2039 /* TODO: Get the device name into this error somehow */ 2040 error_setg(pnd->errp, 2041 "vhost-user backend not capable of postcopy"); 2042 return -ENOENT; 2043 } 2044 break; 2045 2046 case POSTCOPY_NOTIFY_INBOUND_ADVISE: 2047 return vhost_user_postcopy_advise(dev, pnd->errp); 2048 2049 case POSTCOPY_NOTIFY_INBOUND_LISTEN: 2050 return vhost_user_postcopy_listen(dev, pnd->errp); 2051 2052 case POSTCOPY_NOTIFY_INBOUND_END: 2053 return vhost_user_postcopy_end(dev, pnd->errp); 2054 2055 default: 
static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
                                   Error **errp)
{
    uint64_t features, ram_slots;
    struct vhost_user *u;
    VhostUserState *vus = (VhostUserState *) opaque;
    int err;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    u = g_new0(struct vhost_user, 1);
    u->user = vus;
    u->dev = dev;
    dev->opaque = u;

    err = vhost_user_get_features(dev, &features);
    if (err < 0) {
        error_setg_errno(errp, -err, "vhost_backend_init failed");
        return err;
    }

    if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
        bool supports_f_config = vus->supports_config ||
            (dev->config_ops && dev->config_ops->vhost_dev_config_notifier);
        uint64_t protocol_features;

        dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;

        err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
                                 &protocol_features);
        if (err < 0) {
            error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
            return -EPROTO;
        }

        /*
         * We will use all the protocol features we support - although
         * we suppress F_CONFIG if we know QEMU's internal code cannot
         * support it.
         */
        protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK;

        if (supports_f_config) {
            if (!virtio_has_feature(protocol_features,
                                    VHOST_USER_PROTOCOL_F_CONFIG)) {
                error_setg(errp, "vhost-user device expecting "
                           "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user "
                           "backend does not support it.");
                return -EPROTO;
            }
        } else {
            if (virtio_has_feature(protocol_features,
                                   VHOST_USER_PROTOCOL_F_CONFIG)) {
                warn_reportf_err(*errp, "vhost-user backend supports "
                                 "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not.");
                protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
            }
        }

        /* final set of protocol features */
        dev->protocol_features = protocol_features;
        err = vhost_user_set_protocol_features(dev, dev->protocol_features);
        if (err < 0) {
            error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
            return -EPROTO;
        }

        /* query the max queues we support if backend supports Multiple Queue */
        if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
            err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
                                     &dev->max_queues);
            if (err < 0) {
                error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
                return -EPROTO;
            }
        } else {
            dev->max_queues = 1;
        }

        if (dev->num_queues && dev->max_queues < dev->num_queues) {
            error_setg(errp, "The maximum number of queues supported by the "
                       "backend is %" PRIu64, dev->max_queues);
            return -EINVAL;
        }

        if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
                !(virtio_has_feature(dev->protocol_features,
                                     VHOST_USER_PROTOCOL_F_SLAVE_REQ) &&
                  virtio_has_feature(dev->protocol_features,
                                     VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
            error_setg(errp, "IOMMU support requires reply-ack and "
                       "slave-req protocol features.");
            return -EINVAL;
        }

        /* get max memory regions if backend supports configurable RAM slots */
        if (!virtio_has_feature(dev->protocol_features,
                                VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) {
            u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS;
        } else {
            err = vhost_user_get_max_memslots(dev, &ram_slots);
            if (err < 0) {
                error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
                return -EPROTO;
            }

            if (ram_slots < u->user->memory_slots) {
                error_setg(errp, "The backend specified a max ram slots limit "
                           "of %" PRIu64", when the prior validated limit was "
                           "%d. This limit should never decrease.", ram_slots,
                           u->user->memory_slots);
                return -EINVAL;
            }

            u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS);
        }
    }

    if (dev->migration_blocker == NULL &&
        !virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
        error_setg(&dev->migration_blocker,
                   "Migration disabled: vhost-user backend lacks "
                   "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
    }

    if (dev->vq_index == 0) {
        err = vhost_setup_slave_channel(dev);
        if (err < 0) {
            error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
            return -EPROTO;
        }
    }

    u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
    postcopy_add_notifier(&u->postcopy_notifier);

    return 0;
}

static int vhost_user_backend_cleanup(struct vhost_dev *dev)
{
    struct vhost_user *u;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    u = dev->opaque;
    if (u->postcopy_notifier.notify) {
        postcopy_remove_notifier(&u->postcopy_notifier);
        u->postcopy_notifier.notify = NULL;
    }
    u->postcopy_listen = false;
    if (u->postcopy_fd.handler) {
        postcopy_unregister_shared_ufd(&u->postcopy_fd);
        close(u->postcopy_fd.fd);
        u->postcopy_fd.handler = NULL;
    }
    if (u->slave_ioc) {
        close_slave_channel(u);
    }
    g_free(u->region_rb);
    u->region_rb = NULL;
    g_free(u->region_rb_offset);
    u->region_rb_offset = NULL;
    u->region_rb_len = 0;
    g_free(u);
    dev->opaque = 0;

    return 0;
}

static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    return idx;
}

static int vhost_user_memslots_limit(struct vhost_dev *dev)
{
    struct vhost_user *u = dev->opaque;

    return u->user->memory_slots;
}

static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    return virtio_has_feature(dev->protocol_features,
                              VHOST_USER_PROTOCOL_F_LOG_SHMFD);
}

static int vhost_user_migration_done(struct vhost_dev *dev, char *mac_addr)
{
    VhostUserMsg msg = { };

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    /* If guest supports GUEST_ANNOUNCE do nothing */
    if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
        return 0;
    }

    /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */
    if (virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_RARP)) {
        msg.hdr.request = VHOST_USER_SEND_RARP;
        msg.hdr.flags = VHOST_USER_VERSION;
        memcpy((char *)&msg.payload.u64, mac_addr, 6);
        msg.hdr.size = sizeof(msg.payload.u64);

        return vhost_user_write(dev, &msg, NULL, 0);
    }
    return -ENOTSUP;
}

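/*
 * Two adjacent memory sections can only be merged into one vhost-user
 * region when they are backed by the same fd, since each region is
 * described to the backend as an fd plus an mmap offset.
 */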
static bool vhost_user_can_merge(struct vhost_dev *dev,
                                 uint64_t start1, uint64_t size1,
                                 uint64_t start2, uint64_t size2)
{
    ram_addr_t offset;
    int mfd, rfd;

    (void)vhost_user_get_mr_data(start1, &offset, &mfd);
    (void)vhost_user_get_mr_data(start2, &offset, &rfd);

    return mfd == rfd;
}

static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
{
    VhostUserMsg msg;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
    int ret;

    if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
        return 0;
    }

    msg.hdr.request = VHOST_USER_NET_SET_MTU;
    msg.payload.u64 = mtu;
    msg.hdr.size = sizeof(msg.payload.u64);
    msg.hdr.flags = VHOST_USER_VERSION;
    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    /* If reply_ack supported, slave has to ack specified MTU is valid */
    if (reply_supported) {
        return process_message_reply(dev, &msg);
    }

    return 0;
}

static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
                                            struct vhost_iotlb_msg *imsg)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_IOTLB_MSG,
        .hdr.size = sizeof(msg.payload.iotlb),
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
        .payload.iotlb = *imsg,
    };

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    return process_message_reply(dev, &msg);
}

static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
{
    /* No-op as the receive channel is not dedicated to IOTLB messages. */
}

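/*
 * Read the device config space from the backend.  Request and reply share
 * the same layout: a header of offset/size/flags (VHOST_USER_CONFIG_HDR_SIZE
 * bytes) followed by up to VHOST_USER_MAX_CONFIG_SIZE bytes of config data.
 */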
static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len, Error **errp)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_CONFIG,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_CONFIG)) {
        error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported");
        return -EINVAL;
    }

    assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE);

    msg.payload.config.offset = 0;
    msg.payload.config.size = config_len;
    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "vhost_get_config failed");
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "vhost_get_config failed");
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
        error_setg(errp,
                   "Received unexpected msg type. Expected %d received %d",
                   VHOST_USER_GET_CONFIG, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
        error_setg(errp, "Received bad msg size.");
        return -EPROTO;
    }

    memcpy(config, msg.payload.config.region, config_len);

    return 0;
}

static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size, uint32_t flags)
{
    int ret;
    uint8_t *p;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);

    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_CONFIG,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_CONFIG)) {
        return -ENOTSUP;
    }

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    if (size > VHOST_USER_MAX_CONFIG_SIZE) {
        return -EINVAL;
    }

    msg.payload.config.offset = offset;
    msg.payload.config.size = size;
    msg.payload.config.flags = flags;
    p = msg.payload.config.region;
    memcpy(p, data, size);

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    if (reply_supported) {
        return process_message_reply(dev, &msg);
    }

    return 0;
}

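/*
 * Create a crypto session on the backend.  The session parameters and any
 * cipher/authentication keys are copied into the request payload; on
 * success the backend replies with the same message carrying the new
 * session id (a negative session id indicates failure).
 */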
static int vhost_user_crypto_create_session(struct vhost_dev *dev,
                                            void *session_info,
                                            uint64_t *session_id)
{
    int ret;
    bool crypto_session = virtio_has_feature(dev->protocol_features,
                                       VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
    CryptoDevBackendSymSessionInfo *sess_info = session_info;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = sizeof(msg.payload.session),
    };

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    if (!crypto_session) {
        error_report("vhost-user trying to send unhandled ioctl");
        return -ENOTSUP;
    }

    memcpy(&msg.payload.session.session_setup_data, sess_info,
           sizeof(CryptoDevBackendSymSessionInfo));
    if (sess_info->key_len) {
        memcpy(&msg.payload.session.key, sess_info->cipher_key,
               sess_info->key_len);
    }
    if (sess_info->auth_key_len > 0) {
        memcpy(&msg.payload.session.auth_key, sess_info->auth_key,
               sess_info->auth_key_len);
    }
    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_report("vhost_user_write() returned %d, create session failed",
                     ret);
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        error_report("vhost_user_read() returned %d, create session failed",
                     ret);
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != sizeof(msg.payload.session)) {
        error_report("Received bad msg size.");
        return -EPROTO;
    }

    if (msg.payload.session.session_id < 0) {
        error_report("Bad session id: %" PRId64 "",
                     msg.payload.session.session_id);
        return -EINVAL;
    }
    *session_id = msg.payload.session.session_id;

    return 0;
}

static int
vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
{
    int ret;
    bool crypto_session = virtio_has_feature(dev->protocol_features,
                                       VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = sizeof(msg.payload.u64),
    };
    msg.payload.u64 = session_id;

    if (!crypto_session) {
        error_report("vhost-user trying to send unhandled ioctl");
        return -ENOTSUP;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_report("vhost_user_write() returned %d, close session failed",
                     ret);
        return ret;
    }

    return 0;
}

static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
                                          MemoryRegionSection *section)
{
    return memory_region_get_fd(section->mr) >= 0;
}

static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
                                      uint16_t queue_size,
                                      struct vhost_inflight *inflight)
{
    void *addr;
    int fd;
    int ret;
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_INFLIGHT_FD,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.inflight.num_queues = dev->nvqs,
        .payload.inflight.queue_size = queue_size,
        .hdr.size = sizeof(msg.payload.inflight),
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) {
        error_report("Received unexpected msg type. "
                     "Expected %d received %d",
                     VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != sizeof(msg.payload.inflight)) {
        error_report("Received bad msg size.");
        return -EPROTO;
    }

    if (!msg.payload.inflight.mmap_size) {
        return 0;
    }

    fd = qemu_chr_fe_get_msgfd(chr);
    if (fd < 0) {
        error_report("Failed to get mem fd");
        return -EIO;
    }

    addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE,
                MAP_SHARED, fd, msg.payload.inflight.mmap_offset);

    if (addr == MAP_FAILED) {
        error_report("Failed to mmap mem fd");
        close(fd);
        return -EFAULT;
    }

    inflight->addr = addr;
    inflight->fd = fd;
    inflight->size = msg.payload.inflight.mmap_size;
    inflight->offset = msg.payload.inflight.mmap_offset;
    inflight->queue_size = queue_size;

    return 0;
}

static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
                                      struct vhost_inflight *inflight)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_INFLIGHT_FD,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.inflight.mmap_size = inflight->size,
        .payload.inflight.mmap_offset = inflight->offset,
        .payload.inflight.num_queues = dev->nvqs,
        .payload.inflight.queue_size = inflight->queue_size,
        .hdr.size = sizeof(msg.payload.inflight),
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    return vhost_user_write(dev, &msg, &inflight->fd, 1);
}

static void vhost_user_state_destroy(gpointer data)
{
    VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
    if (n) {
        vhost_user_host_notifier_remove(n, NULL);
        object_unparent(OBJECT(&n->mr));
        /*
         * We can't free until vhost_user_host_notifier_remove has
         * done its thing so schedule the free with RCU.
         */
        g_free_rcu(n, rcu);
    }
}

bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
{
    if (user->chr) {
        error_setg(errp, "Cannot initialize vhost-user state");
        return false;
    }
    user->chr = chr;
    user->memory_slots = 0;
    user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4,
                                           &vhost_user_state_destroy);
    return true;
}

void vhost_user_cleanup(VhostUserState *user)
{
    if (!user->chr) {
        return;
    }
    memory_region_transaction_begin();
    user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true);
    memory_region_transaction_commit();
    user->chr = NULL;
}

typedef struct {
    vu_async_close_fn cb;
    DeviceState *dev;
    CharBackend *cd;
    struct vhost_dev *vhost;
} VhostAsyncCallback;

static void vhost_user_async_close_bh(void *opaque)
{
    VhostAsyncCallback *data = opaque;
    struct vhost_dev *vhost = data->vhost;

    /*
     * If the vhost_dev has been cleared in the meantime there is
     * nothing left to do as some other path has completed the
     * cleanup.
     */
    if (vhost->vdev) {
        data->cb(data->dev);
    }

    g_free(data);
}

/*
 * We only schedule the work if the machine is running. If suspended
 * we want to keep all the in-flight data as is for migration
 * purposes.
 */
void vhost_user_async_close(DeviceState *d,
                            CharBackend *chardev, struct vhost_dev *vhost,
                            vu_async_close_fn cb)
{
    if (!runstate_check(RUN_STATE_SHUTDOWN)) {
        /*
         * A close event may happen during a read/write, but vhost
         * code assumes the vhost_dev remains setup, so delay the
         * stop & clear.
         */
        AioContext *ctx = qemu_get_current_aio_context();
        VhostAsyncCallback *data = g_new0(VhostAsyncCallback, 1);

        /* Save data for the callback */
        data->cb = cb;
        data->dev = d;
        data->cd = chardev;
        data->vhost = vhost;

        /* Disable any further notifications on the chardev */
        qemu_chr_fe_set_handlers(chardev,
                                 NULL, NULL, NULL, NULL, NULL, NULL,
                                 false);

        aio_bh_schedule_oneshot(ctx, vhost_user_async_close_bh, data);

        /*
         * Move the vhost device to the stopped state. The vhost-user device
         * will be cleaned up and disconnected in the BH. This can be useful
         * in the vhost migration code. If a disconnect was caught, this
         * gives the general vhost code a way to get the device state without
         * knowing its type (in this case vhost-user).
         *
         * Note that if the vhost device is fully cleared by the time we
         * execute the bottom half, we won't continue with the cleanup.
         */
        vhost->started = false;
    }
}

static int vhost_user_dev_start(struct vhost_dev *dev, bool started)
{
    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_STATUS)) {
        return 0;
    }

    /* Set device status only for last queue pair */
    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
        return 0;
    }

    if (started) {
        return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                          VIRTIO_CONFIG_S_DRIVER |
                                          VIRTIO_CONFIG_S_DRIVER_OK);
    } else {
        return vhost_user_set_status(dev, 0);
    }
}

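/*
 * The vhost_ops table used for devices whose backend type is
 * VHOST_BACKEND_TYPE_USER.
 */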
const VhostOps user_ops = {
        .backend_type = VHOST_BACKEND_TYPE_USER,
        .vhost_backend_init = vhost_user_backend_init,
        .vhost_backend_cleanup = vhost_user_backend_cleanup,
        .vhost_backend_memslots_limit = vhost_user_memslots_limit,
        .vhost_set_log_base = vhost_user_set_log_base,
        .vhost_set_mem_table = vhost_user_set_mem_table,
        .vhost_set_vring_addr = vhost_user_set_vring_addr,
        .vhost_set_vring_endian = vhost_user_set_vring_endian,
        .vhost_set_vring_num = vhost_user_set_vring_num,
        .vhost_set_vring_base = vhost_user_set_vring_base,
        .vhost_get_vring_base = vhost_user_get_vring_base,
        .vhost_set_vring_kick = vhost_user_set_vring_kick,
        .vhost_set_vring_call = vhost_user_set_vring_call,
        .vhost_set_vring_err = vhost_user_set_vring_err,
        .vhost_set_features = vhost_user_set_features,
        .vhost_get_features = vhost_user_get_features,
        .vhost_set_owner = vhost_user_set_owner,
        .vhost_reset_device = vhost_user_reset_device,
        .vhost_get_vq_index = vhost_user_get_vq_index,
        .vhost_set_vring_enable = vhost_user_set_vring_enable,
        .vhost_requires_shm_log = vhost_user_requires_shm_log,
        .vhost_migration_done = vhost_user_migration_done,
        .vhost_backend_can_merge = vhost_user_can_merge,
        .vhost_net_set_mtu = vhost_user_net_set_mtu,
        .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
        .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
        .vhost_get_config = vhost_user_get_config,
        .vhost_set_config = vhost_user_set_config,
        .vhost_crypto_create_session = vhost_user_crypto_create_session,
        .vhost_crypto_close_session = vhost_user_crypto_close_session,
        .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
        .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
        .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
        .vhost_dev_start = vhost_user_dev_start,
};
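
/*
 * Usage sketch (illustrative only, based on the generic vhost code rather
 * than anything defined in this file): a front-end that owns a CharBackend
 * "chr" typically pairs the public entry points above as
 *
 *     VhostUserState user;
 *
 *     if (!vhost_user_init(&user, &chr, errp)) {
 *         return false;
 *     }
 *     ...initialise the vhost device with VHOST_BACKEND_TYPE_USER so that
 *     the user_ops table above is selected...
 *     vhost_user_cleanup(&user);
 */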