1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "qemu/osdep.h" 12 #include "qapi/error.h" 13 #include "hw/virtio/vhost.h" 14 #include "hw/virtio/vhost-user.h" 15 #include "hw/virtio/vhost-backend.h" 16 #include "hw/virtio/virtio.h" 17 #include "hw/virtio/virtio-net.h" 18 #include "chardev/char-fe.h" 19 #include "io/channel-socket.h" 20 #include "sysemu/kvm.h" 21 #include "qemu/error-report.h" 22 #include "qemu/main-loop.h" 23 #include "qemu/sockets.h" 24 #include "sysemu/cryptodev.h" 25 #include "migration/migration.h" 26 #include "migration/postcopy-ram.h" 27 #include "trace.h" 28 #include "exec/ramblock.h" 29 30 #include <sys/ioctl.h> 31 #include <sys/socket.h> 32 #include <sys/un.h> 33 34 #include "standard-headers/linux/vhost_types.h" 35 36 #ifdef CONFIG_LINUX 37 #include <linux/userfaultfd.h> 38 #endif 39 40 #define VHOST_MEMORY_BASELINE_NREGIONS 8 41 #define VHOST_USER_F_PROTOCOL_FEATURES 30 42 #define VHOST_USER_SLAVE_MAX_FDS 8 43 44 /* 45 * Set maximum number of RAM slots supported to 46 * the maximum number supported by the target 47 * hardware plaform. 48 */ 49 #if defined(TARGET_X86) || defined(TARGET_X86_64) || \ 50 defined(TARGET_ARM) || defined(TARGET_ARM_64) 51 #include "hw/acpi/acpi.h" 52 #define VHOST_USER_MAX_RAM_SLOTS ACPI_MAX_RAM_SLOTS 53 54 #elif defined(TARGET_PPC) || defined(TARGET_PPC64) 55 #include "hw/ppc/spapr.h" 56 #define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS 57 58 #else 59 #define VHOST_USER_MAX_RAM_SLOTS 512 60 #endif 61 62 /* 63 * Maximum size of virtio device config space 64 */ 65 #define VHOST_USER_MAX_CONFIG_SIZE 256 66 67 enum VhostUserProtocolFeature { 68 VHOST_USER_PROTOCOL_F_MQ = 0, 69 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, 70 VHOST_USER_PROTOCOL_F_RARP = 2, 71 VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, 72 VHOST_USER_PROTOCOL_F_NET_MTU = 4, 73 VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, 74 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, 75 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, 76 VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, 77 VHOST_USER_PROTOCOL_F_CONFIG = 9, 78 VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, 79 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, 80 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, 81 VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, 82 /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */ 83 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, 84 VHOST_USER_PROTOCOL_F_MAX 85 }; 86 87 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) 88 89 typedef enum VhostUserRequest { 90 VHOST_USER_NONE = 0, 91 VHOST_USER_GET_FEATURES = 1, 92 VHOST_USER_SET_FEATURES = 2, 93 VHOST_USER_SET_OWNER = 3, 94 VHOST_USER_RESET_OWNER = 4, 95 VHOST_USER_SET_MEM_TABLE = 5, 96 VHOST_USER_SET_LOG_BASE = 6, 97 VHOST_USER_SET_LOG_FD = 7, 98 VHOST_USER_SET_VRING_NUM = 8, 99 VHOST_USER_SET_VRING_ADDR = 9, 100 VHOST_USER_SET_VRING_BASE = 10, 101 VHOST_USER_GET_VRING_BASE = 11, 102 VHOST_USER_SET_VRING_KICK = 12, 103 VHOST_USER_SET_VRING_CALL = 13, 104 VHOST_USER_SET_VRING_ERR = 14, 105 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 106 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 107 VHOST_USER_GET_QUEUE_NUM = 17, 108 VHOST_USER_SET_VRING_ENABLE = 18, 109 VHOST_USER_SEND_RARP = 19, 110 VHOST_USER_NET_SET_MTU = 20, 111 VHOST_USER_SET_SLAVE_REQ_FD = 21, 112 VHOST_USER_IOTLB_MSG = 22, 113 VHOST_USER_SET_VRING_ENDIAN = 23, 114 VHOST_USER_GET_CONFIG = 24, 115 VHOST_USER_SET_CONFIG = 25, 116 VHOST_USER_CREATE_CRYPTO_SESSION = 26, 117 VHOST_USER_CLOSE_CRYPTO_SESSION = 27, 118 VHOST_USER_POSTCOPY_ADVISE = 28, 119 VHOST_USER_POSTCOPY_LISTEN = 29, 120 VHOST_USER_POSTCOPY_END = 30, 121 VHOST_USER_GET_INFLIGHT_FD = 31, 122 VHOST_USER_SET_INFLIGHT_FD = 32, 123 VHOST_USER_GPU_SET_SOCKET = 33, 124 VHOST_USER_RESET_DEVICE = 34, 125 /* Message number 35 reserved for VHOST_USER_VRING_KICK. */ 126 VHOST_USER_GET_MAX_MEM_SLOTS = 36, 127 VHOST_USER_ADD_MEM_REG = 37, 128 VHOST_USER_REM_MEM_REG = 38, 129 VHOST_USER_MAX 130 } VhostUserRequest; 131 132 typedef enum VhostUserSlaveRequest { 133 VHOST_USER_SLAVE_NONE = 0, 134 VHOST_USER_SLAVE_IOTLB_MSG = 1, 135 VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, 136 VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, 137 VHOST_USER_SLAVE_MAX 138 } VhostUserSlaveRequest; 139 140 typedef struct VhostUserMemoryRegion { 141 uint64_t guest_phys_addr; 142 uint64_t memory_size; 143 uint64_t userspace_addr; 144 uint64_t mmap_offset; 145 } VhostUserMemoryRegion; 146 147 typedef struct VhostUserMemory { 148 uint32_t nregions; 149 uint32_t padding; 150 VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS]; 151 } VhostUserMemory; 152 153 typedef struct VhostUserMemRegMsg { 154 uint64_t padding; 155 VhostUserMemoryRegion region; 156 } VhostUserMemRegMsg; 157 158 typedef struct VhostUserLog { 159 uint64_t mmap_size; 160 uint64_t mmap_offset; 161 } VhostUserLog; 162 163 typedef struct VhostUserConfig { 164 uint32_t offset; 165 uint32_t size; 166 uint32_t flags; 167 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; 168 } VhostUserConfig; 169 170 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512 171 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64 172 173 typedef struct VhostUserCryptoSession { 174 /* session id for success, -1 on errors */ 175 int64_t session_id; 176 CryptoDevBackendSymSessionInfo session_setup_data; 177 uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN]; 178 uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN]; 179 } VhostUserCryptoSession; 180 181 static VhostUserConfig c __attribute__ ((unused)); 182 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \ 183 + sizeof(c.size) \ 184 + sizeof(c.flags)) 185 186 typedef struct VhostUserVringArea { 187 uint64_t u64; 188 uint64_t size; 189 uint64_t offset; 190 } VhostUserVringArea; 191 192 typedef struct VhostUserInflight { 193 uint64_t mmap_size; 194 uint64_t mmap_offset; 195 uint16_t num_queues; 196 uint16_t queue_size; 197 } VhostUserInflight; 198 199 typedef struct { 200 VhostUserRequest request; 201 202 #define VHOST_USER_VERSION_MASK (0x3) 203 #define VHOST_USER_REPLY_MASK (0x1<<2) 204 #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) 205 uint32_t flags; 206 uint32_t size; /* the following payload size */ 207 } QEMU_PACKED VhostUserHeader; 208 209 typedef union { 210 #define VHOST_USER_VRING_IDX_MASK (0xff) 211 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) 212 uint64_t u64; 213 struct vhost_vring_state state; 214 struct vhost_vring_addr addr; 215 VhostUserMemory memory; 216 VhostUserMemRegMsg mem_reg; 217 VhostUserLog log; 218 struct vhost_iotlb_msg iotlb; 219 VhostUserConfig config; 220 VhostUserCryptoSession session; 221 VhostUserVringArea area; 222 VhostUserInflight inflight; 223 } VhostUserPayload; 224 225 typedef struct VhostUserMsg { 226 VhostUserHeader hdr; 227 VhostUserPayload payload; 228 } QEMU_PACKED VhostUserMsg; 229 230 static VhostUserMsg m __attribute__ ((unused)); 231 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader)) 232 233 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload)) 234 235 /* The version of the protocol we support */ 236 #define VHOST_USER_VERSION (0x1) 237 238 struct vhost_user { 239 struct vhost_dev *dev; 240 /* Shared between vhost devs of the same virtio device */ 241 VhostUserState *user; 242 QIOChannel *slave_ioc; 243 GSource *slave_src; 244 NotifierWithReturn postcopy_notifier; 245 struct PostCopyFD postcopy_fd; 246 uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS]; 247 /* Length of the region_rb and region_rb_offset arrays */ 248 size_t region_rb_len; 249 /* RAMBlock associated with a given region */ 250 RAMBlock **region_rb; 251 /* The offset from the start of the RAMBlock to the start of the 252 * vhost region. 253 */ 254 ram_addr_t *region_rb_offset; 255 256 /* True once we've entered postcopy_listen */ 257 bool postcopy_listen; 258 259 /* Our current regions */ 260 int num_shadow_regions; 261 struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS]; 262 }; 263 264 struct scrub_regions { 265 struct vhost_memory_region *region; 266 int reg_idx; 267 int fd_idx; 268 }; 269 270 static bool ioeventfd_enabled(void) 271 { 272 return !kvm_enabled() || kvm_eventfds_enabled(); 273 } 274 275 static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg) 276 { 277 struct vhost_user *u = dev->opaque; 278 CharBackend *chr = u->user->chr; 279 uint8_t *p = (uint8_t *) msg; 280 int r, size = VHOST_USER_HDR_SIZE; 281 282 r = qemu_chr_fe_read_all(chr, p, size); 283 if (r != size) { 284 int saved_errno = errno; 285 error_report("Failed to read msg header. Read %d instead of %d." 286 " Original request %d.", r, size, msg->hdr.request); 287 return r < 0 ? -saved_errno : -EIO; 288 } 289 290 /* validate received flags */ 291 if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 292 error_report("Failed to read msg header." 293 " Flags 0x%x instead of 0x%x.", msg->hdr.flags, 294 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 295 return -EPROTO; 296 } 297 298 return 0; 299 } 300 301 struct vhost_user_read_cb_data { 302 struct vhost_dev *dev; 303 VhostUserMsg *msg; 304 GMainLoop *loop; 305 int ret; 306 }; 307 308 static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, 309 gpointer opaque) 310 { 311 struct vhost_user_read_cb_data *data = opaque; 312 struct vhost_dev *dev = data->dev; 313 VhostUserMsg *msg = data->msg; 314 struct vhost_user *u = dev->opaque; 315 CharBackend *chr = u->user->chr; 316 uint8_t *p = (uint8_t *) msg; 317 int r, size; 318 319 r = vhost_user_read_header(dev, msg); 320 if (r < 0) { 321 data->ret = r; 322 goto end; 323 } 324 325 /* validate message size is sane */ 326 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) { 327 error_report("Failed to read msg header." 328 " Size %d exceeds the maximum %zu.", msg->hdr.size, 329 VHOST_USER_PAYLOAD_SIZE); 330 data->ret = -EPROTO; 331 goto end; 332 } 333 334 if (msg->hdr.size) { 335 p += VHOST_USER_HDR_SIZE; 336 size = msg->hdr.size; 337 r = qemu_chr_fe_read_all(chr, p, size); 338 if (r != size) { 339 int saved_errno = errno; 340 error_report("Failed to read msg payload." 341 " Read %d instead of %d.", r, msg->hdr.size); 342 data->ret = r < 0 ? -saved_errno : -EIO; 343 goto end; 344 } 345 } 346 347 end: 348 g_main_loop_quit(data->loop); 349 return G_SOURCE_REMOVE; 350 } 351 352 static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, 353 gpointer opaque); 354 355 /* 356 * This updates the read handler to use a new event loop context. 357 * Event sources are removed from the previous context : this ensures 358 * that events detected in the previous context are purged. They will 359 * be re-detected and processed in the new context. 360 */ 361 static void slave_update_read_handler(struct vhost_dev *dev, 362 GMainContext *ctxt) 363 { 364 struct vhost_user *u = dev->opaque; 365 366 if (!u->slave_ioc) { 367 return; 368 } 369 370 if (u->slave_src) { 371 g_source_destroy(u->slave_src); 372 g_source_unref(u->slave_src); 373 } 374 375 u->slave_src = qio_channel_add_watch_source(u->slave_ioc, 376 G_IO_IN | G_IO_HUP, 377 slave_read, dev, NULL, 378 ctxt); 379 } 380 381 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 382 { 383 struct vhost_user *u = dev->opaque; 384 CharBackend *chr = u->user->chr; 385 GMainContext *prev_ctxt = chr->chr->gcontext; 386 GMainContext *ctxt = g_main_context_new(); 387 GMainLoop *loop = g_main_loop_new(ctxt, FALSE); 388 struct vhost_user_read_cb_data data = { 389 .dev = dev, 390 .loop = loop, 391 .msg = msg, 392 .ret = 0 393 }; 394 395 /* 396 * We want to be able to monitor the slave channel fd while waiting 397 * for chr I/O. This requires an event loop, but we can't nest the 398 * one to which chr is currently attached : its fd handlers might not 399 * be prepared for re-entrancy. So we create a new one and switch chr 400 * to use it. 401 */ 402 slave_update_read_handler(dev, ctxt); 403 qemu_chr_be_update_read_handlers(chr->chr, ctxt); 404 qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data); 405 406 g_main_loop_run(loop); 407 408 /* 409 * Restore the previous event loop context. This also destroys/recreates 410 * event sources : this guarantees that all pending events in the original 411 * context that have been processed by the nested loop are purged. 412 */ 413 qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt); 414 slave_update_read_handler(dev, NULL); 415 416 g_main_loop_unref(loop); 417 g_main_context_unref(ctxt); 418 419 return data.ret; 420 } 421 422 static int process_message_reply(struct vhost_dev *dev, 423 const VhostUserMsg *msg) 424 { 425 int ret; 426 VhostUserMsg msg_reply; 427 428 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { 429 return 0; 430 } 431 432 ret = vhost_user_read(dev, &msg_reply); 433 if (ret < 0) { 434 return ret; 435 } 436 437 if (msg_reply.hdr.request != msg->hdr.request) { 438 error_report("Received unexpected msg type. " 439 "Expected %d received %d", 440 msg->hdr.request, msg_reply.hdr.request); 441 return -EPROTO; 442 } 443 444 return msg_reply.payload.u64 ? -EIO : 0; 445 } 446 447 static bool vhost_user_one_time_request(VhostUserRequest request) 448 { 449 switch (request) { 450 case VHOST_USER_SET_OWNER: 451 case VHOST_USER_RESET_OWNER: 452 case VHOST_USER_SET_MEM_TABLE: 453 case VHOST_USER_GET_QUEUE_NUM: 454 case VHOST_USER_NET_SET_MTU: 455 return true; 456 default: 457 return false; 458 } 459 } 460 461 /* most non-init callers ignore the error */ 462 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 463 int *fds, int fd_num) 464 { 465 struct vhost_user *u = dev->opaque; 466 CharBackend *chr = u->user->chr; 467 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size; 468 469 /* 470 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, 471 * we just need send it once in the first time. For later such 472 * request, we just ignore it. 473 */ 474 if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) { 475 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 476 return 0; 477 } 478 479 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) { 480 error_report("Failed to set msg fds."); 481 return -EINVAL; 482 } 483 484 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size); 485 if (ret != size) { 486 int saved_errno = errno; 487 error_report("Failed to write msg." 488 " Wrote %d instead of %d.", ret, size); 489 return ret < 0 ? -saved_errno : -EIO; 490 } 491 492 trace_vhost_user_write(msg->hdr.request, msg->hdr.flags); 493 494 return 0; 495 } 496 497 int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd) 498 { 499 VhostUserMsg msg = { 500 .hdr.request = VHOST_USER_GPU_SET_SOCKET, 501 .hdr.flags = VHOST_USER_VERSION, 502 }; 503 504 return vhost_user_write(dev, &msg, &fd, 1); 505 } 506 507 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, 508 struct vhost_log *log) 509 { 510 int fds[VHOST_USER_MAX_RAM_SLOTS]; 511 size_t fd_num = 0; 512 bool shmfd = virtio_has_feature(dev->protocol_features, 513 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 514 int ret; 515 VhostUserMsg msg = { 516 .hdr.request = VHOST_USER_SET_LOG_BASE, 517 .hdr.flags = VHOST_USER_VERSION, 518 .payload.log.mmap_size = log->size * sizeof(*(log->log)), 519 .payload.log.mmap_offset = 0, 520 .hdr.size = sizeof(msg.payload.log), 521 }; 522 523 if (shmfd && log->fd != -1) { 524 fds[fd_num++] = log->fd; 525 } 526 527 ret = vhost_user_write(dev, &msg, fds, fd_num); 528 if (ret < 0) { 529 return ret; 530 } 531 532 if (shmfd) { 533 msg.hdr.size = 0; 534 ret = vhost_user_read(dev, &msg); 535 if (ret < 0) { 536 return ret; 537 } 538 539 if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) { 540 error_report("Received unexpected msg type. " 541 "Expected %d received %d", 542 VHOST_USER_SET_LOG_BASE, msg.hdr.request); 543 return -EPROTO; 544 } 545 } 546 547 trace_vhost_user_read(msg.hdr.request, msg.hdr.flags); 548 549 return 0; 550 } 551 552 static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset, 553 int *fd) 554 { 555 MemoryRegion *mr; 556 557 assert((uintptr_t)addr == addr); 558 mr = memory_region_from_host((void *)(uintptr_t)addr, offset); 559 *fd = memory_region_get_fd(mr); 560 561 return mr; 562 } 563 564 static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst, 565 struct vhost_memory_region *src, 566 uint64_t mmap_offset) 567 { 568 assert(src != NULL && dst != NULL); 569 dst->userspace_addr = src->userspace_addr; 570 dst->memory_size = src->memory_size; 571 dst->guest_phys_addr = src->guest_phys_addr; 572 dst->mmap_offset = mmap_offset; 573 } 574 575 static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, 576 struct vhost_dev *dev, 577 VhostUserMsg *msg, 578 int *fds, size_t *fd_num, 579 bool track_ramblocks) 580 { 581 int i, fd; 582 ram_addr_t offset; 583 MemoryRegion *mr; 584 struct vhost_memory_region *reg; 585 VhostUserMemoryRegion region_buffer; 586 587 msg->hdr.request = VHOST_USER_SET_MEM_TABLE; 588 589 for (i = 0; i < dev->mem->nregions; ++i) { 590 reg = dev->mem->regions + i; 591 592 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 593 if (fd > 0) { 594 if (track_ramblocks) { 595 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS); 596 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name, 597 reg->memory_size, 598 reg->guest_phys_addr, 599 reg->userspace_addr, 600 offset); 601 u->region_rb_offset[i] = offset; 602 u->region_rb[i] = mr->ram_block; 603 } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) { 604 error_report("Failed preparing vhost-user memory table msg"); 605 return -ENOBUFS; 606 } 607 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 608 msg->payload.memory.regions[*fd_num] = region_buffer; 609 fds[(*fd_num)++] = fd; 610 } else if (track_ramblocks) { 611 u->region_rb_offset[i] = 0; 612 u->region_rb[i] = NULL; 613 } 614 } 615 616 msg->payload.memory.nregions = *fd_num; 617 618 if (!*fd_num) { 619 error_report("Failed initializing vhost-user memory map, " 620 "consider using -object memory-backend-file share=on"); 621 return -EINVAL; 622 } 623 624 msg->hdr.size = sizeof(msg->payload.memory.nregions); 625 msg->hdr.size += sizeof(msg->payload.memory.padding); 626 msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion); 627 628 return 0; 629 } 630 631 static inline bool reg_equal(struct vhost_memory_region *shadow_reg, 632 struct vhost_memory_region *vdev_reg) 633 { 634 return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr && 635 shadow_reg->userspace_addr == vdev_reg->userspace_addr && 636 shadow_reg->memory_size == vdev_reg->memory_size; 637 } 638 639 static void scrub_shadow_regions(struct vhost_dev *dev, 640 struct scrub_regions *add_reg, 641 int *nr_add_reg, 642 struct scrub_regions *rem_reg, 643 int *nr_rem_reg, uint64_t *shadow_pcb, 644 bool track_ramblocks) 645 { 646 struct vhost_user *u = dev->opaque; 647 bool found[VHOST_USER_MAX_RAM_SLOTS] = {}; 648 struct vhost_memory_region *reg, *shadow_reg; 649 int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0; 650 ram_addr_t offset; 651 MemoryRegion *mr; 652 bool matching; 653 654 /* 655 * Find memory regions present in our shadow state which are not in 656 * the device's current memory state. 657 * 658 * Mark regions in both the shadow and device state as "found". 659 */ 660 for (i = 0; i < u->num_shadow_regions; i++) { 661 shadow_reg = &u->shadow_regions[i]; 662 matching = false; 663 664 for (j = 0; j < dev->mem->nregions; j++) { 665 reg = &dev->mem->regions[j]; 666 667 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 668 669 if (reg_equal(shadow_reg, reg)) { 670 matching = true; 671 found[j] = true; 672 if (track_ramblocks) { 673 /* 674 * Reset postcopy client bases, region_rb, and 675 * region_rb_offset in case regions are removed. 676 */ 677 if (fd > 0) { 678 u->region_rb_offset[j] = offset; 679 u->region_rb[j] = mr->ram_block; 680 shadow_pcb[j] = u->postcopy_client_bases[i]; 681 } else { 682 u->region_rb_offset[j] = 0; 683 u->region_rb[j] = NULL; 684 } 685 } 686 break; 687 } 688 } 689 690 /* 691 * If the region was not found in the current device memory state 692 * create an entry for it in the removed list. 693 */ 694 if (!matching) { 695 rem_reg[rm_idx].region = shadow_reg; 696 rem_reg[rm_idx++].reg_idx = i; 697 } 698 } 699 700 /* 701 * For regions not marked "found", create entries in the added list. 702 * 703 * Note their indexes in the device memory state and the indexes of their 704 * file descriptors. 705 */ 706 for (i = 0; i < dev->mem->nregions; i++) { 707 reg = &dev->mem->regions[i]; 708 vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 709 if (fd > 0) { 710 ++fd_num; 711 } 712 713 /* 714 * If the region was in both the shadow and device state we don't 715 * need to send a VHOST_USER_ADD_MEM_REG message for it. 716 */ 717 if (found[i]) { 718 continue; 719 } 720 721 add_reg[add_idx].region = reg; 722 add_reg[add_idx].reg_idx = i; 723 add_reg[add_idx++].fd_idx = fd_num; 724 } 725 *nr_rem_reg = rm_idx; 726 *nr_add_reg = add_idx; 727 728 return; 729 } 730 731 static int send_remove_regions(struct vhost_dev *dev, 732 struct scrub_regions *remove_reg, 733 int nr_rem_reg, VhostUserMsg *msg, 734 bool reply_supported) 735 { 736 struct vhost_user *u = dev->opaque; 737 struct vhost_memory_region *shadow_reg; 738 int i, fd, shadow_reg_idx, ret; 739 ram_addr_t offset; 740 VhostUserMemoryRegion region_buffer; 741 742 /* 743 * The regions in remove_reg appear in the same order they do in the 744 * shadow table. Therefore we can minimize memory copies by iterating 745 * through remove_reg backwards. 746 */ 747 for (i = nr_rem_reg - 1; i >= 0; i--) { 748 shadow_reg = remove_reg[i].region; 749 shadow_reg_idx = remove_reg[i].reg_idx; 750 751 vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd); 752 753 if (fd > 0) { 754 msg->hdr.request = VHOST_USER_REM_MEM_REG; 755 vhost_user_fill_msg_region(®ion_buffer, shadow_reg, 0); 756 msg->payload.mem_reg.region = region_buffer; 757 758 ret = vhost_user_write(dev, msg, NULL, 0); 759 if (ret < 0) { 760 return ret; 761 } 762 763 if (reply_supported) { 764 ret = process_message_reply(dev, msg); 765 if (ret) { 766 return ret; 767 } 768 } 769 } 770 771 /* 772 * At this point we know the backend has unmapped the region. It is now 773 * safe to remove it from the shadow table. 774 */ 775 memmove(&u->shadow_regions[shadow_reg_idx], 776 &u->shadow_regions[shadow_reg_idx + 1], 777 sizeof(struct vhost_memory_region) * 778 (u->num_shadow_regions - shadow_reg_idx - 1)); 779 u->num_shadow_regions--; 780 } 781 782 return 0; 783 } 784 785 static int send_add_regions(struct vhost_dev *dev, 786 struct scrub_regions *add_reg, int nr_add_reg, 787 VhostUserMsg *msg, uint64_t *shadow_pcb, 788 bool reply_supported, bool track_ramblocks) 789 { 790 struct vhost_user *u = dev->opaque; 791 int i, fd, ret, reg_idx, reg_fd_idx; 792 struct vhost_memory_region *reg; 793 MemoryRegion *mr; 794 ram_addr_t offset; 795 VhostUserMsg msg_reply; 796 VhostUserMemoryRegion region_buffer; 797 798 for (i = 0; i < nr_add_reg; i++) { 799 reg = add_reg[i].region; 800 reg_idx = add_reg[i].reg_idx; 801 reg_fd_idx = add_reg[i].fd_idx; 802 803 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 804 805 if (fd > 0) { 806 if (track_ramblocks) { 807 trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name, 808 reg->memory_size, 809 reg->guest_phys_addr, 810 reg->userspace_addr, 811 offset); 812 u->region_rb_offset[reg_idx] = offset; 813 u->region_rb[reg_idx] = mr->ram_block; 814 } 815 msg->hdr.request = VHOST_USER_ADD_MEM_REG; 816 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 817 msg->payload.mem_reg.region = region_buffer; 818 819 ret = vhost_user_write(dev, msg, &fd, 1); 820 if (ret < 0) { 821 return ret; 822 } 823 824 if (track_ramblocks) { 825 uint64_t reply_gpa; 826 827 ret = vhost_user_read(dev, &msg_reply); 828 if (ret < 0) { 829 return ret; 830 } 831 832 reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr; 833 834 if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) { 835 error_report("%s: Received unexpected msg type." 836 "Expected %d received %d", __func__, 837 VHOST_USER_ADD_MEM_REG, 838 msg_reply.hdr.request); 839 return -EPROTO; 840 } 841 842 /* 843 * We're using the same structure, just reusing one of the 844 * fields, so it should be the same size. 845 */ 846 if (msg_reply.hdr.size != msg->hdr.size) { 847 error_report("%s: Unexpected size for postcopy reply " 848 "%d vs %d", __func__, msg_reply.hdr.size, 849 msg->hdr.size); 850 return -EPROTO; 851 } 852 853 /* Get the postcopy client base from the backend's reply. */ 854 if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) { 855 shadow_pcb[reg_idx] = 856 msg_reply.payload.mem_reg.region.userspace_addr; 857 trace_vhost_user_set_mem_table_postcopy( 858 msg_reply.payload.mem_reg.region.userspace_addr, 859 msg->payload.mem_reg.region.userspace_addr, 860 reg_fd_idx, reg_idx); 861 } else { 862 error_report("%s: invalid postcopy reply for region. " 863 "Got guest physical address %" PRIX64 ", expected " 864 "%" PRIX64, __func__, reply_gpa, 865 dev->mem->regions[reg_idx].guest_phys_addr); 866 return -EPROTO; 867 } 868 } else if (reply_supported) { 869 ret = process_message_reply(dev, msg); 870 if (ret) { 871 return ret; 872 } 873 } 874 } else if (track_ramblocks) { 875 u->region_rb_offset[reg_idx] = 0; 876 u->region_rb[reg_idx] = NULL; 877 } 878 879 /* 880 * At this point, we know the backend has mapped in the new 881 * region, if the region has a valid file descriptor. 882 * 883 * The region should now be added to the shadow table. 884 */ 885 u->shadow_regions[u->num_shadow_regions].guest_phys_addr = 886 reg->guest_phys_addr; 887 u->shadow_regions[u->num_shadow_regions].userspace_addr = 888 reg->userspace_addr; 889 u->shadow_regions[u->num_shadow_regions].memory_size = 890 reg->memory_size; 891 u->num_shadow_regions++; 892 } 893 894 return 0; 895 } 896 897 static int vhost_user_add_remove_regions(struct vhost_dev *dev, 898 VhostUserMsg *msg, 899 bool reply_supported, 900 bool track_ramblocks) 901 { 902 struct vhost_user *u = dev->opaque; 903 struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS]; 904 struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS]; 905 uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {}; 906 int nr_add_reg, nr_rem_reg; 907 int ret; 908 909 msg->hdr.size = sizeof(msg->payload.mem_reg); 910 911 /* Find the regions which need to be removed or added. */ 912 scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg, 913 shadow_pcb, track_ramblocks); 914 915 if (nr_rem_reg) { 916 ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg, 917 reply_supported); 918 if (ret < 0) { 919 goto err; 920 } 921 } 922 923 if (nr_add_reg) { 924 ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb, 925 reply_supported, track_ramblocks); 926 if (ret < 0) { 927 goto err; 928 } 929 } 930 931 if (track_ramblocks) { 932 memcpy(u->postcopy_client_bases, shadow_pcb, 933 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 934 /* 935 * Now we've registered this with the postcopy code, we ack to the 936 * client, because now we're in the position to be able to deal with 937 * any faults it generates. 938 */ 939 /* TODO: Use this for failure cases as well with a bad value. */ 940 msg->hdr.size = sizeof(msg->payload.u64); 941 msg->payload.u64 = 0; /* OK */ 942 943 ret = vhost_user_write(dev, msg, NULL, 0); 944 if (ret < 0) { 945 return ret; 946 } 947 } 948 949 return 0; 950 951 err: 952 if (track_ramblocks) { 953 memcpy(u->postcopy_client_bases, shadow_pcb, 954 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 955 } 956 957 return ret; 958 } 959 960 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, 961 struct vhost_memory *mem, 962 bool reply_supported, 963 bool config_mem_slots) 964 { 965 struct vhost_user *u = dev->opaque; 966 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 967 size_t fd_num = 0; 968 VhostUserMsg msg_reply; 969 int region_i, msg_i; 970 int ret; 971 972 VhostUserMsg msg = { 973 .hdr.flags = VHOST_USER_VERSION, 974 }; 975 976 if (u->region_rb_len < dev->mem->nregions) { 977 u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions); 978 u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset, 979 dev->mem->nregions); 980 memset(&(u->region_rb[u->region_rb_len]), '\0', 981 sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len)); 982 memset(&(u->region_rb_offset[u->region_rb_len]), '\0', 983 sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len)); 984 u->region_rb_len = dev->mem->nregions; 985 } 986 987 if (config_mem_slots) { 988 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true); 989 if (ret < 0) { 990 return ret; 991 } 992 } else { 993 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 994 true); 995 if (ret < 0) { 996 return ret; 997 } 998 999 ret = vhost_user_write(dev, &msg, fds, fd_num); 1000 if (ret < 0) { 1001 return ret; 1002 } 1003 1004 ret = vhost_user_read(dev, &msg_reply); 1005 if (ret < 0) { 1006 return ret; 1007 } 1008 1009 if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { 1010 error_report("%s: Received unexpected msg type." 1011 "Expected %d received %d", __func__, 1012 VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); 1013 return -EPROTO; 1014 } 1015 1016 /* 1017 * We're using the same structure, just reusing one of the 1018 * fields, so it should be the same size. 1019 */ 1020 if (msg_reply.hdr.size != msg.hdr.size) { 1021 error_report("%s: Unexpected size for postcopy reply " 1022 "%d vs %d", __func__, msg_reply.hdr.size, 1023 msg.hdr.size); 1024 return -EPROTO; 1025 } 1026 1027 memset(u->postcopy_client_bases, 0, 1028 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 1029 1030 /* 1031 * They're in the same order as the regions that were sent 1032 * but some of the regions were skipped (above) if they 1033 * didn't have fd's 1034 */ 1035 for (msg_i = 0, region_i = 0; 1036 region_i < dev->mem->nregions; 1037 region_i++) { 1038 if (msg_i < fd_num && 1039 msg_reply.payload.memory.regions[msg_i].guest_phys_addr == 1040 dev->mem->regions[region_i].guest_phys_addr) { 1041 u->postcopy_client_bases[region_i] = 1042 msg_reply.payload.memory.regions[msg_i].userspace_addr; 1043 trace_vhost_user_set_mem_table_postcopy( 1044 msg_reply.payload.memory.regions[msg_i].userspace_addr, 1045 msg.payload.memory.regions[msg_i].userspace_addr, 1046 msg_i, region_i); 1047 msg_i++; 1048 } 1049 } 1050 if (msg_i != fd_num) { 1051 error_report("%s: postcopy reply not fully consumed " 1052 "%d vs %zd", 1053 __func__, msg_i, fd_num); 1054 return -EIO; 1055 } 1056 1057 /* 1058 * Now we've registered this with the postcopy code, we ack to the 1059 * client, because now we're in the position to be able to deal 1060 * with any faults it generates. 1061 */ 1062 /* TODO: Use this for failure cases as well with a bad value. */ 1063 msg.hdr.size = sizeof(msg.payload.u64); 1064 msg.payload.u64 = 0; /* OK */ 1065 ret = vhost_user_write(dev, &msg, NULL, 0); 1066 if (ret < 0) { 1067 return ret; 1068 } 1069 } 1070 1071 return 0; 1072 } 1073 1074 static int vhost_user_set_mem_table(struct vhost_dev *dev, 1075 struct vhost_memory *mem) 1076 { 1077 struct vhost_user *u = dev->opaque; 1078 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 1079 size_t fd_num = 0; 1080 bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; 1081 bool reply_supported = virtio_has_feature(dev->protocol_features, 1082 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1083 bool config_mem_slots = 1084 virtio_has_feature(dev->protocol_features, 1085 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS); 1086 int ret; 1087 1088 if (do_postcopy) { 1089 /* 1090 * Postcopy has enough differences that it's best done in it's own 1091 * version 1092 */ 1093 return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported, 1094 config_mem_slots); 1095 } 1096 1097 VhostUserMsg msg = { 1098 .hdr.flags = VHOST_USER_VERSION, 1099 }; 1100 1101 if (reply_supported) { 1102 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1103 } 1104 1105 if (config_mem_slots) { 1106 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false); 1107 if (ret < 0) { 1108 return ret; 1109 } 1110 } else { 1111 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 1112 false); 1113 if (ret < 0) { 1114 return ret; 1115 } 1116 1117 ret = vhost_user_write(dev, &msg, fds, fd_num); 1118 if (ret < 0) { 1119 return ret; 1120 } 1121 1122 if (reply_supported) { 1123 return process_message_reply(dev, &msg); 1124 } 1125 } 1126 1127 return 0; 1128 } 1129 1130 static int vhost_user_set_vring_endian(struct vhost_dev *dev, 1131 struct vhost_vring_state *ring) 1132 { 1133 bool cross_endian = virtio_has_feature(dev->protocol_features, 1134 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN); 1135 VhostUserMsg msg = { 1136 .hdr.request = VHOST_USER_SET_VRING_ENDIAN, 1137 .hdr.flags = VHOST_USER_VERSION, 1138 .payload.state = *ring, 1139 .hdr.size = sizeof(msg.payload.state), 1140 }; 1141 1142 if (!cross_endian) { 1143 error_report("vhost-user trying to send unhandled ioctl"); 1144 return -ENOTSUP; 1145 } 1146 1147 return vhost_user_write(dev, &msg, NULL, 0); 1148 } 1149 1150 static int vhost_set_vring(struct vhost_dev *dev, 1151 unsigned long int request, 1152 struct vhost_vring_state *ring) 1153 { 1154 VhostUserMsg msg = { 1155 .hdr.request = request, 1156 .hdr.flags = VHOST_USER_VERSION, 1157 .payload.state = *ring, 1158 .hdr.size = sizeof(msg.payload.state), 1159 }; 1160 1161 return vhost_user_write(dev, &msg, NULL, 0); 1162 } 1163 1164 static int vhost_user_set_vring_num(struct vhost_dev *dev, 1165 struct vhost_vring_state *ring) 1166 { 1167 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); 1168 } 1169 1170 static void vhost_user_host_notifier_free(VhostUserHostNotifier *n) 1171 { 1172 assert(n && n->unmap_addr); 1173 munmap(n->unmap_addr, qemu_real_host_page_size()); 1174 n->unmap_addr = NULL; 1175 } 1176 1177 static void vhost_user_host_notifier_remove(VhostUserState *user, 1178 VirtIODevice *vdev, int queue_idx) 1179 { 1180 VhostUserHostNotifier *n = &user->notifier[queue_idx]; 1181 1182 if (n->addr) { 1183 if (vdev) { 1184 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false); 1185 } 1186 assert(!n->unmap_addr); 1187 n->unmap_addr = n->addr; 1188 n->addr = NULL; 1189 call_rcu(n, vhost_user_host_notifier_free, rcu); 1190 } 1191 } 1192 1193 static int vhost_user_set_vring_base(struct vhost_dev *dev, 1194 struct vhost_vring_state *ring) 1195 { 1196 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); 1197 } 1198 1199 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) 1200 { 1201 int i; 1202 1203 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { 1204 return -EINVAL; 1205 } 1206 1207 for (i = 0; i < dev->nvqs; ++i) { 1208 int ret; 1209 struct vhost_vring_state state = { 1210 .index = dev->vq_index + i, 1211 .num = enable, 1212 }; 1213 1214 ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); 1215 if (ret < 0) { 1216 /* 1217 * Restoring the previous state is likely infeasible, as well as 1218 * proceeding regardless the error, so just bail out and hope for 1219 * the device-level recovery. 1220 */ 1221 return ret; 1222 } 1223 } 1224 1225 return 0; 1226 } 1227 1228 static int vhost_user_get_vring_base(struct vhost_dev *dev, 1229 struct vhost_vring_state *ring) 1230 { 1231 int ret; 1232 VhostUserMsg msg = { 1233 .hdr.request = VHOST_USER_GET_VRING_BASE, 1234 .hdr.flags = VHOST_USER_VERSION, 1235 .payload.state = *ring, 1236 .hdr.size = sizeof(msg.payload.state), 1237 }; 1238 struct vhost_user *u = dev->opaque; 1239 1240 vhost_user_host_notifier_remove(u->user, dev->vdev, ring->index); 1241 1242 ret = vhost_user_write(dev, &msg, NULL, 0); 1243 if (ret < 0) { 1244 return ret; 1245 } 1246 1247 ret = vhost_user_read(dev, &msg); 1248 if (ret < 0) { 1249 return ret; 1250 } 1251 1252 if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) { 1253 error_report("Received unexpected msg type. Expected %d received %d", 1254 VHOST_USER_GET_VRING_BASE, msg.hdr.request); 1255 return -EPROTO; 1256 } 1257 1258 if (msg.hdr.size != sizeof(msg.payload.state)) { 1259 error_report("Received bad msg size."); 1260 return -EPROTO; 1261 } 1262 1263 *ring = msg.payload.state; 1264 1265 return 0; 1266 } 1267 1268 static int vhost_set_vring_file(struct vhost_dev *dev, 1269 VhostUserRequest request, 1270 struct vhost_vring_file *file) 1271 { 1272 int fds[VHOST_USER_MAX_RAM_SLOTS]; 1273 size_t fd_num = 0; 1274 VhostUserMsg msg = { 1275 .hdr.request = request, 1276 .hdr.flags = VHOST_USER_VERSION, 1277 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, 1278 .hdr.size = sizeof(msg.payload.u64), 1279 }; 1280 1281 if (ioeventfd_enabled() && file->fd > 0) { 1282 fds[fd_num++] = file->fd; 1283 } else { 1284 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 1285 } 1286 1287 return vhost_user_write(dev, &msg, fds, fd_num); 1288 } 1289 1290 static int vhost_user_set_vring_kick(struct vhost_dev *dev, 1291 struct vhost_vring_file *file) 1292 { 1293 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); 1294 } 1295 1296 static int vhost_user_set_vring_call(struct vhost_dev *dev, 1297 struct vhost_vring_file *file) 1298 { 1299 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); 1300 } 1301 1302 1303 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) 1304 { 1305 int ret; 1306 VhostUserMsg msg = { 1307 .hdr.request = request, 1308 .hdr.flags = VHOST_USER_VERSION, 1309 }; 1310 1311 if (vhost_user_one_time_request(request) && dev->vq_index != 0) { 1312 return 0; 1313 } 1314 1315 ret = vhost_user_write(dev, &msg, NULL, 0); 1316 if (ret < 0) { 1317 return ret; 1318 } 1319 1320 ret = vhost_user_read(dev, &msg); 1321 if (ret < 0) { 1322 return ret; 1323 } 1324 1325 if (msg.hdr.request != request) { 1326 error_report("Received unexpected msg type. Expected %d received %d", 1327 request, msg.hdr.request); 1328 return -EPROTO; 1329 } 1330 1331 if (msg.hdr.size != sizeof(msg.payload.u64)) { 1332 error_report("Received bad msg size."); 1333 return -EPROTO; 1334 } 1335 1336 *u64 = msg.payload.u64; 1337 1338 return 0; 1339 } 1340 1341 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) 1342 { 1343 if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) { 1344 return -EPROTO; 1345 } 1346 1347 return 0; 1348 } 1349 1350 static int enforce_reply(struct vhost_dev *dev, 1351 const VhostUserMsg *msg) 1352 { 1353 uint64_t dummy; 1354 1355 if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1356 return process_message_reply(dev, msg); 1357 } 1358 1359 /* 1360 * We need to wait for a reply but the backend does not 1361 * support replies for the command we just sent. 1362 * Send VHOST_USER_GET_FEATURES which makes all backends 1363 * send a reply. 1364 */ 1365 return vhost_user_get_features(dev, &dummy); 1366 } 1367 1368 static int vhost_user_set_vring_addr(struct vhost_dev *dev, 1369 struct vhost_vring_addr *addr) 1370 { 1371 int ret; 1372 VhostUserMsg msg = { 1373 .hdr.request = VHOST_USER_SET_VRING_ADDR, 1374 .hdr.flags = VHOST_USER_VERSION, 1375 .payload.addr = *addr, 1376 .hdr.size = sizeof(msg.payload.addr), 1377 }; 1378 1379 bool reply_supported = virtio_has_feature(dev->protocol_features, 1380 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1381 1382 /* 1383 * wait for a reply if logging is enabled to make sure 1384 * backend is actually logging changes 1385 */ 1386 bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG); 1387 1388 if (reply_supported && wait_for_reply) { 1389 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1390 } 1391 1392 ret = vhost_user_write(dev, &msg, NULL, 0); 1393 if (ret < 0) { 1394 return ret; 1395 } 1396 1397 if (wait_for_reply) { 1398 return enforce_reply(dev, &msg); 1399 } 1400 1401 return 0; 1402 } 1403 1404 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64, 1405 bool wait_for_reply) 1406 { 1407 VhostUserMsg msg = { 1408 .hdr.request = request, 1409 .hdr.flags = VHOST_USER_VERSION, 1410 .payload.u64 = u64, 1411 .hdr.size = sizeof(msg.payload.u64), 1412 }; 1413 int ret; 1414 1415 if (wait_for_reply) { 1416 bool reply_supported = virtio_has_feature(dev->protocol_features, 1417 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1418 if (reply_supported) { 1419 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1420 } 1421 } 1422 1423 ret = vhost_user_write(dev, &msg, NULL, 0); 1424 if (ret < 0) { 1425 return ret; 1426 } 1427 1428 if (wait_for_reply) { 1429 return enforce_reply(dev, &msg); 1430 } 1431 1432 return 0; 1433 } 1434 1435 static int vhost_user_set_features(struct vhost_dev *dev, 1436 uint64_t features) 1437 { 1438 /* 1439 * wait for a reply if logging is enabled to make sure 1440 * backend is actually logging changes 1441 */ 1442 bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL); 1443 1444 return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features, 1445 log_enabled); 1446 } 1447 1448 static int vhost_user_set_protocol_features(struct vhost_dev *dev, 1449 uint64_t features) 1450 { 1451 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features, 1452 false); 1453 } 1454 1455 static int vhost_user_set_owner(struct vhost_dev *dev) 1456 { 1457 VhostUserMsg msg = { 1458 .hdr.request = VHOST_USER_SET_OWNER, 1459 .hdr.flags = VHOST_USER_VERSION, 1460 }; 1461 1462 return vhost_user_write(dev, &msg, NULL, 0); 1463 } 1464 1465 static int vhost_user_get_max_memslots(struct vhost_dev *dev, 1466 uint64_t *max_memslots) 1467 { 1468 uint64_t backend_max_memslots; 1469 int err; 1470 1471 err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS, 1472 &backend_max_memslots); 1473 if (err < 0) { 1474 return err; 1475 } 1476 1477 *max_memslots = backend_max_memslots; 1478 1479 return 0; 1480 } 1481 1482 static int vhost_user_reset_device(struct vhost_dev *dev) 1483 { 1484 VhostUserMsg msg = { 1485 .hdr.flags = VHOST_USER_VERSION, 1486 }; 1487 1488 msg.hdr.request = virtio_has_feature(dev->protocol_features, 1489 VHOST_USER_PROTOCOL_F_RESET_DEVICE) 1490 ? VHOST_USER_RESET_DEVICE 1491 : VHOST_USER_RESET_OWNER; 1492 1493 return vhost_user_write(dev, &msg, NULL, 0); 1494 } 1495 1496 static int vhost_user_slave_handle_config_change(struct vhost_dev *dev) 1497 { 1498 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { 1499 return -ENOSYS; 1500 } 1501 1502 return dev->config_ops->vhost_dev_config_notifier(dev); 1503 } 1504 1505 static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, 1506 VhostUserVringArea *area, 1507 int fd) 1508 { 1509 int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK; 1510 size_t page_size = qemu_real_host_page_size(); 1511 struct vhost_user *u = dev->opaque; 1512 VhostUserState *user = u->user; 1513 VirtIODevice *vdev = dev->vdev; 1514 VhostUserHostNotifier *n; 1515 void *addr; 1516 char *name; 1517 1518 if (!virtio_has_feature(dev->protocol_features, 1519 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) || 1520 vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) { 1521 return -EINVAL; 1522 } 1523 1524 n = &user->notifier[queue_idx]; 1525 1526 vhost_user_host_notifier_remove(user, vdev, queue_idx); 1527 1528 if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { 1529 return 0; 1530 } 1531 1532 /* Sanity check. */ 1533 if (area->size != page_size) { 1534 return -EINVAL; 1535 } 1536 1537 addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, 1538 fd, area->offset); 1539 if (addr == MAP_FAILED) { 1540 return -EFAULT; 1541 } 1542 1543 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]", 1544 user, queue_idx); 1545 if (!n->mr.ram) { /* Don't init again after suspend. */ 1546 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, 1547 page_size, addr); 1548 } else { 1549 n->mr.ram_block->host = addr; 1550 } 1551 g_free(name); 1552 1553 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) { 1554 object_unparent(OBJECT(&n->mr)); 1555 munmap(addr, page_size); 1556 return -ENXIO; 1557 } 1558 1559 n->addr = addr; 1560 1561 return 0; 1562 } 1563 1564 static void close_slave_channel(struct vhost_user *u) 1565 { 1566 g_source_destroy(u->slave_src); 1567 g_source_unref(u->slave_src); 1568 u->slave_src = NULL; 1569 object_unref(OBJECT(u->slave_ioc)); 1570 u->slave_ioc = NULL; 1571 } 1572 1573 static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, 1574 gpointer opaque) 1575 { 1576 struct vhost_dev *dev = opaque; 1577 struct vhost_user *u = dev->opaque; 1578 VhostUserHeader hdr = { 0, }; 1579 VhostUserPayload payload = { 0, }; 1580 Error *local_err = NULL; 1581 gboolean rc = G_SOURCE_CONTINUE; 1582 int ret = 0; 1583 struct iovec iov; 1584 g_autofree int *fd = NULL; 1585 size_t fdsize = 0; 1586 int i; 1587 1588 /* Read header */ 1589 iov.iov_base = &hdr; 1590 iov.iov_len = VHOST_USER_HDR_SIZE; 1591 1592 if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) { 1593 error_report_err(local_err); 1594 goto err; 1595 } 1596 1597 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) { 1598 error_report("Failed to read msg header." 1599 " Size %d exceeds the maximum %zu.", hdr.size, 1600 VHOST_USER_PAYLOAD_SIZE); 1601 goto err; 1602 } 1603 1604 /* Read payload */ 1605 if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) { 1606 error_report_err(local_err); 1607 goto err; 1608 } 1609 1610 switch (hdr.request) { 1611 case VHOST_USER_SLAVE_IOTLB_MSG: 1612 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); 1613 break; 1614 case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG : 1615 ret = vhost_user_slave_handle_config_change(dev); 1616 break; 1617 case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: 1618 ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area, 1619 fd ? fd[0] : -1); 1620 break; 1621 default: 1622 error_report("Received unexpected msg type: %d.", hdr.request); 1623 ret = -EINVAL; 1624 } 1625 1626 /* 1627 * REPLY_ACK feature handling. Other reply types has to be managed 1628 * directly in their request handlers. 1629 */ 1630 if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1631 struct iovec iovec[2]; 1632 1633 1634 hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 1635 hdr.flags |= VHOST_USER_REPLY_MASK; 1636 1637 payload.u64 = !!ret; 1638 hdr.size = sizeof(payload.u64); 1639 1640 iovec[0].iov_base = &hdr; 1641 iovec[0].iov_len = VHOST_USER_HDR_SIZE; 1642 iovec[1].iov_base = &payload; 1643 iovec[1].iov_len = hdr.size; 1644 1645 if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) { 1646 error_report_err(local_err); 1647 goto err; 1648 } 1649 } 1650 1651 goto fdcleanup; 1652 1653 err: 1654 close_slave_channel(u); 1655 rc = G_SOURCE_REMOVE; 1656 1657 fdcleanup: 1658 if (fd) { 1659 for (i = 0; i < fdsize; i++) { 1660 close(fd[i]); 1661 } 1662 } 1663 return rc; 1664 } 1665 1666 static int vhost_setup_slave_channel(struct vhost_dev *dev) 1667 { 1668 VhostUserMsg msg = { 1669 .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD, 1670 .hdr.flags = VHOST_USER_VERSION, 1671 }; 1672 struct vhost_user *u = dev->opaque; 1673 int sv[2], ret = 0; 1674 bool reply_supported = virtio_has_feature(dev->protocol_features, 1675 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1676 Error *local_err = NULL; 1677 QIOChannel *ioc; 1678 1679 if (!virtio_has_feature(dev->protocol_features, 1680 VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { 1681 return 0; 1682 } 1683 1684 if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { 1685 int saved_errno = errno; 1686 error_report("socketpair() failed"); 1687 return -saved_errno; 1688 } 1689 1690 ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err)); 1691 if (!ioc) { 1692 error_report_err(local_err); 1693 return -ECONNREFUSED; 1694 } 1695 u->slave_ioc = ioc; 1696 slave_update_read_handler(dev, NULL); 1697 1698 if (reply_supported) { 1699 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1700 } 1701 1702 ret = vhost_user_write(dev, &msg, &sv[1], 1); 1703 if (ret) { 1704 goto out; 1705 } 1706 1707 if (reply_supported) { 1708 ret = process_message_reply(dev, &msg); 1709 } 1710 1711 out: 1712 close(sv[1]); 1713 if (ret) { 1714 close_slave_channel(u); 1715 } 1716 1717 return ret; 1718 } 1719 1720 #ifdef CONFIG_LINUX 1721 /* 1722 * Called back from the postcopy fault thread when a fault is received on our 1723 * ufd. 1724 * TODO: This is Linux specific 1725 */ 1726 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd, 1727 void *ufd) 1728 { 1729 struct vhost_dev *dev = pcfd->data; 1730 struct vhost_user *u = dev->opaque; 1731 struct uffd_msg *msg = ufd; 1732 uint64_t faultaddr = msg->arg.pagefault.address; 1733 RAMBlock *rb = NULL; 1734 uint64_t rb_offset; 1735 int i; 1736 1737 trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr, 1738 dev->mem->nregions); 1739 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1740 trace_vhost_user_postcopy_fault_handler_loop(i, 1741 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size); 1742 if (faultaddr >= u->postcopy_client_bases[i]) { 1743 /* Ofset of the fault address in the vhost region */ 1744 uint64_t region_offset = faultaddr - u->postcopy_client_bases[i]; 1745 if (region_offset < dev->mem->regions[i].memory_size) { 1746 rb_offset = region_offset + u->region_rb_offset[i]; 1747 trace_vhost_user_postcopy_fault_handler_found(i, 1748 region_offset, rb_offset); 1749 rb = u->region_rb[i]; 1750 return postcopy_request_shared_page(pcfd, rb, faultaddr, 1751 rb_offset); 1752 } 1753 } 1754 } 1755 error_report("%s: Failed to find region for fault %" PRIx64, 1756 __func__, faultaddr); 1757 return -1; 1758 } 1759 1760 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb, 1761 uint64_t offset) 1762 { 1763 struct vhost_dev *dev = pcfd->data; 1764 struct vhost_user *u = dev->opaque; 1765 int i; 1766 1767 trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset); 1768 1769 if (!u) { 1770 return 0; 1771 } 1772 /* Translate the offset into an address in the clients address space */ 1773 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1774 if (u->region_rb[i] == rb && 1775 offset >= u->region_rb_offset[i] && 1776 offset < (u->region_rb_offset[i] + 1777 dev->mem->regions[i].memory_size)) { 1778 uint64_t client_addr = (offset - u->region_rb_offset[i]) + 1779 u->postcopy_client_bases[i]; 1780 trace_vhost_user_postcopy_waker_found(client_addr); 1781 return postcopy_wake_shared(pcfd, client_addr, rb); 1782 } 1783 } 1784 1785 trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset); 1786 return 0; 1787 } 1788 #endif 1789 1790 /* 1791 * Called at the start of an inbound postcopy on reception of the 1792 * 'advise' command. 1793 */ 1794 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) 1795 { 1796 #ifdef CONFIG_LINUX 1797 struct vhost_user *u = dev->opaque; 1798 CharBackend *chr = u->user->chr; 1799 int ufd; 1800 int ret; 1801 VhostUserMsg msg = { 1802 .hdr.request = VHOST_USER_POSTCOPY_ADVISE, 1803 .hdr.flags = VHOST_USER_VERSION, 1804 }; 1805 1806 ret = vhost_user_write(dev, &msg, NULL, 0); 1807 if (ret < 0) { 1808 error_setg(errp, "Failed to send postcopy_advise to vhost"); 1809 return ret; 1810 } 1811 1812 ret = vhost_user_read(dev, &msg); 1813 if (ret < 0) { 1814 error_setg(errp, "Failed to get postcopy_advise reply from vhost"); 1815 return ret; 1816 } 1817 1818 if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) { 1819 error_setg(errp, "Unexpected msg type. Expected %d received %d", 1820 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request); 1821 return -EPROTO; 1822 } 1823 1824 if (msg.hdr.size) { 1825 error_setg(errp, "Received bad msg size."); 1826 return -EPROTO; 1827 } 1828 ufd = qemu_chr_fe_get_msgfd(chr); 1829 if (ufd < 0) { 1830 error_setg(errp, "%s: Failed to get ufd", __func__); 1831 return -EIO; 1832 } 1833 qemu_socket_set_nonblock(ufd); 1834 1835 /* register ufd with userfault thread */ 1836 u->postcopy_fd.fd = ufd; 1837 u->postcopy_fd.data = dev; 1838 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler; 1839 u->postcopy_fd.waker = vhost_user_postcopy_waker; 1840 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */ 1841 postcopy_register_shared_ufd(&u->postcopy_fd); 1842 return 0; 1843 #else 1844 error_setg(errp, "Postcopy not supported on non-Linux systems"); 1845 return -ENOSYS; 1846 #endif 1847 } 1848 1849 /* 1850 * Called at the switch to postcopy on reception of the 'listen' command. 1851 */ 1852 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) 1853 { 1854 struct vhost_user *u = dev->opaque; 1855 int ret; 1856 VhostUserMsg msg = { 1857 .hdr.request = VHOST_USER_POSTCOPY_LISTEN, 1858 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1859 }; 1860 u->postcopy_listen = true; 1861 1862 trace_vhost_user_postcopy_listen(); 1863 1864 ret = vhost_user_write(dev, &msg, NULL, 0); 1865 if (ret < 0) { 1866 error_setg(errp, "Failed to send postcopy_listen to vhost"); 1867 return ret; 1868 } 1869 1870 ret = process_message_reply(dev, &msg); 1871 if (ret) { 1872 error_setg(errp, "Failed to receive reply to postcopy_listen"); 1873 return ret; 1874 } 1875 1876 return 0; 1877 } 1878 1879 /* 1880 * Called at the end of postcopy 1881 */ 1882 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp) 1883 { 1884 VhostUserMsg msg = { 1885 .hdr.request = VHOST_USER_POSTCOPY_END, 1886 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1887 }; 1888 int ret; 1889 struct vhost_user *u = dev->opaque; 1890 1891 trace_vhost_user_postcopy_end_entry(); 1892 1893 ret = vhost_user_write(dev, &msg, NULL, 0); 1894 if (ret < 0) { 1895 error_setg(errp, "Failed to send postcopy_end to vhost"); 1896 return ret; 1897 } 1898 1899 ret = process_message_reply(dev, &msg); 1900 if (ret) { 1901 error_setg(errp, "Failed to receive reply to postcopy_end"); 1902 return ret; 1903 } 1904 postcopy_unregister_shared_ufd(&u->postcopy_fd); 1905 close(u->postcopy_fd.fd); 1906 u->postcopy_fd.handler = NULL; 1907 1908 trace_vhost_user_postcopy_end_exit(); 1909 1910 return 0; 1911 } 1912 1913 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, 1914 void *opaque) 1915 { 1916 struct PostcopyNotifyData *pnd = opaque; 1917 struct vhost_user *u = container_of(notifier, struct vhost_user, 1918 postcopy_notifier); 1919 struct vhost_dev *dev = u->dev; 1920 1921 switch (pnd->reason) { 1922 case POSTCOPY_NOTIFY_PROBE: 1923 if (!virtio_has_feature(dev->protocol_features, 1924 VHOST_USER_PROTOCOL_F_PAGEFAULT)) { 1925 /* TODO: Get the device name into this error somehow */ 1926 error_setg(pnd->errp, 1927 "vhost-user backend not capable of postcopy"); 1928 return -ENOENT; 1929 } 1930 break; 1931 1932 case POSTCOPY_NOTIFY_INBOUND_ADVISE: 1933 return vhost_user_postcopy_advise(dev, pnd->errp); 1934 1935 case POSTCOPY_NOTIFY_INBOUND_LISTEN: 1936 return vhost_user_postcopy_listen(dev, pnd->errp); 1937 1938 case POSTCOPY_NOTIFY_INBOUND_END: 1939 return vhost_user_postcopy_end(dev, pnd->errp); 1940 1941 default: 1942 /* We ignore notifications we don't know */ 1943 break; 1944 } 1945 1946 return 0; 1947 } 1948 1949 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, 1950 Error **errp) 1951 { 1952 uint64_t features, protocol_features, ram_slots; 1953 struct vhost_user *u; 1954 int err; 1955 1956 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1957 1958 u = g_new0(struct vhost_user, 1); 1959 u->user = opaque; 1960 u->dev = dev; 1961 dev->opaque = u; 1962 1963 err = vhost_user_get_features(dev, &features); 1964 if (err < 0) { 1965 error_setg_errno(errp, -err, "vhost_backend_init failed"); 1966 return err; 1967 } 1968 1969 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 1970 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 1971 1972 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, 1973 &protocol_features); 1974 if (err < 0) { 1975 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 1976 return -EPROTO; 1977 } 1978 1979 dev->protocol_features = 1980 protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK; 1981 1982 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { 1983 /* Don't acknowledge CONFIG feature if device doesn't support it */ 1984 dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); 1985 } else if (!(protocol_features & 1986 (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) { 1987 error_setg(errp, "Device expects VHOST_USER_PROTOCOL_F_CONFIG " 1988 "but backend does not support it."); 1989 return -EINVAL; 1990 } 1991 1992 err = vhost_user_set_protocol_features(dev, dev->protocol_features); 1993 if (err < 0) { 1994 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 1995 return -EPROTO; 1996 } 1997 1998 /* query the max queues we support if backend supports Multiple Queue */ 1999 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 2000 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, 2001 &dev->max_queues); 2002 if (err < 0) { 2003 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2004 return -EPROTO; 2005 } 2006 } else { 2007 dev->max_queues = 1; 2008 } 2009 2010 if (dev->num_queues && dev->max_queues < dev->num_queues) { 2011 error_setg(errp, "The maximum number of queues supported by the " 2012 "backend is %" PRIu64, dev->max_queues); 2013 return -EINVAL; 2014 } 2015 2016 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && 2017 !(virtio_has_feature(dev->protocol_features, 2018 VHOST_USER_PROTOCOL_F_SLAVE_REQ) && 2019 virtio_has_feature(dev->protocol_features, 2020 VHOST_USER_PROTOCOL_F_REPLY_ACK))) { 2021 error_setg(errp, "IOMMU support requires reply-ack and " 2022 "slave-req protocol features."); 2023 return -EINVAL; 2024 } 2025 2026 /* get max memory regions if backend supports configurable RAM slots */ 2027 if (!virtio_has_feature(dev->protocol_features, 2028 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) { 2029 u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS; 2030 } else { 2031 err = vhost_user_get_max_memslots(dev, &ram_slots); 2032 if (err < 0) { 2033 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2034 return -EPROTO; 2035 } 2036 2037 if (ram_slots < u->user->memory_slots) { 2038 error_setg(errp, "The backend specified a max ram slots limit " 2039 "of %" PRIu64", when the prior validated limit was " 2040 "%d. This limit should never decrease.", ram_slots, 2041 u->user->memory_slots); 2042 return -EINVAL; 2043 } 2044 2045 u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS); 2046 } 2047 } 2048 2049 if (dev->migration_blocker == NULL && 2050 !virtio_has_feature(dev->protocol_features, 2051 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { 2052 error_setg(&dev->migration_blocker, 2053 "Migration disabled: vhost-user backend lacks " 2054 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); 2055 } 2056 2057 if (dev->vq_index == 0) { 2058 err = vhost_setup_slave_channel(dev); 2059 if (err < 0) { 2060 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2061 return -EPROTO; 2062 } 2063 } 2064 2065 u->postcopy_notifier.notify = vhost_user_postcopy_notifier; 2066 postcopy_add_notifier(&u->postcopy_notifier); 2067 2068 return 0; 2069 } 2070 2071 static int vhost_user_backend_cleanup(struct vhost_dev *dev) 2072 { 2073 struct vhost_user *u; 2074 2075 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2076 2077 u = dev->opaque; 2078 if (u->postcopy_notifier.notify) { 2079 postcopy_remove_notifier(&u->postcopy_notifier); 2080 u->postcopy_notifier.notify = NULL; 2081 } 2082 u->postcopy_listen = false; 2083 if (u->postcopy_fd.handler) { 2084 postcopy_unregister_shared_ufd(&u->postcopy_fd); 2085 close(u->postcopy_fd.fd); 2086 u->postcopy_fd.handler = NULL; 2087 } 2088 if (u->slave_ioc) { 2089 close_slave_channel(u); 2090 } 2091 g_free(u->region_rb); 2092 u->region_rb = NULL; 2093 g_free(u->region_rb_offset); 2094 u->region_rb_offset = NULL; 2095 u->region_rb_len = 0; 2096 g_free(u); 2097 dev->opaque = 0; 2098 2099 return 0; 2100 } 2101 2102 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 2103 { 2104 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 2105 2106 return idx; 2107 } 2108 2109 static int vhost_user_memslots_limit(struct vhost_dev *dev) 2110 { 2111 struct vhost_user *u = dev->opaque; 2112 2113 return u->user->memory_slots; 2114 } 2115 2116 static bool vhost_user_requires_shm_log(struct vhost_dev *dev) 2117 { 2118 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2119 2120 return virtio_has_feature(dev->protocol_features, 2121 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 2122 } 2123 2124 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) 2125 { 2126 VhostUserMsg msg = { }; 2127 2128 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2129 2130 /* If guest supports GUEST_ANNOUNCE do nothing */ 2131 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { 2132 return 0; 2133 } 2134 2135 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ 2136 if (virtio_has_feature(dev->protocol_features, 2137 VHOST_USER_PROTOCOL_F_RARP)) { 2138 msg.hdr.request = VHOST_USER_SEND_RARP; 2139 msg.hdr.flags = VHOST_USER_VERSION; 2140 memcpy((char *)&msg.payload.u64, mac_addr, 6); 2141 msg.hdr.size = sizeof(msg.payload.u64); 2142 2143 return vhost_user_write(dev, &msg, NULL, 0); 2144 } 2145 return -ENOTSUP; 2146 } 2147 2148 static bool vhost_user_can_merge(struct vhost_dev *dev, 2149 uint64_t start1, uint64_t size1, 2150 uint64_t start2, uint64_t size2) 2151 { 2152 ram_addr_t offset; 2153 int mfd, rfd; 2154 2155 (void)vhost_user_get_mr_data(start1, &offset, &mfd); 2156 (void)vhost_user_get_mr_data(start2, &offset, &rfd); 2157 2158 return mfd == rfd; 2159 } 2160 2161 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) 2162 { 2163 VhostUserMsg msg; 2164 bool reply_supported = virtio_has_feature(dev->protocol_features, 2165 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2166 int ret; 2167 2168 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) { 2169 return 0; 2170 } 2171 2172 msg.hdr.request = VHOST_USER_NET_SET_MTU; 2173 msg.payload.u64 = mtu; 2174 msg.hdr.size = sizeof(msg.payload.u64); 2175 msg.hdr.flags = VHOST_USER_VERSION; 2176 if (reply_supported) { 2177 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2178 } 2179 2180 ret = vhost_user_write(dev, &msg, NULL, 0); 2181 if (ret < 0) { 2182 return ret; 2183 } 2184 2185 /* If reply_ack supported, slave has to ack specified MTU is valid */ 2186 if (reply_supported) { 2187 return process_message_reply(dev, &msg); 2188 } 2189 2190 return 0; 2191 } 2192 2193 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev, 2194 struct vhost_iotlb_msg *imsg) 2195 { 2196 int ret; 2197 VhostUserMsg msg = { 2198 .hdr.request = VHOST_USER_IOTLB_MSG, 2199 .hdr.size = sizeof(msg.payload.iotlb), 2200 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 2201 .payload.iotlb = *imsg, 2202 }; 2203 2204 ret = vhost_user_write(dev, &msg, NULL, 0); 2205 if (ret < 0) { 2206 return ret; 2207 } 2208 2209 return process_message_reply(dev, &msg); 2210 } 2211 2212 2213 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled) 2214 { 2215 /* No-op as the receive channel is not dedicated to IOTLB messages. */ 2216 } 2217 2218 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, 2219 uint32_t config_len, Error **errp) 2220 { 2221 int ret; 2222 VhostUserMsg msg = { 2223 .hdr.request = VHOST_USER_GET_CONFIG, 2224 .hdr.flags = VHOST_USER_VERSION, 2225 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len, 2226 }; 2227 2228 if (!virtio_has_feature(dev->protocol_features, 2229 VHOST_USER_PROTOCOL_F_CONFIG)) { 2230 error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported"); 2231 return -EINVAL; 2232 } 2233 2234 assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE); 2235 2236 msg.payload.config.offset = 0; 2237 msg.payload.config.size = config_len; 2238 ret = vhost_user_write(dev, &msg, NULL, 0); 2239 if (ret < 0) { 2240 error_setg_errno(errp, -ret, "vhost_get_config failed"); 2241 return ret; 2242 } 2243 2244 ret = vhost_user_read(dev, &msg); 2245 if (ret < 0) { 2246 error_setg_errno(errp, -ret, "vhost_get_config failed"); 2247 return ret; 2248 } 2249 2250 if (msg.hdr.request != VHOST_USER_GET_CONFIG) { 2251 error_setg(errp, 2252 "Received unexpected msg type. Expected %d received %d", 2253 VHOST_USER_GET_CONFIG, msg.hdr.request); 2254 return -EPROTO; 2255 } 2256 2257 if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) { 2258 error_setg(errp, "Received bad msg size."); 2259 return -EPROTO; 2260 } 2261 2262 memcpy(config, msg.payload.config.region, config_len); 2263 2264 return 0; 2265 } 2266 2267 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data, 2268 uint32_t offset, uint32_t size, uint32_t flags) 2269 { 2270 int ret; 2271 uint8_t *p; 2272 bool reply_supported = virtio_has_feature(dev->protocol_features, 2273 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2274 2275 VhostUserMsg msg = { 2276 .hdr.request = VHOST_USER_SET_CONFIG, 2277 .hdr.flags = VHOST_USER_VERSION, 2278 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size, 2279 }; 2280 2281 if (!virtio_has_feature(dev->protocol_features, 2282 VHOST_USER_PROTOCOL_F_CONFIG)) { 2283 return -ENOTSUP; 2284 } 2285 2286 if (reply_supported) { 2287 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2288 } 2289 2290 if (size > VHOST_USER_MAX_CONFIG_SIZE) { 2291 return -EINVAL; 2292 } 2293 2294 msg.payload.config.offset = offset, 2295 msg.payload.config.size = size, 2296 msg.payload.config.flags = flags, 2297 p = msg.payload.config.region; 2298 memcpy(p, data, size); 2299 2300 ret = vhost_user_write(dev, &msg, NULL, 0); 2301 if (ret < 0) { 2302 return ret; 2303 } 2304 2305 if (reply_supported) { 2306 return process_message_reply(dev, &msg); 2307 } 2308 2309 return 0; 2310 } 2311 2312 static int vhost_user_crypto_create_session(struct vhost_dev *dev, 2313 void *session_info, 2314 uint64_t *session_id) 2315 { 2316 int ret; 2317 bool crypto_session = virtio_has_feature(dev->protocol_features, 2318 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2319 CryptoDevBackendSymSessionInfo *sess_info = session_info; 2320 VhostUserMsg msg = { 2321 .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION, 2322 .hdr.flags = VHOST_USER_VERSION, 2323 .hdr.size = sizeof(msg.payload.session), 2324 }; 2325 2326 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2327 2328 if (!crypto_session) { 2329 error_report("vhost-user trying to send unhandled ioctl"); 2330 return -ENOTSUP; 2331 } 2332 2333 memcpy(&msg.payload.session.session_setup_data, sess_info, 2334 sizeof(CryptoDevBackendSymSessionInfo)); 2335 if (sess_info->key_len) { 2336 memcpy(&msg.payload.session.key, sess_info->cipher_key, 2337 sess_info->key_len); 2338 } 2339 if (sess_info->auth_key_len > 0) { 2340 memcpy(&msg.payload.session.auth_key, sess_info->auth_key, 2341 sess_info->auth_key_len); 2342 } 2343 ret = vhost_user_write(dev, &msg, NULL, 0); 2344 if (ret < 0) { 2345 error_report("vhost_user_write() return %d, create session failed", 2346 ret); 2347 return ret; 2348 } 2349 2350 ret = vhost_user_read(dev, &msg); 2351 if (ret < 0) { 2352 error_report("vhost_user_read() return %d, create session failed", 2353 ret); 2354 return ret; 2355 } 2356 2357 if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) { 2358 error_report("Received unexpected msg type. Expected %d received %d", 2359 VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request); 2360 return -EPROTO; 2361 } 2362 2363 if (msg.hdr.size != sizeof(msg.payload.session)) { 2364 error_report("Received bad msg size."); 2365 return -EPROTO; 2366 } 2367 2368 if (msg.payload.session.session_id < 0) { 2369 error_report("Bad session id: %" PRId64 "", 2370 msg.payload.session.session_id); 2371 return -EINVAL; 2372 } 2373 *session_id = msg.payload.session.session_id; 2374 2375 return 0; 2376 } 2377 2378 static int 2379 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id) 2380 { 2381 int ret; 2382 bool crypto_session = virtio_has_feature(dev->protocol_features, 2383 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2384 VhostUserMsg msg = { 2385 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION, 2386 .hdr.flags = VHOST_USER_VERSION, 2387 .hdr.size = sizeof(msg.payload.u64), 2388 }; 2389 msg.payload.u64 = session_id; 2390 2391 if (!crypto_session) { 2392 error_report("vhost-user trying to send unhandled ioctl"); 2393 return -ENOTSUP; 2394 } 2395 2396 ret = vhost_user_write(dev, &msg, NULL, 0); 2397 if (ret < 0) { 2398 error_report("vhost_user_write() return %d, close session failed", 2399 ret); 2400 return ret; 2401 } 2402 2403 return 0; 2404 } 2405 2406 static bool vhost_user_mem_section_filter(struct vhost_dev *dev, 2407 MemoryRegionSection *section) 2408 { 2409 bool result; 2410 2411 result = memory_region_get_fd(section->mr) >= 0; 2412 2413 return result; 2414 } 2415 2416 static int vhost_user_get_inflight_fd(struct vhost_dev *dev, 2417 uint16_t queue_size, 2418 struct vhost_inflight *inflight) 2419 { 2420 void *addr; 2421 int fd; 2422 int ret; 2423 struct vhost_user *u = dev->opaque; 2424 CharBackend *chr = u->user->chr; 2425 VhostUserMsg msg = { 2426 .hdr.request = VHOST_USER_GET_INFLIGHT_FD, 2427 .hdr.flags = VHOST_USER_VERSION, 2428 .payload.inflight.num_queues = dev->nvqs, 2429 .payload.inflight.queue_size = queue_size, 2430 .hdr.size = sizeof(msg.payload.inflight), 2431 }; 2432 2433 if (!virtio_has_feature(dev->protocol_features, 2434 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2435 return 0; 2436 } 2437 2438 ret = vhost_user_write(dev, &msg, NULL, 0); 2439 if (ret < 0) { 2440 return ret; 2441 } 2442 2443 ret = vhost_user_read(dev, &msg); 2444 if (ret < 0) { 2445 return ret; 2446 } 2447 2448 if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) { 2449 error_report("Received unexpected msg type. " 2450 "Expected %d received %d", 2451 VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request); 2452 return -EPROTO; 2453 } 2454 2455 if (msg.hdr.size != sizeof(msg.payload.inflight)) { 2456 error_report("Received bad msg size."); 2457 return -EPROTO; 2458 } 2459 2460 if (!msg.payload.inflight.mmap_size) { 2461 return 0; 2462 } 2463 2464 fd = qemu_chr_fe_get_msgfd(chr); 2465 if (fd < 0) { 2466 error_report("Failed to get mem fd"); 2467 return -EIO; 2468 } 2469 2470 addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE, 2471 MAP_SHARED, fd, msg.payload.inflight.mmap_offset); 2472 2473 if (addr == MAP_FAILED) { 2474 error_report("Failed to mmap mem fd"); 2475 close(fd); 2476 return -EFAULT; 2477 } 2478 2479 inflight->addr = addr; 2480 inflight->fd = fd; 2481 inflight->size = msg.payload.inflight.mmap_size; 2482 inflight->offset = msg.payload.inflight.mmap_offset; 2483 inflight->queue_size = queue_size; 2484 2485 return 0; 2486 } 2487 2488 static int vhost_user_set_inflight_fd(struct vhost_dev *dev, 2489 struct vhost_inflight *inflight) 2490 { 2491 VhostUserMsg msg = { 2492 .hdr.request = VHOST_USER_SET_INFLIGHT_FD, 2493 .hdr.flags = VHOST_USER_VERSION, 2494 .payload.inflight.mmap_size = inflight->size, 2495 .payload.inflight.mmap_offset = inflight->offset, 2496 .payload.inflight.num_queues = dev->nvqs, 2497 .payload.inflight.queue_size = inflight->queue_size, 2498 .hdr.size = sizeof(msg.payload.inflight), 2499 }; 2500 2501 if (!virtio_has_feature(dev->protocol_features, 2502 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2503 return 0; 2504 } 2505 2506 return vhost_user_write(dev, &msg, &inflight->fd, 1); 2507 } 2508 2509 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) 2510 { 2511 if (user->chr) { 2512 error_setg(errp, "Cannot initialize vhost-user state"); 2513 return false; 2514 } 2515 user->chr = chr; 2516 user->memory_slots = 0; 2517 return true; 2518 } 2519 2520 void vhost_user_cleanup(VhostUserState *user) 2521 { 2522 int i; 2523 VhostUserHostNotifier *n; 2524 2525 if (!user->chr) { 2526 return; 2527 } 2528 memory_region_transaction_begin(); 2529 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 2530 n = &user->notifier[i]; 2531 vhost_user_host_notifier_remove(user, NULL, i); 2532 object_unparent(OBJECT(&n->mr)); 2533 } 2534 memory_region_transaction_commit(); 2535 user->chr = NULL; 2536 } 2537 2538 const VhostOps user_ops = { 2539 .backend_type = VHOST_BACKEND_TYPE_USER, 2540 .vhost_backend_init = vhost_user_backend_init, 2541 .vhost_backend_cleanup = vhost_user_backend_cleanup, 2542 .vhost_backend_memslots_limit = vhost_user_memslots_limit, 2543 .vhost_set_log_base = vhost_user_set_log_base, 2544 .vhost_set_mem_table = vhost_user_set_mem_table, 2545 .vhost_set_vring_addr = vhost_user_set_vring_addr, 2546 .vhost_set_vring_endian = vhost_user_set_vring_endian, 2547 .vhost_set_vring_num = vhost_user_set_vring_num, 2548 .vhost_set_vring_base = vhost_user_set_vring_base, 2549 .vhost_get_vring_base = vhost_user_get_vring_base, 2550 .vhost_set_vring_kick = vhost_user_set_vring_kick, 2551 .vhost_set_vring_call = vhost_user_set_vring_call, 2552 .vhost_set_features = vhost_user_set_features, 2553 .vhost_get_features = vhost_user_get_features, 2554 .vhost_set_owner = vhost_user_set_owner, 2555 .vhost_reset_device = vhost_user_reset_device, 2556 .vhost_get_vq_index = vhost_user_get_vq_index, 2557 .vhost_set_vring_enable = vhost_user_set_vring_enable, 2558 .vhost_requires_shm_log = vhost_user_requires_shm_log, 2559 .vhost_migration_done = vhost_user_migration_done, 2560 .vhost_backend_can_merge = vhost_user_can_merge, 2561 .vhost_net_set_mtu = vhost_user_net_set_mtu, 2562 .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback, 2563 .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg, 2564 .vhost_get_config = vhost_user_get_config, 2565 .vhost_set_config = vhost_user_set_config, 2566 .vhost_crypto_create_session = vhost_user_crypto_create_session, 2567 .vhost_crypto_close_session = vhost_user_crypto_close_session, 2568 .vhost_backend_mem_section_filter = vhost_user_mem_section_filter, 2569 .vhost_get_inflight_fd = vhost_user_get_inflight_fd, 2570 .vhost_set_inflight_fd = vhost_user_set_inflight_fd, 2571 }; 2572