/*
 * vhost-user
 *
 * Copyright (c) 2013 Virtual Open Systems Sarl.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-user.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-net.h"
#include "chardev/char-fe.h"
#include "io/channel-socket.h"
#include "sysemu/kvm.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "qemu/sockets.h"
#include "sysemu/cryptodev.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "trace.h"
#include "exec/ramblock.h"

#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/un.h>

#include "standard-headers/linux/vhost_types.h"

#ifdef CONFIG_LINUX
#include <linux/userfaultfd.h>
#endif

#define VHOST_MEMORY_BASELINE_NREGIONS 8
#define VHOST_USER_F_PROTOCOL_FEATURES 30
#define VHOST_USER_SLAVE_MAX_FDS 8

/*
 * Set maximum number of RAM slots supported to
 * the maximum number supported by the target
 * hardware platform.
 */
#if defined(TARGET_X86) || defined(TARGET_X86_64) || \
    defined(TARGET_ARM) || defined(TARGET_ARM_64)
#include "hw/acpi/acpi.h"
#define VHOST_USER_MAX_RAM_SLOTS ACPI_MAX_RAM_SLOTS

#elif defined(TARGET_PPC) || defined(TARGET_PPC_64)
#include "hw/ppc/spapr.h"
#define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS

#else
#define VHOST_USER_MAX_RAM_SLOTS 512
#endif

/*
 * Maximum size of virtio device config space
 */
#define VHOST_USER_MAX_CONFIG_SIZE 256

enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,
    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
    VHOST_USER_PROTOCOL_F_NET_MTU = 4,
    VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
    VHOST_USER_PROTOCOL_F_CONFIG = 9,
    VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
    VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
    VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
    /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */
    VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
    VHOST_USER_PROTOCOL_F_MAX
};

#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)

typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_NET_SET_MTU = 20,
    VHOST_USER_SET_SLAVE_REQ_FD = 21,
    VHOST_USER_IOTLB_MSG = 22,
    VHOST_USER_SET_VRING_ENDIAN = 23,
    VHOST_USER_GET_CONFIG = 24,
    VHOST_USER_SET_CONFIG = 25,
    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
    VHOST_USER_POSTCOPY_ADVISE = 28,
    VHOST_USER_POSTCOPY_LISTEN = 29,
    VHOST_USER_POSTCOPY_END = 30,
    VHOST_USER_GET_INFLIGHT_FD = 31,
    VHOST_USER_SET_INFLIGHT_FD = 32,
    VHOST_USER_GPU_SET_SOCKET = 33,
    VHOST_USER_RESET_DEVICE = 34,
    /* Message number 35 reserved for VHOST_USER_VRING_KICK. */
    VHOST_USER_GET_MAX_MEM_SLOTS = 36,
    VHOST_USER_ADD_MEM_REG = 37,
    VHOST_USER_REM_MEM_REG = 38,
    VHOST_USER_MAX
} VhostUserRequest;

typedef enum VhostUserSlaveRequest {
    VHOST_USER_SLAVE_NONE = 0,
    VHOST_USER_SLAVE_IOTLB_MSG = 1,
    VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
    VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
    VHOST_USER_SLAVE_MAX
} VhostUserSlaveRequest;

typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;
} VhostUserMemoryRegion;

typedef struct VhostUserMemory {
    uint32_t nregions;
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS];
} VhostUserMemory;

typedef struct VhostUserMemRegMsg {
    uint64_t padding;
    VhostUserMemoryRegion region;
} VhostUserMemRegMsg;

typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;

typedef struct VhostUserConfig {
    uint32_t offset;
    uint32_t size;
    uint32_t flags;
    uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
} VhostUserConfig;

#define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512
#define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64

typedef struct VhostUserCryptoSession {
    /* session id for success, -1 on errors */
    int64_t session_id;
    CryptoDevBackendSymSessionInfo session_setup_data;
    uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
    uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
} VhostUserCryptoSession;

static VhostUserConfig c __attribute__ ((unused));
#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
                                    + sizeof(c.size) \
                                    + sizeof(c.flags))

typedef struct VhostUserVringArea {
    uint64_t u64;
    uint64_t size;
    uint64_t offset;
} VhostUserVringArea;

typedef struct VhostUserInflight {
    uint64_t mmap_size;
    uint64_t mmap_offset;
    uint16_t num_queues;
    uint16_t queue_size;
} VhostUserInflight;
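
/*
 * A vhost-user message on the wire is a VhostUserHeader followed by
 * hdr.size bytes of payload, taken from one of the variants in
 * VhostUserPayload below (hdr.size may be 0 for messages with no payload).
 */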
typedef struct {
    VhostUserRequest request;

#define VHOST_USER_VERSION_MASK     (0x3)
#define VHOST_USER_REPLY_MASK       (0x1 << 2)
#define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
    uint32_t flags;
    uint32_t size; /* the following payload size */
} QEMU_PACKED VhostUserHeader;

typedef union {
#define VHOST_USER_VRING_IDX_MASK   (0xff)
#define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
    uint64_t u64;
    struct vhost_vring_state state;
    struct vhost_vring_addr addr;
    VhostUserMemory memory;
    VhostUserMemRegMsg mem_reg;
    VhostUserLog log;
    struct vhost_iotlb_msg iotlb;
    VhostUserConfig config;
    VhostUserCryptoSession session;
    VhostUserVringArea area;
    VhostUserInflight inflight;
} VhostUserPayload;

typedef struct VhostUserMsg {
    VhostUserHeader hdr;
    VhostUserPayload payload;
} QEMU_PACKED VhostUserMsg;

static VhostUserMsg m __attribute__ ((unused));
#define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))

#define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))

/* The version of the protocol we support */
#define VHOST_USER_VERSION (0x1)

struct vhost_user {
    struct vhost_dev *dev;
    /* Shared between vhost devs of the same virtio device */
    VhostUserState *user;
    QIOChannel *slave_ioc;
    GSource *slave_src;
    NotifierWithReturn postcopy_notifier;
    struct PostCopyFD postcopy_fd;
    uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS];
    /* Length of the region_rb and region_rb_offset arrays */
    size_t region_rb_len;
    /* RAMBlock associated with a given region */
    RAMBlock **region_rb;
    /* The offset from the start of the RAMBlock to the start of the
     * vhost region.
     */
    ram_addr_t *region_rb_offset;

    /* True once we've entered postcopy_listen */
    bool postcopy_listen;

    /* Our current regions */
    int num_shadow_regions;
    struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS];
};

struct scrub_regions {
    struct vhost_memory_region *region;
    int reg_idx;
    int fd_idx;
};

static bool ioeventfd_enabled(void)
{
    return !kvm_enabled() || kvm_eventfds_enabled();
}

static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    uint8_t *p = (uint8_t *) msg;
    int r, size = VHOST_USER_HDR_SIZE;

    r = qemu_chr_fe_read_all(chr, p, size);
    if (r != size) {
        int saved_errno = errno;
        error_report("Failed to read msg header. Read %d instead of %d."
                     " Original request %d.", r, size, msg->hdr.request);
        return r < 0 ? -saved_errno : -EIO;
    }

    /* validate received flags */
    if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
        error_report("Failed to read msg header.
293 " Flags 0x%x instead of 0x%x.", msg->hdr.flags, 294 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 295 return -EPROTO; 296 } 297 298 return 0; 299 } 300 301 struct vhost_user_read_cb_data { 302 struct vhost_dev *dev; 303 VhostUserMsg *msg; 304 GMainLoop *loop; 305 int ret; 306 }; 307 308 static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, 309 gpointer opaque) 310 { 311 struct vhost_user_read_cb_data *data = opaque; 312 struct vhost_dev *dev = data->dev; 313 VhostUserMsg *msg = data->msg; 314 struct vhost_user *u = dev->opaque; 315 CharBackend *chr = u->user->chr; 316 uint8_t *p = (uint8_t *) msg; 317 int r, size; 318 319 r = vhost_user_read_header(dev, msg); 320 if (r < 0) { 321 data->ret = r; 322 goto end; 323 } 324 325 /* validate message size is sane */ 326 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) { 327 error_report("Failed to read msg header." 328 " Size %d exceeds the maximum %zu.", msg->hdr.size, 329 VHOST_USER_PAYLOAD_SIZE); 330 data->ret = -EPROTO; 331 goto end; 332 } 333 334 if (msg->hdr.size) { 335 p += VHOST_USER_HDR_SIZE; 336 size = msg->hdr.size; 337 r = qemu_chr_fe_read_all(chr, p, size); 338 if (r != size) { 339 int saved_errno = errno; 340 error_report("Failed to read msg payload." 341 " Read %d instead of %d.", r, msg->hdr.size); 342 data->ret = r < 0 ? -saved_errno : -EIO; 343 goto end; 344 } 345 } 346 347 end: 348 g_main_loop_quit(data->loop); 349 return G_SOURCE_REMOVE; 350 } 351 352 static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, 353 gpointer opaque); 354 355 /* 356 * This updates the read handler to use a new event loop context. 357 * Event sources are removed from the previous context : this ensures 358 * that events detected in the previous context are purged. They will 359 * be re-detected and processed in the new context. 360 */ 361 static void slave_update_read_handler(struct vhost_dev *dev, 362 GMainContext *ctxt) 363 { 364 struct vhost_user *u = dev->opaque; 365 366 if (!u->slave_ioc) { 367 return; 368 } 369 370 if (u->slave_src) { 371 g_source_destroy(u->slave_src); 372 g_source_unref(u->slave_src); 373 } 374 375 u->slave_src = qio_channel_add_watch_source(u->slave_ioc, 376 G_IO_IN | G_IO_HUP, 377 slave_read, dev, NULL, 378 ctxt); 379 } 380 381 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 382 { 383 struct vhost_user *u = dev->opaque; 384 CharBackend *chr = u->user->chr; 385 GMainContext *prev_ctxt = chr->chr->gcontext; 386 GMainContext *ctxt = g_main_context_new(); 387 GMainLoop *loop = g_main_loop_new(ctxt, FALSE); 388 struct vhost_user_read_cb_data data = { 389 .dev = dev, 390 .loop = loop, 391 .msg = msg, 392 .ret = 0 393 }; 394 395 /* 396 * We want to be able to monitor the slave channel fd while waiting 397 * for chr I/O. This requires an event loop, but we can't nest the 398 * one to which chr is currently attached : its fd handlers might not 399 * be prepared for re-entrancy. So we create a new one and switch chr 400 * to use it. 401 */ 402 slave_update_read_handler(dev, ctxt); 403 qemu_chr_be_update_read_handlers(chr->chr, ctxt); 404 qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data); 405 406 g_main_loop_run(loop); 407 408 /* 409 * Restore the previous event loop context. This also destroys/recreates 410 * event sources : this guarantees that all pending events in the original 411 * context that have been processed by the nested loop are purged. 
     */
    qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt);
    slave_update_read_handler(dev, NULL);

    g_main_loop_unref(loop);
    g_main_context_unref(ctxt);

    return data.ret;
}

static int process_message_reply(struct vhost_dev *dev,
                                 const VhostUserMsg *msg)
{
    int ret;
    VhostUserMsg msg_reply;

    if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
        return 0;
    }

    ret = vhost_user_read(dev, &msg_reply);
    if (ret < 0) {
        return ret;
    }

    if (msg_reply.hdr.request != msg->hdr.request) {
        error_report("Received unexpected msg type. "
                     "Expected %d received %d",
                     msg->hdr.request, msg_reply.hdr.request);
        return -EPROTO;
    }

    return msg_reply.payload.u64 ? -EIO : 0;
}

static bool vhost_user_one_time_request(VhostUserRequest request)
{
    switch (request) {
    case VHOST_USER_SET_OWNER:
    case VHOST_USER_RESET_OWNER:
    case VHOST_USER_SET_MEM_TABLE:
    case VHOST_USER_GET_QUEUE_NUM:
    case VHOST_USER_NET_SET_MTU:
        return true;
    default:
        return false;
    }
}

/* most non-init callers ignore the error */
static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
                            int *fds, int fd_num)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;

    /*
     * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
     * we only need to send it once; any later such request is simply
     * ignored.
     */
    if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) {
        msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
        return 0;
    }

    if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
        error_report("Failed to set msg fds.");
        return -EINVAL;
    }

    ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
    if (ret != size) {
        int saved_errno = errno;
        error_report("Failed to write msg."
                     " Wrote %d instead of %d.", ret, size);
        return ret < 0 ? -saved_errno : -EIO;
    }

    return 0;
}

int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GPU_SET_SOCKET,
        .hdr.flags = VHOST_USER_VERSION,
    };

    return vhost_user_write(dev, &msg, &fd, 1);
}

static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    int fds[VHOST_USER_MAX_RAM_SLOTS];
    size_t fd_num = 0;
    bool shmfd = virtio_has_feature(dev->protocol_features,
                                    VHOST_USER_PROTOCOL_F_LOG_SHMFD);
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_LOG_BASE,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.log.mmap_size = log->size * sizeof(*(log->log)),
        .payload.log.mmap_offset = 0,
        .hdr.size = sizeof(msg.payload.log),
    };

    if (shmfd && log->fd != -1) {
        fds[fd_num++] = log->fd;
    }

    ret = vhost_user_write(dev, &msg, fds, fd_num);
    if (ret < 0) {
        return ret;
    }

    if (shmfd) {
        msg.hdr.size = 0;
        ret = vhost_user_read(dev, &msg);
        if (ret < 0) {
            return ret;
        }

        if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
            error_report("Received unexpected msg type.
" 539 "Expected %d received %d", 540 VHOST_USER_SET_LOG_BASE, msg.hdr.request); 541 return -EPROTO; 542 } 543 } 544 545 return 0; 546 } 547 548 static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset, 549 int *fd) 550 { 551 MemoryRegion *mr; 552 553 assert((uintptr_t)addr == addr); 554 mr = memory_region_from_host((void *)(uintptr_t)addr, offset); 555 *fd = memory_region_get_fd(mr); 556 557 return mr; 558 } 559 560 static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst, 561 struct vhost_memory_region *src, 562 uint64_t mmap_offset) 563 { 564 assert(src != NULL && dst != NULL); 565 dst->userspace_addr = src->userspace_addr; 566 dst->memory_size = src->memory_size; 567 dst->guest_phys_addr = src->guest_phys_addr; 568 dst->mmap_offset = mmap_offset; 569 } 570 571 static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, 572 struct vhost_dev *dev, 573 VhostUserMsg *msg, 574 int *fds, size_t *fd_num, 575 bool track_ramblocks) 576 { 577 int i, fd; 578 ram_addr_t offset; 579 MemoryRegion *mr; 580 struct vhost_memory_region *reg; 581 VhostUserMemoryRegion region_buffer; 582 583 msg->hdr.request = VHOST_USER_SET_MEM_TABLE; 584 585 for (i = 0; i < dev->mem->nregions; ++i) { 586 reg = dev->mem->regions + i; 587 588 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 589 if (fd > 0) { 590 if (track_ramblocks) { 591 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS); 592 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name, 593 reg->memory_size, 594 reg->guest_phys_addr, 595 reg->userspace_addr, 596 offset); 597 u->region_rb_offset[i] = offset; 598 u->region_rb[i] = mr->ram_block; 599 } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) { 600 error_report("Failed preparing vhost-user memory table msg"); 601 return -ENOBUFS; 602 } 603 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 604 msg->payload.memory.regions[*fd_num] = region_buffer; 605 fds[(*fd_num)++] = fd; 606 } else if (track_ramblocks) { 607 u->region_rb_offset[i] = 0; 608 u->region_rb[i] = NULL; 609 } 610 } 611 612 msg->payload.memory.nregions = *fd_num; 613 614 if (!*fd_num) { 615 error_report("Failed initializing vhost-user memory map, " 616 "consider using -object memory-backend-file share=on"); 617 return -EINVAL; 618 } 619 620 msg->hdr.size = sizeof(msg->payload.memory.nregions); 621 msg->hdr.size += sizeof(msg->payload.memory.padding); 622 msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion); 623 624 return 0; 625 } 626 627 static inline bool reg_equal(struct vhost_memory_region *shadow_reg, 628 struct vhost_memory_region *vdev_reg) 629 { 630 return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr && 631 shadow_reg->userspace_addr == vdev_reg->userspace_addr && 632 shadow_reg->memory_size == vdev_reg->memory_size; 633 } 634 635 static void scrub_shadow_regions(struct vhost_dev *dev, 636 struct scrub_regions *add_reg, 637 int *nr_add_reg, 638 struct scrub_regions *rem_reg, 639 int *nr_rem_reg, uint64_t *shadow_pcb, 640 bool track_ramblocks) 641 { 642 struct vhost_user *u = dev->opaque; 643 bool found[VHOST_USER_MAX_RAM_SLOTS] = {}; 644 struct vhost_memory_region *reg, *shadow_reg; 645 int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0; 646 ram_addr_t offset; 647 MemoryRegion *mr; 648 bool matching; 649 650 /* 651 * Find memory regions present in our shadow state which are not in 652 * the device's current memory state. 653 * 654 * Mark regions in both the shadow and device state as "found". 
655 */ 656 for (i = 0; i < u->num_shadow_regions; i++) { 657 shadow_reg = &u->shadow_regions[i]; 658 matching = false; 659 660 for (j = 0; j < dev->mem->nregions; j++) { 661 reg = &dev->mem->regions[j]; 662 663 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 664 665 if (reg_equal(shadow_reg, reg)) { 666 matching = true; 667 found[j] = true; 668 if (track_ramblocks) { 669 /* 670 * Reset postcopy client bases, region_rb, and 671 * region_rb_offset in case regions are removed. 672 */ 673 if (fd > 0) { 674 u->region_rb_offset[j] = offset; 675 u->region_rb[j] = mr->ram_block; 676 shadow_pcb[j] = u->postcopy_client_bases[i]; 677 } else { 678 u->region_rb_offset[j] = 0; 679 u->region_rb[j] = NULL; 680 } 681 } 682 break; 683 } 684 } 685 686 /* 687 * If the region was not found in the current device memory state 688 * create an entry for it in the removed list. 689 */ 690 if (!matching) { 691 rem_reg[rm_idx].region = shadow_reg; 692 rem_reg[rm_idx++].reg_idx = i; 693 } 694 } 695 696 /* 697 * For regions not marked "found", create entries in the added list. 698 * 699 * Note their indexes in the device memory state and the indexes of their 700 * file descriptors. 701 */ 702 for (i = 0; i < dev->mem->nregions; i++) { 703 reg = &dev->mem->regions[i]; 704 vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 705 if (fd > 0) { 706 ++fd_num; 707 } 708 709 /* 710 * If the region was in both the shadow and device state we don't 711 * need to send a VHOST_USER_ADD_MEM_REG message for it. 712 */ 713 if (found[i]) { 714 continue; 715 } 716 717 add_reg[add_idx].region = reg; 718 add_reg[add_idx].reg_idx = i; 719 add_reg[add_idx++].fd_idx = fd_num; 720 } 721 *nr_rem_reg = rm_idx; 722 *nr_add_reg = add_idx; 723 724 return; 725 } 726 727 static int send_remove_regions(struct vhost_dev *dev, 728 struct scrub_regions *remove_reg, 729 int nr_rem_reg, VhostUserMsg *msg, 730 bool reply_supported) 731 { 732 struct vhost_user *u = dev->opaque; 733 struct vhost_memory_region *shadow_reg; 734 int i, fd, shadow_reg_idx, ret; 735 ram_addr_t offset; 736 VhostUserMemoryRegion region_buffer; 737 738 /* 739 * The regions in remove_reg appear in the same order they do in the 740 * shadow table. Therefore we can minimize memory copies by iterating 741 * through remove_reg backwards. 742 */ 743 for (i = nr_rem_reg - 1; i >= 0; i--) { 744 shadow_reg = remove_reg[i].region; 745 shadow_reg_idx = remove_reg[i].reg_idx; 746 747 vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd); 748 749 if (fd > 0) { 750 msg->hdr.request = VHOST_USER_REM_MEM_REG; 751 vhost_user_fill_msg_region(®ion_buffer, shadow_reg, 0); 752 msg->payload.mem_reg.region = region_buffer; 753 754 ret = vhost_user_write(dev, msg, &fd, 1); 755 if (ret < 0) { 756 return ret; 757 } 758 759 if (reply_supported) { 760 ret = process_message_reply(dev, msg); 761 if (ret) { 762 return ret; 763 } 764 } 765 } 766 767 /* 768 * At this point we know the backend has unmapped the region. It is now 769 * safe to remove it from the shadow table. 
770 */ 771 memmove(&u->shadow_regions[shadow_reg_idx], 772 &u->shadow_regions[shadow_reg_idx + 1], 773 sizeof(struct vhost_memory_region) * 774 (u->num_shadow_regions - shadow_reg_idx - 1)); 775 u->num_shadow_regions--; 776 } 777 778 return 0; 779 } 780 781 static int send_add_regions(struct vhost_dev *dev, 782 struct scrub_regions *add_reg, int nr_add_reg, 783 VhostUserMsg *msg, uint64_t *shadow_pcb, 784 bool reply_supported, bool track_ramblocks) 785 { 786 struct vhost_user *u = dev->opaque; 787 int i, fd, ret, reg_idx, reg_fd_idx; 788 struct vhost_memory_region *reg; 789 MemoryRegion *mr; 790 ram_addr_t offset; 791 VhostUserMsg msg_reply; 792 VhostUserMemoryRegion region_buffer; 793 794 for (i = 0; i < nr_add_reg; i++) { 795 reg = add_reg[i].region; 796 reg_idx = add_reg[i].reg_idx; 797 reg_fd_idx = add_reg[i].fd_idx; 798 799 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 800 801 if (fd > 0) { 802 if (track_ramblocks) { 803 trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name, 804 reg->memory_size, 805 reg->guest_phys_addr, 806 reg->userspace_addr, 807 offset); 808 u->region_rb_offset[reg_idx] = offset; 809 u->region_rb[reg_idx] = mr->ram_block; 810 } 811 msg->hdr.request = VHOST_USER_ADD_MEM_REG; 812 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 813 msg->payload.mem_reg.region = region_buffer; 814 815 ret = vhost_user_write(dev, msg, &fd, 1); 816 if (ret < 0) { 817 return ret; 818 } 819 820 if (track_ramblocks) { 821 uint64_t reply_gpa; 822 823 ret = vhost_user_read(dev, &msg_reply); 824 if (ret < 0) { 825 return ret; 826 } 827 828 reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr; 829 830 if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) { 831 error_report("%s: Received unexpected msg type." 832 "Expected %d received %d", __func__, 833 VHOST_USER_ADD_MEM_REG, 834 msg_reply.hdr.request); 835 return -EPROTO; 836 } 837 838 /* 839 * We're using the same structure, just reusing one of the 840 * fields, so it should be the same size. 841 */ 842 if (msg_reply.hdr.size != msg->hdr.size) { 843 error_report("%s: Unexpected size for postcopy reply " 844 "%d vs %d", __func__, msg_reply.hdr.size, 845 msg->hdr.size); 846 return -EPROTO; 847 } 848 849 /* Get the postcopy client base from the backend's reply. */ 850 if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) { 851 shadow_pcb[reg_idx] = 852 msg_reply.payload.mem_reg.region.userspace_addr; 853 trace_vhost_user_set_mem_table_postcopy( 854 msg_reply.payload.mem_reg.region.userspace_addr, 855 msg->payload.mem_reg.region.userspace_addr, 856 reg_fd_idx, reg_idx); 857 } else { 858 error_report("%s: invalid postcopy reply for region. " 859 "Got guest physical address %" PRIX64 ", expected " 860 "%" PRIX64, __func__, reply_gpa, 861 dev->mem->regions[reg_idx].guest_phys_addr); 862 return -EPROTO; 863 } 864 } else if (reply_supported) { 865 ret = process_message_reply(dev, msg); 866 if (ret) { 867 return ret; 868 } 869 } 870 } else if (track_ramblocks) { 871 u->region_rb_offset[reg_idx] = 0; 872 u->region_rb[reg_idx] = NULL; 873 } 874 875 /* 876 * At this point, we know the backend has mapped in the new 877 * region, if the region has a valid file descriptor. 878 * 879 * The region should now be added to the shadow table. 
880 */ 881 u->shadow_regions[u->num_shadow_regions].guest_phys_addr = 882 reg->guest_phys_addr; 883 u->shadow_regions[u->num_shadow_regions].userspace_addr = 884 reg->userspace_addr; 885 u->shadow_regions[u->num_shadow_regions].memory_size = 886 reg->memory_size; 887 u->num_shadow_regions++; 888 } 889 890 return 0; 891 } 892 893 static int vhost_user_add_remove_regions(struct vhost_dev *dev, 894 VhostUserMsg *msg, 895 bool reply_supported, 896 bool track_ramblocks) 897 { 898 struct vhost_user *u = dev->opaque; 899 struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS]; 900 struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS]; 901 uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {}; 902 int nr_add_reg, nr_rem_reg; 903 int ret; 904 905 msg->hdr.size = sizeof(msg->payload.mem_reg); 906 907 /* Find the regions which need to be removed or added. */ 908 scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg, 909 shadow_pcb, track_ramblocks); 910 911 if (nr_rem_reg) { 912 ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg, 913 reply_supported); 914 if (ret < 0) { 915 goto err; 916 } 917 } 918 919 if (nr_add_reg) { 920 ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb, 921 reply_supported, track_ramblocks); 922 if (ret < 0) { 923 goto err; 924 } 925 } 926 927 if (track_ramblocks) { 928 memcpy(u->postcopy_client_bases, shadow_pcb, 929 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 930 /* 931 * Now we've registered this with the postcopy code, we ack to the 932 * client, because now we're in the position to be able to deal with 933 * any faults it generates. 934 */ 935 /* TODO: Use this for failure cases as well with a bad value. */ 936 msg->hdr.size = sizeof(msg->payload.u64); 937 msg->payload.u64 = 0; /* OK */ 938 939 ret = vhost_user_write(dev, msg, NULL, 0); 940 if (ret < 0) { 941 return ret; 942 } 943 } 944 945 return 0; 946 947 err: 948 if (track_ramblocks) { 949 memcpy(u->postcopy_client_bases, shadow_pcb, 950 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 951 } 952 953 return ret; 954 } 955 956 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, 957 struct vhost_memory *mem, 958 bool reply_supported, 959 bool config_mem_slots) 960 { 961 struct vhost_user *u = dev->opaque; 962 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 963 size_t fd_num = 0; 964 VhostUserMsg msg_reply; 965 int region_i, msg_i; 966 int ret; 967 968 VhostUserMsg msg = { 969 .hdr.flags = VHOST_USER_VERSION, 970 }; 971 972 if (u->region_rb_len < dev->mem->nregions) { 973 u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions); 974 u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset, 975 dev->mem->nregions); 976 memset(&(u->region_rb[u->region_rb_len]), '\0', 977 sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len)); 978 memset(&(u->region_rb_offset[u->region_rb_len]), '\0', 979 sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len)); 980 u->region_rb_len = dev->mem->nregions; 981 } 982 983 if (config_mem_slots) { 984 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true); 985 if (ret < 0) { 986 return ret; 987 } 988 } else { 989 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 990 true); 991 if (ret < 0) { 992 return ret; 993 } 994 995 ret = vhost_user_write(dev, &msg, fds, fd_num); 996 if (ret < 0) { 997 return ret; 998 } 999 1000 ret = vhost_user_read(dev, &msg_reply); 1001 if (ret < 0) { 1002 return ret; 1003 } 1004 1005 if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { 1006 error_report("%s: Received 
unexpected msg type."
                         "Expected %d received %d", __func__,
                         VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
            return -EPROTO;
        }

        /*
         * We're using the same structure, just reusing one of the
         * fields, so it should be the same size.
         */
        if (msg_reply.hdr.size != msg.hdr.size) {
            error_report("%s: Unexpected size for postcopy reply "
                         "%d vs %d", __func__, msg_reply.hdr.size,
                         msg.hdr.size);
            return -EPROTO;
        }

        memset(u->postcopy_client_bases, 0,
               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);

        /*
         * They're in the same order as the regions that were sent
         * but some of the regions were skipped (above) if they
         * didn't have fd's
         */
        for (msg_i = 0, region_i = 0;
             region_i < dev->mem->nregions;
             region_i++) {
            if (msg_i < fd_num &&
                msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
                dev->mem->regions[region_i].guest_phys_addr) {
                u->postcopy_client_bases[region_i] =
                    msg_reply.payload.memory.regions[msg_i].userspace_addr;
                trace_vhost_user_set_mem_table_postcopy(
                    msg_reply.payload.memory.regions[msg_i].userspace_addr,
                    msg.payload.memory.regions[msg_i].userspace_addr,
                    msg_i, region_i);
                msg_i++;
            }
        }
        if (msg_i != fd_num) {
            error_report("%s: postcopy reply not fully consumed "
                         "%d vs %zd",
                         __func__, msg_i, fd_num);
            return -EIO;
        }

        /*
         * Now we've registered this with the postcopy code, we ack to the
         * client, because now we're in the position to be able to deal
         * with any faults it generates.
         */
        /* TODO: Use this for failure cases as well with a bad value. */
        msg.hdr.size = sizeof(msg.payload.u64);
        msg.payload.u64 = 0; /* OK */
        ret = vhost_user_write(dev, &msg, NULL, 0);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

static int vhost_user_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    struct vhost_user *u = dev->opaque;
    int fds[VHOST_MEMORY_BASELINE_NREGIONS];
    size_t fd_num = 0;
    bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
    bool config_mem_slots =
        virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS);
    int ret;

    if (do_postcopy) {
        /*
         * Postcopy has enough differences that it's best done in its own
         * version
         */
        return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported,
                                                 config_mem_slots);
    }

    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    if (config_mem_slots) {
        ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false);
        if (ret < 0) {
            return ret;
        }
    } else {
        ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
                                                false);
        if (ret < 0) {
            return ret;
        }

        ret = vhost_user_write(dev, &msg, fds, fd_num);
        if (ret < 0) {
            return ret;
        }

        if (reply_supported) {
            return process_message_reply(dev, &msg);
        }
    }

    return 0;
}

static int vhost_user_set_vring_endian(struct vhost_dev *dev,
                                       struct vhost_vring_state *ring)
{
    bool cross_endian =
virtio_has_feature(dev->protocol_features, 1130 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN); 1131 VhostUserMsg msg = { 1132 .hdr.request = VHOST_USER_SET_VRING_ENDIAN, 1133 .hdr.flags = VHOST_USER_VERSION, 1134 .payload.state = *ring, 1135 .hdr.size = sizeof(msg.payload.state), 1136 }; 1137 1138 if (!cross_endian) { 1139 error_report("vhost-user trying to send unhandled ioctl"); 1140 return -ENOTSUP; 1141 } 1142 1143 return vhost_user_write(dev, &msg, NULL, 0); 1144 } 1145 1146 static int vhost_set_vring(struct vhost_dev *dev, 1147 unsigned long int request, 1148 struct vhost_vring_state *ring) 1149 { 1150 VhostUserMsg msg = { 1151 .hdr.request = request, 1152 .hdr.flags = VHOST_USER_VERSION, 1153 .payload.state = *ring, 1154 .hdr.size = sizeof(msg.payload.state), 1155 }; 1156 1157 return vhost_user_write(dev, &msg, NULL, 0); 1158 } 1159 1160 static int vhost_user_set_vring_num(struct vhost_dev *dev, 1161 struct vhost_vring_state *ring) 1162 { 1163 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); 1164 } 1165 1166 static void vhost_user_host_notifier_free(VhostUserHostNotifier *n) 1167 { 1168 assert(n && n->unmap_addr); 1169 munmap(n->unmap_addr, qemu_real_host_page_size()); 1170 n->unmap_addr = NULL; 1171 } 1172 1173 static void vhost_user_host_notifier_remove(VhostUserState *user, 1174 VirtIODevice *vdev, int queue_idx) 1175 { 1176 VhostUserHostNotifier *n = &user->notifier[queue_idx]; 1177 1178 if (n->addr) { 1179 if (vdev) { 1180 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false); 1181 } 1182 assert(!n->unmap_addr); 1183 n->unmap_addr = n->addr; 1184 n->addr = NULL; 1185 call_rcu(n, vhost_user_host_notifier_free, rcu); 1186 } 1187 } 1188 1189 static int vhost_user_set_vring_base(struct vhost_dev *dev, 1190 struct vhost_vring_state *ring) 1191 { 1192 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); 1193 } 1194 1195 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) 1196 { 1197 int i; 1198 1199 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { 1200 return -EINVAL; 1201 } 1202 1203 for (i = 0; i < dev->nvqs; ++i) { 1204 int ret; 1205 struct vhost_vring_state state = { 1206 .index = dev->vq_index + i, 1207 .num = enable, 1208 }; 1209 1210 ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); 1211 if (ret < 0) { 1212 /* 1213 * Restoring the previous state is likely infeasible, as well as 1214 * proceeding regardless the error, so just bail out and hope for 1215 * the device-level recovery. 1216 */ 1217 return ret; 1218 } 1219 } 1220 1221 return 0; 1222 } 1223 1224 static int vhost_user_get_vring_base(struct vhost_dev *dev, 1225 struct vhost_vring_state *ring) 1226 { 1227 int ret; 1228 VhostUserMsg msg = { 1229 .hdr.request = VHOST_USER_GET_VRING_BASE, 1230 .hdr.flags = VHOST_USER_VERSION, 1231 .payload.state = *ring, 1232 .hdr.size = sizeof(msg.payload.state), 1233 }; 1234 struct vhost_user *u = dev->opaque; 1235 1236 vhost_user_host_notifier_remove(u->user, dev->vdev, ring->index); 1237 1238 ret = vhost_user_write(dev, &msg, NULL, 0); 1239 if (ret < 0) { 1240 return ret; 1241 } 1242 1243 ret = vhost_user_read(dev, &msg); 1244 if (ret < 0) { 1245 return ret; 1246 } 1247 1248 if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) { 1249 error_report("Received unexpected msg type. 
Expected %d received %d", 1250 VHOST_USER_GET_VRING_BASE, msg.hdr.request); 1251 return -EPROTO; 1252 } 1253 1254 if (msg.hdr.size != sizeof(msg.payload.state)) { 1255 error_report("Received bad msg size."); 1256 return -EPROTO; 1257 } 1258 1259 *ring = msg.payload.state; 1260 1261 return 0; 1262 } 1263 1264 static int vhost_set_vring_file(struct vhost_dev *dev, 1265 VhostUserRequest request, 1266 struct vhost_vring_file *file) 1267 { 1268 int fds[VHOST_USER_MAX_RAM_SLOTS]; 1269 size_t fd_num = 0; 1270 VhostUserMsg msg = { 1271 .hdr.request = request, 1272 .hdr.flags = VHOST_USER_VERSION, 1273 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, 1274 .hdr.size = sizeof(msg.payload.u64), 1275 }; 1276 1277 if (ioeventfd_enabled() && file->fd > 0) { 1278 fds[fd_num++] = file->fd; 1279 } else { 1280 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 1281 } 1282 1283 return vhost_user_write(dev, &msg, fds, fd_num); 1284 } 1285 1286 static int vhost_user_set_vring_kick(struct vhost_dev *dev, 1287 struct vhost_vring_file *file) 1288 { 1289 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); 1290 } 1291 1292 static int vhost_user_set_vring_call(struct vhost_dev *dev, 1293 struct vhost_vring_file *file) 1294 { 1295 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); 1296 } 1297 1298 1299 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) 1300 { 1301 int ret; 1302 VhostUserMsg msg = { 1303 .hdr.request = request, 1304 .hdr.flags = VHOST_USER_VERSION, 1305 }; 1306 1307 if (vhost_user_one_time_request(request) && dev->vq_index != 0) { 1308 return 0; 1309 } 1310 1311 ret = vhost_user_write(dev, &msg, NULL, 0); 1312 if (ret < 0) { 1313 return ret; 1314 } 1315 1316 ret = vhost_user_read(dev, &msg); 1317 if (ret < 0) { 1318 return ret; 1319 } 1320 1321 if (msg.hdr.request != request) { 1322 error_report("Received unexpected msg type. Expected %d received %d", 1323 request, msg.hdr.request); 1324 return -EPROTO; 1325 } 1326 1327 if (msg.hdr.size != sizeof(msg.payload.u64)) { 1328 error_report("Received bad msg size."); 1329 return -EPROTO; 1330 } 1331 1332 *u64 = msg.payload.u64; 1333 1334 return 0; 1335 } 1336 1337 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) 1338 { 1339 if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) { 1340 return -EPROTO; 1341 } 1342 1343 return 0; 1344 } 1345 1346 static int enforce_reply(struct vhost_dev *dev, 1347 const VhostUserMsg *msg) 1348 { 1349 uint64_t dummy; 1350 1351 if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1352 return process_message_reply(dev, msg); 1353 } 1354 1355 /* 1356 * We need to wait for a reply but the backend does not 1357 * support replies for the command we just sent. 1358 * Send VHOST_USER_GET_FEATURES which makes all backends 1359 * send a reply. 
1360 */ 1361 return vhost_user_get_features(dev, &dummy); 1362 } 1363 1364 static int vhost_user_set_vring_addr(struct vhost_dev *dev, 1365 struct vhost_vring_addr *addr) 1366 { 1367 int ret; 1368 VhostUserMsg msg = { 1369 .hdr.request = VHOST_USER_SET_VRING_ADDR, 1370 .hdr.flags = VHOST_USER_VERSION, 1371 .payload.addr = *addr, 1372 .hdr.size = sizeof(msg.payload.addr), 1373 }; 1374 1375 bool reply_supported = virtio_has_feature(dev->protocol_features, 1376 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1377 1378 /* 1379 * wait for a reply if logging is enabled to make sure 1380 * backend is actually logging changes 1381 */ 1382 bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG); 1383 1384 if (reply_supported && wait_for_reply) { 1385 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1386 } 1387 1388 ret = vhost_user_write(dev, &msg, NULL, 0); 1389 if (ret < 0) { 1390 return ret; 1391 } 1392 1393 if (wait_for_reply) { 1394 return enforce_reply(dev, &msg); 1395 } 1396 1397 return 0; 1398 } 1399 1400 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64, 1401 bool wait_for_reply) 1402 { 1403 VhostUserMsg msg = { 1404 .hdr.request = request, 1405 .hdr.flags = VHOST_USER_VERSION, 1406 .payload.u64 = u64, 1407 .hdr.size = sizeof(msg.payload.u64), 1408 }; 1409 int ret; 1410 1411 if (wait_for_reply) { 1412 bool reply_supported = virtio_has_feature(dev->protocol_features, 1413 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1414 if (reply_supported) { 1415 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1416 } 1417 } 1418 1419 ret = vhost_user_write(dev, &msg, NULL, 0); 1420 if (ret < 0) { 1421 return ret; 1422 } 1423 1424 if (wait_for_reply) { 1425 return enforce_reply(dev, &msg); 1426 } 1427 1428 return 0; 1429 } 1430 1431 static int vhost_user_set_features(struct vhost_dev *dev, 1432 uint64_t features) 1433 { 1434 /* 1435 * wait for a reply if logging is enabled to make sure 1436 * backend is actually logging changes 1437 */ 1438 bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL); 1439 1440 return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features, 1441 log_enabled); 1442 } 1443 1444 static int vhost_user_set_protocol_features(struct vhost_dev *dev, 1445 uint64_t features) 1446 { 1447 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features, 1448 false); 1449 } 1450 1451 static int vhost_user_set_owner(struct vhost_dev *dev) 1452 { 1453 VhostUserMsg msg = { 1454 .hdr.request = VHOST_USER_SET_OWNER, 1455 .hdr.flags = VHOST_USER_VERSION, 1456 }; 1457 1458 return vhost_user_write(dev, &msg, NULL, 0); 1459 } 1460 1461 static int vhost_user_get_max_memslots(struct vhost_dev *dev, 1462 uint64_t *max_memslots) 1463 { 1464 uint64_t backend_max_memslots; 1465 int err; 1466 1467 err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS, 1468 &backend_max_memslots); 1469 if (err < 0) { 1470 return err; 1471 } 1472 1473 *max_memslots = backend_max_memslots; 1474 1475 return 0; 1476 } 1477 1478 static int vhost_user_reset_device(struct vhost_dev *dev) 1479 { 1480 VhostUserMsg msg = { 1481 .hdr.flags = VHOST_USER_VERSION, 1482 }; 1483 1484 msg.hdr.request = virtio_has_feature(dev->protocol_features, 1485 VHOST_USER_PROTOCOL_F_RESET_DEVICE) 1486 ? 
VHOST_USER_RESET_DEVICE 1487 : VHOST_USER_RESET_OWNER; 1488 1489 return vhost_user_write(dev, &msg, NULL, 0); 1490 } 1491 1492 static int vhost_user_slave_handle_config_change(struct vhost_dev *dev) 1493 { 1494 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { 1495 return -ENOSYS; 1496 } 1497 1498 return dev->config_ops->vhost_dev_config_notifier(dev); 1499 } 1500 1501 static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, 1502 VhostUserVringArea *area, 1503 int fd) 1504 { 1505 int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK; 1506 size_t page_size = qemu_real_host_page_size(); 1507 struct vhost_user *u = dev->opaque; 1508 VhostUserState *user = u->user; 1509 VirtIODevice *vdev = dev->vdev; 1510 VhostUserHostNotifier *n; 1511 void *addr; 1512 char *name; 1513 1514 if (!virtio_has_feature(dev->protocol_features, 1515 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) || 1516 vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) { 1517 return -EINVAL; 1518 } 1519 1520 n = &user->notifier[queue_idx]; 1521 1522 vhost_user_host_notifier_remove(user, vdev, queue_idx); 1523 1524 if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { 1525 return 0; 1526 } 1527 1528 /* Sanity check. */ 1529 if (area->size != page_size) { 1530 return -EINVAL; 1531 } 1532 1533 addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, 1534 fd, area->offset); 1535 if (addr == MAP_FAILED) { 1536 return -EFAULT; 1537 } 1538 1539 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]", 1540 user, queue_idx); 1541 if (!n->mr.ram) { /* Don't init again after suspend. */ 1542 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, 1543 page_size, addr); 1544 } else { 1545 n->mr.ram_block->host = addr; 1546 } 1547 g_free(name); 1548 1549 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) { 1550 object_unparent(OBJECT(&n->mr)); 1551 munmap(addr, page_size); 1552 return -ENXIO; 1553 } 1554 1555 n->addr = addr; 1556 1557 return 0; 1558 } 1559 1560 static void close_slave_channel(struct vhost_user *u) 1561 { 1562 g_source_destroy(u->slave_src); 1563 g_source_unref(u->slave_src); 1564 u->slave_src = NULL; 1565 object_unref(OBJECT(u->slave_ioc)); 1566 u->slave_ioc = NULL; 1567 } 1568 1569 static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, 1570 gpointer opaque) 1571 { 1572 struct vhost_dev *dev = opaque; 1573 struct vhost_user *u = dev->opaque; 1574 VhostUserHeader hdr = { 0, }; 1575 VhostUserPayload payload = { 0, }; 1576 Error *local_err = NULL; 1577 gboolean rc = G_SOURCE_CONTINUE; 1578 int ret = 0; 1579 struct iovec iov; 1580 g_autofree int *fd = NULL; 1581 size_t fdsize = 0; 1582 int i; 1583 1584 /* Read header */ 1585 iov.iov_base = &hdr; 1586 iov.iov_len = VHOST_USER_HDR_SIZE; 1587 1588 if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) { 1589 error_report_err(local_err); 1590 goto err; 1591 } 1592 1593 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) { 1594 error_report("Failed to read msg header." 
                     " Size %d exceeds the maximum %zu.", hdr.size,
                     VHOST_USER_PAYLOAD_SIZE);
        goto err;
    }

    /* Read payload */
    if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) {
        error_report_err(local_err);
        goto err;
    }

    switch (hdr.request) {
    case VHOST_USER_SLAVE_IOTLB_MSG:
        ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
        break;
    case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
        ret = vhost_user_slave_handle_config_change(dev);
        break;
    case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
        ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area,
                                                          fd ? fd[0] : -1);
        break;
    default:
        error_report("Received unexpected msg type: %d.", hdr.request);
        ret = -EINVAL;
    }

    /*
     * REPLY_ACK feature handling. Other reply types have to be managed
     * directly in their request handlers.
     */
    if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
        struct iovec iovec[2];

        hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
        hdr.flags |= VHOST_USER_REPLY_MASK;

        payload.u64 = !!ret;
        hdr.size = sizeof(payload.u64);

        iovec[0].iov_base = &hdr;
        iovec[0].iov_len = VHOST_USER_HDR_SIZE;
        iovec[1].iov_base = &payload;
        iovec[1].iov_len = hdr.size;

        if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) {
            error_report_err(local_err);
            goto err;
        }
    }

    goto fdcleanup;

err:
    close_slave_channel(u);
    rc = G_SOURCE_REMOVE;

fdcleanup:
    if (fd) {
        for (i = 0; i < fdsize; i++) {
            close(fd[i]);
        }
    }
    return rc;
}

static int vhost_setup_slave_channel(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD,
        .hdr.flags = VHOST_USER_VERSION,
    };
    struct vhost_user *u = dev->opaque;
    int sv[2], ret = 0;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
    Error *local_err = NULL;
    QIOChannel *ioc;

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
        return 0;
    }

    if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
        int saved_errno = errno;
        error_report("socketpair() failed");
        return -saved_errno;
    }

    ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err));
    if (!ioc) {
        error_report_err(local_err);
        return -ECONNREFUSED;
    }
    u->slave_ioc = ioc;
    slave_update_read_handler(dev, NULL);

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    ret = vhost_user_write(dev, &msg, &sv[1], 1);
    if (ret) {
        goto out;
    }

    if (reply_supported) {
        ret = process_message_reply(dev, &msg);
    }

out:
    close(sv[1]);
    if (ret) {
        close_slave_channel(u);
    }

    return ret;
}

#ifdef CONFIG_LINUX
/*
 * Called back from the postcopy fault thread when a fault is received on our
 * ufd.
 * TODO: This is Linux specific
 */
static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
                                             void *ufd)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    struct uffd_msg *msg = ufd;
    uint64_t faultaddr = msg->arg.pagefault.address;
    RAMBlock *rb = NULL;
    uint64_t rb_offset;
    int i;

    trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
                                            dev->mem->nregions);
    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
        trace_vhost_user_postcopy_fault_handler_loop(i,
                u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
        if (faultaddr >= u->postcopy_client_bases[i]) {
            /* Offset of the fault address in the vhost region */
            uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
            if (region_offset < dev->mem->regions[i].memory_size) {
                rb_offset = region_offset + u->region_rb_offset[i];
                trace_vhost_user_postcopy_fault_handler_found(i,
                        region_offset, rb_offset);
                rb = u->region_rb[i];
                return postcopy_request_shared_page(pcfd, rb, faultaddr,
                                                    rb_offset);
            }
        }
    }
    error_report("%s: Failed to find region for fault %" PRIx64,
                 __func__, faultaddr);
    return -1;
}

static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
                                     uint64_t offset)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    int i;

    trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);

    if (!u) {
        return 0;
    }
    /* Translate the offset into an address in the client's address space */
    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
        if (u->region_rb[i] == rb &&
            offset >= u->region_rb_offset[i] &&
            offset < (u->region_rb_offset[i] +
                      dev->mem->regions[i].memory_size)) {
            uint64_t client_addr = (offset - u->region_rb_offset[i]) +
                                   u->postcopy_client_bases[i];
            trace_vhost_user_postcopy_waker_found(client_addr);
            return postcopy_wake_shared(pcfd, client_addr, rb);
        }
    }

    trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
    return 0;
}
#endif

/*
 * Called at the start of an inbound postcopy on reception of the
 * 'advise' command.
 */
static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
{
#ifdef CONFIG_LINUX
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    int ufd;
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
        .hdr.flags = VHOST_USER_VERSION,
    };

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_setg(errp, "Failed to send postcopy_advise to vhost");
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        error_setg(errp, "Failed to get postcopy_advise reply from vhost");
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
        error_setg(errp, "Unexpected msg type.
Expected %d received %d", 1816 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request); 1817 return -EPROTO; 1818 } 1819 1820 if (msg.hdr.size) { 1821 error_setg(errp, "Received bad msg size."); 1822 return -EPROTO; 1823 } 1824 ufd = qemu_chr_fe_get_msgfd(chr); 1825 if (ufd < 0) { 1826 error_setg(errp, "%s: Failed to get ufd", __func__); 1827 return -EIO; 1828 } 1829 qemu_set_nonblock(ufd); 1830 1831 /* register ufd with userfault thread */ 1832 u->postcopy_fd.fd = ufd; 1833 u->postcopy_fd.data = dev; 1834 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler; 1835 u->postcopy_fd.waker = vhost_user_postcopy_waker; 1836 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */ 1837 postcopy_register_shared_ufd(&u->postcopy_fd); 1838 return 0; 1839 #else 1840 error_setg(errp, "Postcopy not supported on non-Linux systems"); 1841 return -ENOSYS; 1842 #endif 1843 } 1844 1845 /* 1846 * Called at the switch to postcopy on reception of the 'listen' command. 1847 */ 1848 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) 1849 { 1850 struct vhost_user *u = dev->opaque; 1851 int ret; 1852 VhostUserMsg msg = { 1853 .hdr.request = VHOST_USER_POSTCOPY_LISTEN, 1854 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1855 }; 1856 u->postcopy_listen = true; 1857 1858 trace_vhost_user_postcopy_listen(); 1859 1860 ret = vhost_user_write(dev, &msg, NULL, 0); 1861 if (ret < 0) { 1862 error_setg(errp, "Failed to send postcopy_listen to vhost"); 1863 return ret; 1864 } 1865 1866 ret = process_message_reply(dev, &msg); 1867 if (ret) { 1868 error_setg(errp, "Failed to receive reply to postcopy_listen"); 1869 return ret; 1870 } 1871 1872 return 0; 1873 } 1874 1875 /* 1876 * Called at the end of postcopy 1877 */ 1878 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp) 1879 { 1880 VhostUserMsg msg = { 1881 .hdr.request = VHOST_USER_POSTCOPY_END, 1882 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1883 }; 1884 int ret; 1885 struct vhost_user *u = dev->opaque; 1886 1887 trace_vhost_user_postcopy_end_entry(); 1888 1889 ret = vhost_user_write(dev, &msg, NULL, 0); 1890 if (ret < 0) { 1891 error_setg(errp, "Failed to send postcopy_end to vhost"); 1892 return ret; 1893 } 1894 1895 ret = process_message_reply(dev, &msg); 1896 if (ret) { 1897 error_setg(errp, "Failed to receive reply to postcopy_end"); 1898 return ret; 1899 } 1900 postcopy_unregister_shared_ufd(&u->postcopy_fd); 1901 close(u->postcopy_fd.fd); 1902 u->postcopy_fd.handler = NULL; 1903 1904 trace_vhost_user_postcopy_end_exit(); 1905 1906 return 0; 1907 } 1908 1909 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, 1910 void *opaque) 1911 { 1912 struct PostcopyNotifyData *pnd = opaque; 1913 struct vhost_user *u = container_of(notifier, struct vhost_user, 1914 postcopy_notifier); 1915 struct vhost_dev *dev = u->dev; 1916 1917 switch (pnd->reason) { 1918 case POSTCOPY_NOTIFY_PROBE: 1919 if (!virtio_has_feature(dev->protocol_features, 1920 VHOST_USER_PROTOCOL_F_PAGEFAULT)) { 1921 /* TODO: Get the device name into this error somehow */ 1922 error_setg(pnd->errp, 1923 "vhost-user backend not capable of postcopy"); 1924 return -ENOENT; 1925 } 1926 break; 1927 1928 case POSTCOPY_NOTIFY_INBOUND_ADVISE: 1929 return vhost_user_postcopy_advise(dev, pnd->errp); 1930 1931 case POSTCOPY_NOTIFY_INBOUND_LISTEN: 1932 return vhost_user_postcopy_listen(dev, pnd->errp); 1933 1934 case POSTCOPY_NOTIFY_INBOUND_END: 1935 return vhost_user_postcopy_end(dev, pnd->errp); 1936 1937 default: 1938 /* 
We ignore notifications we don't know */ 1939 break; 1940 } 1941 1942 return 0; 1943 } 1944 1945 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, 1946 Error **errp) 1947 { 1948 uint64_t features, protocol_features, ram_slots; 1949 struct vhost_user *u; 1950 int err; 1951 1952 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1953 1954 u = g_new0(struct vhost_user, 1); 1955 u->user = opaque; 1956 u->dev = dev; 1957 dev->opaque = u; 1958 1959 err = vhost_user_get_features(dev, &features); 1960 if (err < 0) { 1961 error_setg_errno(errp, -err, "vhost_backend_init failed"); 1962 return err; 1963 } 1964 1965 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 1966 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 1967 1968 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, 1969 &protocol_features); 1970 if (err < 0) { 1971 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 1972 return -EPROTO; 1973 } 1974 1975 dev->protocol_features = 1976 protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK; 1977 1978 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { 1979 /* Don't acknowledge CONFIG feature if device doesn't support it */ 1980 dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); 1981 } else if (!(protocol_features & 1982 (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) { 1983 error_setg(errp, "Device expects VHOST_USER_PROTOCOL_F_CONFIG " 1984 "but backend does not support it."); 1985 return -EINVAL; 1986 } 1987 1988 err = vhost_user_set_protocol_features(dev, dev->protocol_features); 1989 if (err < 0) { 1990 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 1991 return -EPROTO; 1992 } 1993 1994 /* query the max queues we support if backend supports Multiple Queue */ 1995 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 1996 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, 1997 &dev->max_queues); 1998 if (err < 0) { 1999 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2000 return -EPROTO; 2001 } 2002 } else { 2003 dev->max_queues = 1; 2004 } 2005 2006 if (dev->num_queues && dev->max_queues < dev->num_queues) { 2007 error_setg(errp, "The maximum number of queues supported by the " 2008 "backend is %" PRIu64, dev->max_queues); 2009 return -EINVAL; 2010 } 2011 2012 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && 2013 !(virtio_has_feature(dev->protocol_features, 2014 VHOST_USER_PROTOCOL_F_SLAVE_REQ) && 2015 virtio_has_feature(dev->protocol_features, 2016 VHOST_USER_PROTOCOL_F_REPLY_ACK))) { 2017 error_setg(errp, "IOMMU support requires reply-ack and " 2018 "slave-req protocol features."); 2019 return -EINVAL; 2020 } 2021 2022 /* get max memory regions if backend supports configurable RAM slots */ 2023 if (!virtio_has_feature(dev->protocol_features, 2024 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) { 2025 u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS; 2026 } else { 2027 err = vhost_user_get_max_memslots(dev, &ram_slots); 2028 if (err < 0) { 2029 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2030 return -EPROTO; 2031 } 2032 2033 if (ram_slots < u->user->memory_slots) { 2034 error_setg(errp, "The backend specified a max ram slots limit " 2035 "of %" PRIu64", when the prior validated limit was " 2036 "%d. 
This limit should never decrease.", ram_slots, 2037 u->user->memory_slots); 2038 return -EINVAL; 2039 } 2040 2041 u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS); 2042 } 2043 } 2044 2045 if (dev->migration_blocker == NULL && 2046 !virtio_has_feature(dev->protocol_features, 2047 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { 2048 error_setg(&dev->migration_blocker, 2049 "Migration disabled: vhost-user backend lacks " 2050 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); 2051 } 2052 2053 if (dev->vq_index == 0) { 2054 err = vhost_setup_slave_channel(dev); 2055 if (err < 0) { 2056 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2057 return -EPROTO; 2058 } 2059 } 2060 2061 u->postcopy_notifier.notify = vhost_user_postcopy_notifier; 2062 postcopy_add_notifier(&u->postcopy_notifier); 2063 2064 return 0; 2065 } 2066 2067 static int vhost_user_backend_cleanup(struct vhost_dev *dev) 2068 { 2069 struct vhost_user *u; 2070 2071 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2072 2073 u = dev->opaque; 2074 if (u->postcopy_notifier.notify) { 2075 postcopy_remove_notifier(&u->postcopy_notifier); 2076 u->postcopy_notifier.notify = NULL; 2077 } 2078 u->postcopy_listen = false; 2079 if (u->postcopy_fd.handler) { 2080 postcopy_unregister_shared_ufd(&u->postcopy_fd); 2081 close(u->postcopy_fd.fd); 2082 u->postcopy_fd.handler = NULL; 2083 } 2084 if (u->slave_ioc) { 2085 close_slave_channel(u); 2086 } 2087 g_free(u->region_rb); 2088 u->region_rb = NULL; 2089 g_free(u->region_rb_offset); 2090 u->region_rb_offset = NULL; 2091 u->region_rb_len = 0; 2092 g_free(u); 2093 dev->opaque = 0; 2094 2095 return 0; 2096 } 2097 2098 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 2099 { 2100 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 2101 2102 return idx; 2103 } 2104 2105 static int vhost_user_memslots_limit(struct vhost_dev *dev) 2106 { 2107 struct vhost_user *u = dev->opaque; 2108 2109 return u->user->memory_slots; 2110 } 2111 2112 static bool vhost_user_requires_shm_log(struct vhost_dev *dev) 2113 { 2114 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2115 2116 return virtio_has_feature(dev->protocol_features, 2117 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 2118 } 2119 2120 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) 2121 { 2122 VhostUserMsg msg = { }; 2123 2124 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2125 2126 /* If guest supports GUEST_ANNOUNCE do nothing */ 2127 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { 2128 return 0; 2129 } 2130 2131 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ 2132 if (virtio_has_feature(dev->protocol_features, 2133 VHOST_USER_PROTOCOL_F_RARP)) { 2134 msg.hdr.request = VHOST_USER_SEND_RARP; 2135 msg.hdr.flags = VHOST_USER_VERSION; 2136 memcpy((char *)&msg.payload.u64, mac_addr, 6); 2137 msg.hdr.size = sizeof(msg.payload.u64); 2138 2139 return vhost_user_write(dev, &msg, NULL, 0); 2140 } 2141 return -ENOTSUP; 2142 } 2143 2144 static bool vhost_user_can_merge(struct vhost_dev *dev, 2145 uint64_t start1, uint64_t size1, 2146 uint64_t start2, uint64_t size2) 2147 { 2148 ram_addr_t offset; 2149 int mfd, rfd; 2150 2151 (void)vhost_user_get_mr_data(start1, &offset, &mfd); 2152 (void)vhost_user_get_mr_data(start2, &offset, &rfd); 2153 2154 return mfd == rfd; 2155 } 2156 2157 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) 2158 { 2159 VhostUserMsg msg; 2160 bool reply_supported = 
virtio_has_feature(dev->protocol_features, 2161 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2162 int ret; 2163 2164 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) { 2165 return 0; 2166 } 2167 2168 msg.hdr.request = VHOST_USER_NET_SET_MTU; 2169 msg.payload.u64 = mtu; 2170 msg.hdr.size = sizeof(msg.payload.u64); 2171 msg.hdr.flags = VHOST_USER_VERSION; 2172 if (reply_supported) { 2173 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2174 } 2175 2176 ret = vhost_user_write(dev, &msg, NULL, 0); 2177 if (ret < 0) { 2178 return ret; 2179 } 2180 2181 /* If reply_ack supported, slave has to ack specified MTU is valid */ 2182 if (reply_supported) { 2183 return process_message_reply(dev, &msg); 2184 } 2185 2186 return 0; 2187 } 2188 2189 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev, 2190 struct vhost_iotlb_msg *imsg) 2191 { 2192 int ret; 2193 VhostUserMsg msg = { 2194 .hdr.request = VHOST_USER_IOTLB_MSG, 2195 .hdr.size = sizeof(msg.payload.iotlb), 2196 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 2197 .payload.iotlb = *imsg, 2198 }; 2199 2200 ret = vhost_user_write(dev, &msg, NULL, 0); 2201 if (ret < 0) { 2202 return ret; 2203 } 2204 2205 return process_message_reply(dev, &msg); 2206 } 2207 2208 2209 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled) 2210 { 2211 /* No-op as the receive channel is not dedicated to IOTLB messages. */ 2212 } 2213 2214 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, 2215 uint32_t config_len, Error **errp) 2216 { 2217 int ret; 2218 VhostUserMsg msg = { 2219 .hdr.request = VHOST_USER_GET_CONFIG, 2220 .hdr.flags = VHOST_USER_VERSION, 2221 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len, 2222 }; 2223 2224 if (!virtio_has_feature(dev->protocol_features, 2225 VHOST_USER_PROTOCOL_F_CONFIG)) { 2226 error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported"); 2227 return -EINVAL; 2228 } 2229 2230 assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE); 2231 2232 msg.payload.config.offset = 0; 2233 msg.payload.config.size = config_len; 2234 ret = vhost_user_write(dev, &msg, NULL, 0); 2235 if (ret < 0) { 2236 error_setg_errno(errp, -ret, "vhost_get_config failed"); 2237 return ret; 2238 } 2239 2240 ret = vhost_user_read(dev, &msg); 2241 if (ret < 0) { 2242 error_setg_errno(errp, -ret, "vhost_get_config failed"); 2243 return ret; 2244 } 2245 2246 if (msg.hdr.request != VHOST_USER_GET_CONFIG) { 2247 error_setg(errp, 2248 "Received unexpected msg type. 
Expected %d received %d", 2249 VHOST_USER_GET_CONFIG, msg.hdr.request); 2250 return -EPROTO; 2251 } 2252 2253 if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) { 2254 error_setg(errp, "Received bad msg size."); 2255 return -EPROTO; 2256 } 2257 2258 memcpy(config, msg.payload.config.region, config_len); 2259 2260 return 0; 2261 } 2262 2263 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data, 2264 uint32_t offset, uint32_t size, uint32_t flags) 2265 { 2266 int ret; 2267 uint8_t *p; 2268 bool reply_supported = virtio_has_feature(dev->protocol_features, 2269 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2270 2271 VhostUserMsg msg = { 2272 .hdr.request = VHOST_USER_SET_CONFIG, 2273 .hdr.flags = VHOST_USER_VERSION, 2274 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size, 2275 }; 2276 2277 if (!virtio_has_feature(dev->protocol_features, 2278 VHOST_USER_PROTOCOL_F_CONFIG)) { 2279 return -ENOTSUP; 2280 } 2281 2282 if (reply_supported) { 2283 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2284 } 2285 2286 if (size > VHOST_USER_MAX_CONFIG_SIZE) { 2287 return -EINVAL; 2288 } 2289 2290 msg.payload.config.offset = offset, 2291 msg.payload.config.size = size, 2292 msg.payload.config.flags = flags, 2293 p = msg.payload.config.region; 2294 memcpy(p, data, size); 2295 2296 ret = vhost_user_write(dev, &msg, NULL, 0); 2297 if (ret < 0) { 2298 return ret; 2299 } 2300 2301 if (reply_supported) { 2302 return process_message_reply(dev, &msg); 2303 } 2304 2305 return 0; 2306 } 2307 2308 static int vhost_user_crypto_create_session(struct vhost_dev *dev, 2309 void *session_info, 2310 uint64_t *session_id) 2311 { 2312 int ret; 2313 bool crypto_session = virtio_has_feature(dev->protocol_features, 2314 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2315 CryptoDevBackendSymSessionInfo *sess_info = session_info; 2316 VhostUserMsg msg = { 2317 .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION, 2318 .hdr.flags = VHOST_USER_VERSION, 2319 .hdr.size = sizeof(msg.payload.session), 2320 }; 2321 2322 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2323 2324 if (!crypto_session) { 2325 error_report("vhost-user trying to send unhandled ioctl"); 2326 return -ENOTSUP; 2327 } 2328 2329 memcpy(&msg.payload.session.session_setup_data, sess_info, 2330 sizeof(CryptoDevBackendSymSessionInfo)); 2331 if (sess_info->key_len) { 2332 memcpy(&msg.payload.session.key, sess_info->cipher_key, 2333 sess_info->key_len); 2334 } 2335 if (sess_info->auth_key_len > 0) { 2336 memcpy(&msg.payload.session.auth_key, sess_info->auth_key, 2337 sess_info->auth_key_len); 2338 } 2339 ret = vhost_user_write(dev, &msg, NULL, 0); 2340 if (ret < 0) { 2341 error_report("vhost_user_write() return %d, create session failed", 2342 ret); 2343 return ret; 2344 } 2345 2346 ret = vhost_user_read(dev, &msg); 2347 if (ret < 0) { 2348 error_report("vhost_user_read() return %d, create session failed", 2349 ret); 2350 return ret; 2351 } 2352 2353 if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) { 2354 error_report("Received unexpected msg type. 
Expected %d received %d", 2355 VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request); 2356 return -EPROTO; 2357 } 2358 2359 if (msg.hdr.size != sizeof(msg.payload.session)) { 2360 error_report("Received bad msg size."); 2361 return -EPROTO; 2362 } 2363 2364 if (msg.payload.session.session_id < 0) { 2365 error_report("Bad session id: %" PRId64 "", 2366 msg.payload.session.session_id); 2367 return -EINVAL; 2368 } 2369 *session_id = msg.payload.session.session_id; 2370 2371 return 0; 2372 } 2373 2374 static int 2375 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id) 2376 { 2377 int ret; 2378 bool crypto_session = virtio_has_feature(dev->protocol_features, 2379 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2380 VhostUserMsg msg = { 2381 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION, 2382 .hdr.flags = VHOST_USER_VERSION, 2383 .hdr.size = sizeof(msg.payload.u64), 2384 }; 2385 msg.payload.u64 = session_id; 2386 2387 if (!crypto_session) { 2388 error_report("vhost-user trying to send unhandled ioctl"); 2389 return -ENOTSUP; 2390 } 2391 2392 ret = vhost_user_write(dev, &msg, NULL, 0); 2393 if (ret < 0) { 2394 error_report("vhost_user_write() return %d, close session failed", 2395 ret); 2396 return ret; 2397 } 2398 2399 return 0; 2400 } 2401 2402 static bool vhost_user_mem_section_filter(struct vhost_dev *dev, 2403 MemoryRegionSection *section) 2404 { 2405 bool result; 2406 2407 result = memory_region_get_fd(section->mr) >= 0; 2408 2409 return result; 2410 } 2411 2412 static int vhost_user_get_inflight_fd(struct vhost_dev *dev, 2413 uint16_t queue_size, 2414 struct vhost_inflight *inflight) 2415 { 2416 void *addr; 2417 int fd; 2418 int ret; 2419 struct vhost_user *u = dev->opaque; 2420 CharBackend *chr = u->user->chr; 2421 VhostUserMsg msg = { 2422 .hdr.request = VHOST_USER_GET_INFLIGHT_FD, 2423 .hdr.flags = VHOST_USER_VERSION, 2424 .payload.inflight.num_queues = dev->nvqs, 2425 .payload.inflight.queue_size = queue_size, 2426 .hdr.size = sizeof(msg.payload.inflight), 2427 }; 2428 2429 if (!virtio_has_feature(dev->protocol_features, 2430 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2431 return 0; 2432 } 2433 2434 ret = vhost_user_write(dev, &msg, NULL, 0); 2435 if (ret < 0) { 2436 return ret; 2437 } 2438 2439 ret = vhost_user_read(dev, &msg); 2440 if (ret < 0) { 2441 return ret; 2442 } 2443 2444 if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) { 2445 error_report("Received unexpected msg type. 
" 2446 "Expected %d received %d", 2447 VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request); 2448 return -EPROTO; 2449 } 2450 2451 if (msg.hdr.size != sizeof(msg.payload.inflight)) { 2452 error_report("Received bad msg size."); 2453 return -EPROTO; 2454 } 2455 2456 if (!msg.payload.inflight.mmap_size) { 2457 return 0; 2458 } 2459 2460 fd = qemu_chr_fe_get_msgfd(chr); 2461 if (fd < 0) { 2462 error_report("Failed to get mem fd"); 2463 return -EIO; 2464 } 2465 2466 addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE, 2467 MAP_SHARED, fd, msg.payload.inflight.mmap_offset); 2468 2469 if (addr == MAP_FAILED) { 2470 error_report("Failed to mmap mem fd"); 2471 close(fd); 2472 return -EFAULT; 2473 } 2474 2475 inflight->addr = addr; 2476 inflight->fd = fd; 2477 inflight->size = msg.payload.inflight.mmap_size; 2478 inflight->offset = msg.payload.inflight.mmap_offset; 2479 inflight->queue_size = queue_size; 2480 2481 return 0; 2482 } 2483 2484 static int vhost_user_set_inflight_fd(struct vhost_dev *dev, 2485 struct vhost_inflight *inflight) 2486 { 2487 VhostUserMsg msg = { 2488 .hdr.request = VHOST_USER_SET_INFLIGHT_FD, 2489 .hdr.flags = VHOST_USER_VERSION, 2490 .payload.inflight.mmap_size = inflight->size, 2491 .payload.inflight.mmap_offset = inflight->offset, 2492 .payload.inflight.num_queues = dev->nvqs, 2493 .payload.inflight.queue_size = inflight->queue_size, 2494 .hdr.size = sizeof(msg.payload.inflight), 2495 }; 2496 2497 if (!virtio_has_feature(dev->protocol_features, 2498 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2499 return 0; 2500 } 2501 2502 return vhost_user_write(dev, &msg, &inflight->fd, 1); 2503 } 2504 2505 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) 2506 { 2507 if (user->chr) { 2508 error_setg(errp, "Cannot initialize vhost-user state"); 2509 return false; 2510 } 2511 user->chr = chr; 2512 user->memory_slots = 0; 2513 return true; 2514 } 2515 2516 void vhost_user_cleanup(VhostUserState *user) 2517 { 2518 int i; 2519 VhostUserHostNotifier *n; 2520 2521 if (!user->chr) { 2522 return; 2523 } 2524 memory_region_transaction_begin(); 2525 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 2526 n = &user->notifier[i]; 2527 vhost_user_host_notifier_remove(user, NULL, i); 2528 object_unparent(OBJECT(&n->mr)); 2529 } 2530 memory_region_transaction_commit(); 2531 user->chr = NULL; 2532 } 2533 2534 const VhostOps user_ops = { 2535 .backend_type = VHOST_BACKEND_TYPE_USER, 2536 .vhost_backend_init = vhost_user_backend_init, 2537 .vhost_backend_cleanup = vhost_user_backend_cleanup, 2538 .vhost_backend_memslots_limit = vhost_user_memslots_limit, 2539 .vhost_set_log_base = vhost_user_set_log_base, 2540 .vhost_set_mem_table = vhost_user_set_mem_table, 2541 .vhost_set_vring_addr = vhost_user_set_vring_addr, 2542 .vhost_set_vring_endian = vhost_user_set_vring_endian, 2543 .vhost_set_vring_num = vhost_user_set_vring_num, 2544 .vhost_set_vring_base = vhost_user_set_vring_base, 2545 .vhost_get_vring_base = vhost_user_get_vring_base, 2546 .vhost_set_vring_kick = vhost_user_set_vring_kick, 2547 .vhost_set_vring_call = vhost_user_set_vring_call, 2548 .vhost_set_features = vhost_user_set_features, 2549 .vhost_get_features = vhost_user_get_features, 2550 .vhost_set_owner = vhost_user_set_owner, 2551 .vhost_reset_device = vhost_user_reset_device, 2552 .vhost_get_vq_index = vhost_user_get_vq_index, 2553 .vhost_set_vring_enable = vhost_user_set_vring_enable, 2554 .vhost_requires_shm_log = vhost_user_requires_shm_log, 2555 .vhost_migration_done = vhost_user_migration_done, 2556 
        .vhost_backend_can_merge = vhost_user_can_merge,
2557     .vhost_net_set_mtu = vhost_user_net_set_mtu,
2558     .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
2559     .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
2560     .vhost_get_config = vhost_user_get_config,
2561     .vhost_set_config = vhost_user_set_config,
2562     .vhost_crypto_create_session = vhost_user_crypto_create_session,
2563     .vhost_crypto_close_session = vhost_user_crypto_close_session,
2564     .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
2565     .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
2566     .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
2567 };
2568
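/*
 * Usage sketch: a minimal outline of how a device model is expected to
 * consume the state and ops defined above.  vhost_dev_init() and
 * vhost_dev_cleanup() come from the generic vhost core and their exact
 * signatures are assumed here; "chr_be", "hdev" and "errp" are placeholder
 * names supplied by the caller.
 *
 *     VhostUserState user;
 *     struct vhost_dev hdev;
 *
 *     // Bind the chardev to the vhost-user state; fails if already bound.
 *     if (!vhost_user_init(&user, &chr_be, errp)) {
 *         return -1;
 *     }
 *
 *     // VHOST_BACKEND_TYPE_USER selects user_ops; vhost_user_backend_init()
 *     // then negotiates features and protocol features over the socket and,
 *     // for vq_index 0, sets up the slave channel.
 *     if (vhost_dev_init(&hdev, &user, VHOST_BACKEND_TYPE_USER, 0, errp) < 0) {
 *         vhost_user_cleanup(&user);
 *         return -1;
 *     }
 *
 *     ...
 *
 *     // Teardown mirrors setup: vhost_user_backend_cleanup() is invoked via
 *     // the backend ops during vhost_dev_cleanup(), after which the chardev
 *     // binding is released.
 *     vhost_dev_cleanup(&hdev);
 *     vhost_user_cleanup(&user);
 */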