1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "qemu/osdep.h" 12 #include "qapi/error.h" 13 #include "hw/virtio/vhost.h" 14 #include "hw/virtio/vhost-user.h" 15 #include "hw/virtio/vhost-backend.h" 16 #include "hw/virtio/virtio.h" 17 #include "hw/virtio/virtio-net.h" 18 #include "chardev/char-fe.h" 19 #include "io/channel-socket.h" 20 #include "sysemu/kvm.h" 21 #include "qemu/error-report.h" 22 #include "qemu/main-loop.h" 23 #include "qemu/sockets.h" 24 #include "sysemu/cryptodev.h" 25 #include "migration/migration.h" 26 #include "migration/postcopy-ram.h" 27 #include "trace.h" 28 29 #include <sys/ioctl.h> 30 #include <sys/socket.h> 31 #include <sys/un.h> 32 33 #include "standard-headers/linux/vhost_types.h" 34 35 #ifdef CONFIG_LINUX 36 #include <linux/userfaultfd.h> 37 #endif 38 39 #define VHOST_MEMORY_BASELINE_NREGIONS 8 40 #define VHOST_USER_F_PROTOCOL_FEATURES 30 41 #define VHOST_USER_SLAVE_MAX_FDS 8 42 43 /* 44 * Set maximum number of RAM slots supported to 45 * the maximum number supported by the target 46 * hardware plaform. 47 */ 48 #if defined(TARGET_X86) || defined(TARGET_X86_64) || \ 49 defined(TARGET_ARM) || defined(TARGET_ARM_64) 50 #include "hw/acpi/acpi.h" 51 #define VHOST_USER_MAX_RAM_SLOTS ACPI_MAX_RAM_SLOTS 52 53 #elif defined(TARGET_PPC) || defined(TARGET_PPC_64) 54 #include "hw/ppc/spapr.h" 55 #define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS 56 57 #else 58 #define VHOST_USER_MAX_RAM_SLOTS 512 59 #endif 60 61 /* 62 * Maximum size of virtio device config space 63 */ 64 #define VHOST_USER_MAX_CONFIG_SIZE 256 65 66 enum VhostUserProtocolFeature { 67 VHOST_USER_PROTOCOL_F_MQ = 0, 68 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, 69 VHOST_USER_PROTOCOL_F_RARP = 2, 70 VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, 71 VHOST_USER_PROTOCOL_F_NET_MTU = 4, 72 VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, 73 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, 74 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, 75 VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, 76 VHOST_USER_PROTOCOL_F_CONFIG = 9, 77 VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, 78 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, 79 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, 80 VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, 81 /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */ 82 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, 83 VHOST_USER_PROTOCOL_F_MAX 84 }; 85 86 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) 87 88 typedef enum VhostUserRequest { 89 VHOST_USER_NONE = 0, 90 VHOST_USER_GET_FEATURES = 1, 91 VHOST_USER_SET_FEATURES = 2, 92 VHOST_USER_SET_OWNER = 3, 93 VHOST_USER_RESET_OWNER = 4, 94 VHOST_USER_SET_MEM_TABLE = 5, 95 VHOST_USER_SET_LOG_BASE = 6, 96 VHOST_USER_SET_LOG_FD = 7, 97 VHOST_USER_SET_VRING_NUM = 8, 98 VHOST_USER_SET_VRING_ADDR = 9, 99 VHOST_USER_SET_VRING_BASE = 10, 100 VHOST_USER_GET_VRING_BASE = 11, 101 VHOST_USER_SET_VRING_KICK = 12, 102 VHOST_USER_SET_VRING_CALL = 13, 103 VHOST_USER_SET_VRING_ERR = 14, 104 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 105 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 106 VHOST_USER_GET_QUEUE_NUM = 17, 107 VHOST_USER_SET_VRING_ENABLE = 18, 108 VHOST_USER_SEND_RARP = 19, 109 VHOST_USER_NET_SET_MTU = 20, 110 VHOST_USER_SET_SLAVE_REQ_FD = 21, 111 VHOST_USER_IOTLB_MSG = 22, 112 VHOST_USER_SET_VRING_ENDIAN = 23, 113 VHOST_USER_GET_CONFIG = 24, 114 VHOST_USER_SET_CONFIG = 25, 115 VHOST_USER_CREATE_CRYPTO_SESSION = 26, 116 VHOST_USER_CLOSE_CRYPTO_SESSION = 27, 117 VHOST_USER_POSTCOPY_ADVISE = 28, 118 VHOST_USER_POSTCOPY_LISTEN = 29, 119 VHOST_USER_POSTCOPY_END = 30, 120 VHOST_USER_GET_INFLIGHT_FD = 31, 121 VHOST_USER_SET_INFLIGHT_FD = 32, 122 VHOST_USER_GPU_SET_SOCKET = 33, 123 VHOST_USER_RESET_DEVICE = 34, 124 /* Message number 35 reserved for VHOST_USER_VRING_KICK. */ 125 VHOST_USER_GET_MAX_MEM_SLOTS = 36, 126 VHOST_USER_ADD_MEM_REG = 37, 127 VHOST_USER_REM_MEM_REG = 38, 128 VHOST_USER_MAX 129 } VhostUserRequest; 130 131 typedef enum VhostUserSlaveRequest { 132 VHOST_USER_SLAVE_NONE = 0, 133 VHOST_USER_SLAVE_IOTLB_MSG = 1, 134 VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, 135 VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, 136 VHOST_USER_SLAVE_MAX 137 } VhostUserSlaveRequest; 138 139 typedef struct VhostUserMemoryRegion { 140 uint64_t guest_phys_addr; 141 uint64_t memory_size; 142 uint64_t userspace_addr; 143 uint64_t mmap_offset; 144 } VhostUserMemoryRegion; 145 146 typedef struct VhostUserMemory { 147 uint32_t nregions; 148 uint32_t padding; 149 VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS]; 150 } VhostUserMemory; 151 152 typedef struct VhostUserMemRegMsg { 153 uint64_t padding; 154 VhostUserMemoryRegion region; 155 } VhostUserMemRegMsg; 156 157 typedef struct VhostUserLog { 158 uint64_t mmap_size; 159 uint64_t mmap_offset; 160 } VhostUserLog; 161 162 typedef struct VhostUserConfig { 163 uint32_t offset; 164 uint32_t size; 165 uint32_t flags; 166 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; 167 } VhostUserConfig; 168 169 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512 170 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64 171 172 typedef struct VhostUserCryptoSession { 173 /* session id for success, -1 on errors */ 174 int64_t session_id; 175 CryptoDevBackendSymSessionInfo session_setup_data; 176 uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN]; 177 uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN]; 178 } VhostUserCryptoSession; 179 180 static VhostUserConfig c __attribute__ ((unused)); 181 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \ 182 + sizeof(c.size) \ 183 + sizeof(c.flags)) 184 185 typedef struct VhostUserVringArea { 186 uint64_t u64; 187 uint64_t size; 188 uint64_t offset; 189 } VhostUserVringArea; 190 191 typedef struct VhostUserInflight { 192 uint64_t mmap_size; 193 uint64_t mmap_offset; 194 uint16_t num_queues; 195 uint16_t queue_size; 196 } VhostUserInflight; 197 198 typedef struct { 199 VhostUserRequest request; 200 201 #define VHOST_USER_VERSION_MASK (0x3) 202 #define VHOST_USER_REPLY_MASK (0x1<<2) 203 #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) 204 uint32_t flags; 205 uint32_t size; /* the following payload size */ 206 } QEMU_PACKED VhostUserHeader; 207 208 typedef union { 209 #define VHOST_USER_VRING_IDX_MASK (0xff) 210 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) 211 uint64_t u64; 212 struct vhost_vring_state state; 213 struct vhost_vring_addr addr; 214 VhostUserMemory memory; 215 VhostUserMemRegMsg mem_reg; 216 VhostUserLog log; 217 struct vhost_iotlb_msg iotlb; 218 VhostUserConfig config; 219 VhostUserCryptoSession session; 220 VhostUserVringArea area; 221 VhostUserInflight inflight; 222 } VhostUserPayload; 223 224 typedef struct VhostUserMsg { 225 VhostUserHeader hdr; 226 VhostUserPayload payload; 227 } QEMU_PACKED VhostUserMsg; 228 229 static VhostUserMsg m __attribute__ ((unused)); 230 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader)) 231 232 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload)) 233 234 /* The version of the protocol we support */ 235 #define VHOST_USER_VERSION (0x1) 236 237 struct vhost_user { 238 struct vhost_dev *dev; 239 /* Shared between vhost devs of the same virtio device */ 240 VhostUserState *user; 241 QIOChannel *slave_ioc; 242 GSource *slave_src; 243 NotifierWithReturn postcopy_notifier; 244 struct PostCopyFD postcopy_fd; 245 uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS]; 246 /* Length of the region_rb and region_rb_offset arrays */ 247 size_t region_rb_len; 248 /* RAMBlock associated with a given region */ 249 RAMBlock **region_rb; 250 /* The offset from the start of the RAMBlock to the start of the 251 * vhost region. 252 */ 253 ram_addr_t *region_rb_offset; 254 255 /* True once we've entered postcopy_listen */ 256 bool postcopy_listen; 257 258 /* Our current regions */ 259 int num_shadow_regions; 260 struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS]; 261 }; 262 263 struct scrub_regions { 264 struct vhost_memory_region *region; 265 int reg_idx; 266 int fd_idx; 267 }; 268 269 static bool ioeventfd_enabled(void) 270 { 271 return !kvm_enabled() || kvm_eventfds_enabled(); 272 } 273 274 static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg) 275 { 276 struct vhost_user *u = dev->opaque; 277 CharBackend *chr = u->user->chr; 278 uint8_t *p = (uint8_t *) msg; 279 int r, size = VHOST_USER_HDR_SIZE; 280 281 r = qemu_chr_fe_read_all(chr, p, size); 282 if (r != size) { 283 error_report("Failed to read msg header. Read %d instead of %d." 284 " Original request %d.", r, size, msg->hdr.request); 285 return -1; 286 } 287 288 /* validate received flags */ 289 if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 290 error_report("Failed to read msg header." 291 " Flags 0x%x instead of 0x%x.", msg->hdr.flags, 292 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 293 return -1; 294 } 295 296 return 0; 297 } 298 299 struct vhost_user_read_cb_data { 300 struct vhost_dev *dev; 301 VhostUserMsg *msg; 302 GMainLoop *loop; 303 int ret; 304 }; 305 306 static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, 307 gpointer opaque) 308 { 309 struct vhost_user_read_cb_data *data = opaque; 310 struct vhost_dev *dev = data->dev; 311 VhostUserMsg *msg = data->msg; 312 struct vhost_user *u = dev->opaque; 313 CharBackend *chr = u->user->chr; 314 uint8_t *p = (uint8_t *) msg; 315 int r, size; 316 317 if (vhost_user_read_header(dev, msg) < 0) { 318 data->ret = -1; 319 goto end; 320 } 321 322 /* validate message size is sane */ 323 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) { 324 error_report("Failed to read msg header." 325 " Size %d exceeds the maximum %zu.", msg->hdr.size, 326 VHOST_USER_PAYLOAD_SIZE); 327 data->ret = -1; 328 goto end; 329 } 330 331 if (msg->hdr.size) { 332 p += VHOST_USER_HDR_SIZE; 333 size = msg->hdr.size; 334 r = qemu_chr_fe_read_all(chr, p, size); 335 if (r != size) { 336 error_report("Failed to read msg payload." 337 " Read %d instead of %d.", r, msg->hdr.size); 338 data->ret = -1; 339 goto end; 340 } 341 } 342 343 end: 344 g_main_loop_quit(data->loop); 345 return G_SOURCE_REMOVE; 346 } 347 348 static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, 349 gpointer opaque); 350 351 /* 352 * This updates the read handler to use a new event loop context. 353 * Event sources are removed from the previous context : this ensures 354 * that events detected in the previous context are purged. They will 355 * be re-detected and processed in the new context. 356 */ 357 static void slave_update_read_handler(struct vhost_dev *dev, 358 GMainContext *ctxt) 359 { 360 struct vhost_user *u = dev->opaque; 361 362 if (!u->slave_ioc) { 363 return; 364 } 365 366 if (u->slave_src) { 367 g_source_destroy(u->slave_src); 368 g_source_unref(u->slave_src); 369 } 370 371 u->slave_src = qio_channel_add_watch_source(u->slave_ioc, 372 G_IO_IN | G_IO_HUP, 373 slave_read, dev, NULL, 374 ctxt); 375 } 376 377 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 378 { 379 struct vhost_user *u = dev->opaque; 380 CharBackend *chr = u->user->chr; 381 GMainContext *prev_ctxt = chr->chr->gcontext; 382 GMainContext *ctxt = g_main_context_new(); 383 GMainLoop *loop = g_main_loop_new(ctxt, FALSE); 384 struct vhost_user_read_cb_data data = { 385 .dev = dev, 386 .loop = loop, 387 .msg = msg, 388 .ret = 0 389 }; 390 391 /* 392 * We want to be able to monitor the slave channel fd while waiting 393 * for chr I/O. This requires an event loop, but we can't nest the 394 * one to which chr is currently attached : its fd handlers might not 395 * be prepared for re-entrancy. So we create a new one and switch chr 396 * to use it. 397 */ 398 slave_update_read_handler(dev, ctxt); 399 qemu_chr_be_update_read_handlers(chr->chr, ctxt); 400 qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data); 401 402 g_main_loop_run(loop); 403 404 /* 405 * Restore the previous event loop context. This also destroys/recreates 406 * event sources : this guarantees that all pending events in the original 407 * context that have been processed by the nested loop are purged. 408 */ 409 qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt); 410 slave_update_read_handler(dev, NULL); 411 412 g_main_loop_unref(loop); 413 g_main_context_unref(ctxt); 414 415 return data.ret; 416 } 417 418 static int process_message_reply(struct vhost_dev *dev, 419 const VhostUserMsg *msg) 420 { 421 VhostUserMsg msg_reply; 422 423 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { 424 return 0; 425 } 426 427 if (vhost_user_read(dev, &msg_reply) < 0) { 428 return -1; 429 } 430 431 if (msg_reply.hdr.request != msg->hdr.request) { 432 error_report("Received unexpected msg type. " 433 "Expected %d received %d", 434 msg->hdr.request, msg_reply.hdr.request); 435 return -1; 436 } 437 438 return msg_reply.payload.u64 ? -1 : 0; 439 } 440 441 static bool vhost_user_one_time_request(VhostUserRequest request) 442 { 443 switch (request) { 444 case VHOST_USER_SET_OWNER: 445 case VHOST_USER_RESET_OWNER: 446 case VHOST_USER_SET_MEM_TABLE: 447 case VHOST_USER_GET_QUEUE_NUM: 448 case VHOST_USER_NET_SET_MTU: 449 return true; 450 default: 451 return false; 452 } 453 } 454 455 /* most non-init callers ignore the error */ 456 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 457 int *fds, int fd_num) 458 { 459 struct vhost_user *u = dev->opaque; 460 CharBackend *chr = u->user->chr; 461 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size; 462 463 /* 464 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, 465 * we just need send it once in the first time. For later such 466 * request, we just ignore it. 467 */ 468 if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) { 469 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 470 return 0; 471 } 472 473 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) { 474 error_report("Failed to set msg fds."); 475 return -1; 476 } 477 478 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size); 479 if (ret != size) { 480 error_report("Failed to write msg." 481 " Wrote %d instead of %d.", ret, size); 482 return -1; 483 } 484 485 return 0; 486 } 487 488 int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd) 489 { 490 VhostUserMsg msg = { 491 .hdr.request = VHOST_USER_GPU_SET_SOCKET, 492 .hdr.flags = VHOST_USER_VERSION, 493 }; 494 495 return vhost_user_write(dev, &msg, &fd, 1); 496 } 497 498 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, 499 struct vhost_log *log) 500 { 501 int fds[VHOST_USER_MAX_RAM_SLOTS]; 502 size_t fd_num = 0; 503 bool shmfd = virtio_has_feature(dev->protocol_features, 504 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 505 VhostUserMsg msg = { 506 .hdr.request = VHOST_USER_SET_LOG_BASE, 507 .hdr.flags = VHOST_USER_VERSION, 508 .payload.log.mmap_size = log->size * sizeof(*(log->log)), 509 .payload.log.mmap_offset = 0, 510 .hdr.size = sizeof(msg.payload.log), 511 }; 512 513 if (shmfd && log->fd != -1) { 514 fds[fd_num++] = log->fd; 515 } 516 517 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 518 return -1; 519 } 520 521 if (shmfd) { 522 msg.hdr.size = 0; 523 if (vhost_user_read(dev, &msg) < 0) { 524 return -1; 525 } 526 527 if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) { 528 error_report("Received unexpected msg type. " 529 "Expected %d received %d", 530 VHOST_USER_SET_LOG_BASE, msg.hdr.request); 531 return -1; 532 } 533 } 534 535 return 0; 536 } 537 538 static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset, 539 int *fd) 540 { 541 MemoryRegion *mr; 542 543 assert((uintptr_t)addr == addr); 544 mr = memory_region_from_host((void *)(uintptr_t)addr, offset); 545 *fd = memory_region_get_fd(mr); 546 547 return mr; 548 } 549 550 static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst, 551 struct vhost_memory_region *src, 552 uint64_t mmap_offset) 553 { 554 assert(src != NULL && dst != NULL); 555 dst->userspace_addr = src->userspace_addr; 556 dst->memory_size = src->memory_size; 557 dst->guest_phys_addr = src->guest_phys_addr; 558 dst->mmap_offset = mmap_offset; 559 } 560 561 static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, 562 struct vhost_dev *dev, 563 VhostUserMsg *msg, 564 int *fds, size_t *fd_num, 565 bool track_ramblocks) 566 { 567 int i, fd; 568 ram_addr_t offset; 569 MemoryRegion *mr; 570 struct vhost_memory_region *reg; 571 VhostUserMemoryRegion region_buffer; 572 573 msg->hdr.request = VHOST_USER_SET_MEM_TABLE; 574 575 for (i = 0; i < dev->mem->nregions; ++i) { 576 reg = dev->mem->regions + i; 577 578 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 579 if (fd > 0) { 580 if (track_ramblocks) { 581 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS); 582 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name, 583 reg->memory_size, 584 reg->guest_phys_addr, 585 reg->userspace_addr, 586 offset); 587 u->region_rb_offset[i] = offset; 588 u->region_rb[i] = mr->ram_block; 589 } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) { 590 error_report("Failed preparing vhost-user memory table msg"); 591 return -1; 592 } 593 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 594 msg->payload.memory.regions[*fd_num] = region_buffer; 595 fds[(*fd_num)++] = fd; 596 } else if (track_ramblocks) { 597 u->region_rb_offset[i] = 0; 598 u->region_rb[i] = NULL; 599 } 600 } 601 602 msg->payload.memory.nregions = *fd_num; 603 604 if (!*fd_num) { 605 error_report("Failed initializing vhost-user memory map, " 606 "consider using -object memory-backend-file share=on"); 607 return -1; 608 } 609 610 msg->hdr.size = sizeof(msg->payload.memory.nregions); 611 msg->hdr.size += sizeof(msg->payload.memory.padding); 612 msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion); 613 614 return 1; 615 } 616 617 static inline bool reg_equal(struct vhost_memory_region *shadow_reg, 618 struct vhost_memory_region *vdev_reg) 619 { 620 return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr && 621 shadow_reg->userspace_addr == vdev_reg->userspace_addr && 622 shadow_reg->memory_size == vdev_reg->memory_size; 623 } 624 625 static void scrub_shadow_regions(struct vhost_dev *dev, 626 struct scrub_regions *add_reg, 627 int *nr_add_reg, 628 struct scrub_regions *rem_reg, 629 int *nr_rem_reg, uint64_t *shadow_pcb, 630 bool track_ramblocks) 631 { 632 struct vhost_user *u = dev->opaque; 633 bool found[VHOST_USER_MAX_RAM_SLOTS] = {}; 634 struct vhost_memory_region *reg, *shadow_reg; 635 int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0; 636 ram_addr_t offset; 637 MemoryRegion *mr; 638 bool matching; 639 640 /* 641 * Find memory regions present in our shadow state which are not in 642 * the device's current memory state. 643 * 644 * Mark regions in both the shadow and device state as "found". 645 */ 646 for (i = 0; i < u->num_shadow_regions; i++) { 647 shadow_reg = &u->shadow_regions[i]; 648 matching = false; 649 650 for (j = 0; j < dev->mem->nregions; j++) { 651 reg = &dev->mem->regions[j]; 652 653 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 654 655 if (reg_equal(shadow_reg, reg)) { 656 matching = true; 657 found[j] = true; 658 if (track_ramblocks) { 659 /* 660 * Reset postcopy client bases, region_rb, and 661 * region_rb_offset in case regions are removed. 662 */ 663 if (fd > 0) { 664 u->region_rb_offset[j] = offset; 665 u->region_rb[j] = mr->ram_block; 666 shadow_pcb[j] = u->postcopy_client_bases[i]; 667 } else { 668 u->region_rb_offset[j] = 0; 669 u->region_rb[j] = NULL; 670 } 671 } 672 break; 673 } 674 } 675 676 /* 677 * If the region was not found in the current device memory state 678 * create an entry for it in the removed list. 679 */ 680 if (!matching) { 681 rem_reg[rm_idx].region = shadow_reg; 682 rem_reg[rm_idx++].reg_idx = i; 683 } 684 } 685 686 /* 687 * For regions not marked "found", create entries in the added list. 688 * 689 * Note their indexes in the device memory state and the indexes of their 690 * file descriptors. 691 */ 692 for (i = 0; i < dev->mem->nregions; i++) { 693 reg = &dev->mem->regions[i]; 694 vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 695 if (fd > 0) { 696 ++fd_num; 697 } 698 699 /* 700 * If the region was in both the shadow and device state we don't 701 * need to send a VHOST_USER_ADD_MEM_REG message for it. 702 */ 703 if (found[i]) { 704 continue; 705 } 706 707 add_reg[add_idx].region = reg; 708 add_reg[add_idx].reg_idx = i; 709 add_reg[add_idx++].fd_idx = fd_num; 710 } 711 *nr_rem_reg = rm_idx; 712 *nr_add_reg = add_idx; 713 714 return; 715 } 716 717 static int send_remove_regions(struct vhost_dev *dev, 718 struct scrub_regions *remove_reg, 719 int nr_rem_reg, VhostUserMsg *msg, 720 bool reply_supported) 721 { 722 struct vhost_user *u = dev->opaque; 723 struct vhost_memory_region *shadow_reg; 724 int i, fd, shadow_reg_idx, ret; 725 ram_addr_t offset; 726 VhostUserMemoryRegion region_buffer; 727 728 /* 729 * The regions in remove_reg appear in the same order they do in the 730 * shadow table. Therefore we can minimize memory copies by iterating 731 * through remove_reg backwards. 732 */ 733 for (i = nr_rem_reg - 1; i >= 0; i--) { 734 shadow_reg = remove_reg[i].region; 735 shadow_reg_idx = remove_reg[i].reg_idx; 736 737 vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd); 738 739 if (fd > 0) { 740 msg->hdr.request = VHOST_USER_REM_MEM_REG; 741 vhost_user_fill_msg_region(®ion_buffer, shadow_reg, 0); 742 msg->payload.mem_reg.region = region_buffer; 743 744 if (vhost_user_write(dev, msg, &fd, 1) < 0) { 745 return -1; 746 } 747 748 if (reply_supported) { 749 ret = process_message_reply(dev, msg); 750 if (ret) { 751 return ret; 752 } 753 } 754 } 755 756 /* 757 * At this point we know the backend has unmapped the region. It is now 758 * safe to remove it from the shadow table. 759 */ 760 memmove(&u->shadow_regions[shadow_reg_idx], 761 &u->shadow_regions[shadow_reg_idx + 1], 762 sizeof(struct vhost_memory_region) * 763 (u->num_shadow_regions - shadow_reg_idx - 1)); 764 u->num_shadow_regions--; 765 } 766 767 return 0; 768 } 769 770 static int send_add_regions(struct vhost_dev *dev, 771 struct scrub_regions *add_reg, int nr_add_reg, 772 VhostUserMsg *msg, uint64_t *shadow_pcb, 773 bool reply_supported, bool track_ramblocks) 774 { 775 struct vhost_user *u = dev->opaque; 776 int i, fd, ret, reg_idx, reg_fd_idx; 777 struct vhost_memory_region *reg; 778 MemoryRegion *mr; 779 ram_addr_t offset; 780 VhostUserMsg msg_reply; 781 VhostUserMemoryRegion region_buffer; 782 783 for (i = 0; i < nr_add_reg; i++) { 784 reg = add_reg[i].region; 785 reg_idx = add_reg[i].reg_idx; 786 reg_fd_idx = add_reg[i].fd_idx; 787 788 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 789 790 if (fd > 0) { 791 if (track_ramblocks) { 792 trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name, 793 reg->memory_size, 794 reg->guest_phys_addr, 795 reg->userspace_addr, 796 offset); 797 u->region_rb_offset[reg_idx] = offset; 798 u->region_rb[reg_idx] = mr->ram_block; 799 } 800 msg->hdr.request = VHOST_USER_ADD_MEM_REG; 801 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 802 msg->payload.mem_reg.region = region_buffer; 803 804 if (vhost_user_write(dev, msg, &fd, 1) < 0) { 805 return -1; 806 } 807 808 if (track_ramblocks) { 809 uint64_t reply_gpa; 810 811 if (vhost_user_read(dev, &msg_reply) < 0) { 812 return -1; 813 } 814 815 reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr; 816 817 if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) { 818 error_report("%s: Received unexpected msg type." 819 "Expected %d received %d", __func__, 820 VHOST_USER_ADD_MEM_REG, 821 msg_reply.hdr.request); 822 return -1; 823 } 824 825 /* 826 * We're using the same structure, just reusing one of the 827 * fields, so it should be the same size. 828 */ 829 if (msg_reply.hdr.size != msg->hdr.size) { 830 error_report("%s: Unexpected size for postcopy reply " 831 "%d vs %d", __func__, msg_reply.hdr.size, 832 msg->hdr.size); 833 return -1; 834 } 835 836 /* Get the postcopy client base from the backend's reply. */ 837 if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) { 838 shadow_pcb[reg_idx] = 839 msg_reply.payload.mem_reg.region.userspace_addr; 840 trace_vhost_user_set_mem_table_postcopy( 841 msg_reply.payload.mem_reg.region.userspace_addr, 842 msg->payload.mem_reg.region.userspace_addr, 843 reg_fd_idx, reg_idx); 844 } else { 845 error_report("%s: invalid postcopy reply for region. " 846 "Got guest physical address %" PRIX64 ", expected " 847 "%" PRIX64, __func__, reply_gpa, 848 dev->mem->regions[reg_idx].guest_phys_addr); 849 return -1; 850 } 851 } else if (reply_supported) { 852 ret = process_message_reply(dev, msg); 853 if (ret) { 854 return ret; 855 } 856 } 857 } else if (track_ramblocks) { 858 u->region_rb_offset[reg_idx] = 0; 859 u->region_rb[reg_idx] = NULL; 860 } 861 862 /* 863 * At this point, we know the backend has mapped in the new 864 * region, if the region has a valid file descriptor. 865 * 866 * The region should now be added to the shadow table. 867 */ 868 u->shadow_regions[u->num_shadow_regions].guest_phys_addr = 869 reg->guest_phys_addr; 870 u->shadow_regions[u->num_shadow_regions].userspace_addr = 871 reg->userspace_addr; 872 u->shadow_regions[u->num_shadow_regions].memory_size = 873 reg->memory_size; 874 u->num_shadow_regions++; 875 } 876 877 return 0; 878 } 879 880 static int vhost_user_add_remove_regions(struct vhost_dev *dev, 881 VhostUserMsg *msg, 882 bool reply_supported, 883 bool track_ramblocks) 884 { 885 struct vhost_user *u = dev->opaque; 886 struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS]; 887 struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS]; 888 uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {}; 889 int nr_add_reg, nr_rem_reg; 890 891 msg->hdr.size = sizeof(msg->payload.mem_reg); 892 893 /* Find the regions which need to be removed or added. */ 894 scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg, 895 shadow_pcb, track_ramblocks); 896 897 if (nr_rem_reg && send_remove_regions(dev, rem_reg, nr_rem_reg, msg, 898 reply_supported) < 0) 899 { 900 goto err; 901 } 902 903 if (nr_add_reg && send_add_regions(dev, add_reg, nr_add_reg, msg, 904 shadow_pcb, reply_supported, track_ramblocks) < 0) 905 { 906 goto err; 907 } 908 909 if (track_ramblocks) { 910 memcpy(u->postcopy_client_bases, shadow_pcb, 911 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 912 /* 913 * Now we've registered this with the postcopy code, we ack to the 914 * client, because now we're in the position to be able to deal with 915 * any faults it generates. 916 */ 917 /* TODO: Use this for failure cases as well with a bad value. */ 918 msg->hdr.size = sizeof(msg->payload.u64); 919 msg->payload.u64 = 0; /* OK */ 920 921 if (vhost_user_write(dev, msg, NULL, 0) < 0) { 922 return -1; 923 } 924 } 925 926 return 0; 927 928 err: 929 if (track_ramblocks) { 930 memcpy(u->postcopy_client_bases, shadow_pcb, 931 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 932 } 933 934 return -1; 935 } 936 937 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, 938 struct vhost_memory *mem, 939 bool reply_supported, 940 bool config_mem_slots) 941 { 942 struct vhost_user *u = dev->opaque; 943 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 944 size_t fd_num = 0; 945 VhostUserMsg msg_reply; 946 int region_i, msg_i; 947 948 VhostUserMsg msg = { 949 .hdr.flags = VHOST_USER_VERSION, 950 }; 951 952 if (u->region_rb_len < dev->mem->nregions) { 953 u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions); 954 u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset, 955 dev->mem->nregions); 956 memset(&(u->region_rb[u->region_rb_len]), '\0', 957 sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len)); 958 memset(&(u->region_rb_offset[u->region_rb_len]), '\0', 959 sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len)); 960 u->region_rb_len = dev->mem->nregions; 961 } 962 963 if (config_mem_slots) { 964 if (vhost_user_add_remove_regions(dev, &msg, reply_supported, 965 true) < 0) { 966 return -1; 967 } 968 } else { 969 if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 970 true) < 0) { 971 return -1; 972 } 973 974 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 975 return -1; 976 } 977 978 if (vhost_user_read(dev, &msg_reply) < 0) { 979 return -1; 980 } 981 982 if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { 983 error_report("%s: Received unexpected msg type." 984 "Expected %d received %d", __func__, 985 VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); 986 return -1; 987 } 988 989 /* 990 * We're using the same structure, just reusing one of the 991 * fields, so it should be the same size. 992 */ 993 if (msg_reply.hdr.size != msg.hdr.size) { 994 error_report("%s: Unexpected size for postcopy reply " 995 "%d vs %d", __func__, msg_reply.hdr.size, 996 msg.hdr.size); 997 return -1; 998 } 999 1000 memset(u->postcopy_client_bases, 0, 1001 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 1002 1003 /* 1004 * They're in the same order as the regions that were sent 1005 * but some of the regions were skipped (above) if they 1006 * didn't have fd's 1007 */ 1008 for (msg_i = 0, region_i = 0; 1009 region_i < dev->mem->nregions; 1010 region_i++) { 1011 if (msg_i < fd_num && 1012 msg_reply.payload.memory.regions[msg_i].guest_phys_addr == 1013 dev->mem->regions[region_i].guest_phys_addr) { 1014 u->postcopy_client_bases[region_i] = 1015 msg_reply.payload.memory.regions[msg_i].userspace_addr; 1016 trace_vhost_user_set_mem_table_postcopy( 1017 msg_reply.payload.memory.regions[msg_i].userspace_addr, 1018 msg.payload.memory.regions[msg_i].userspace_addr, 1019 msg_i, region_i); 1020 msg_i++; 1021 } 1022 } 1023 if (msg_i != fd_num) { 1024 error_report("%s: postcopy reply not fully consumed " 1025 "%d vs %zd", 1026 __func__, msg_i, fd_num); 1027 return -1; 1028 } 1029 1030 /* 1031 * Now we've registered this with the postcopy code, we ack to the 1032 * client, because now we're in the position to be able to deal 1033 * with any faults it generates. 1034 */ 1035 /* TODO: Use this for failure cases as well with a bad value. */ 1036 msg.hdr.size = sizeof(msg.payload.u64); 1037 msg.payload.u64 = 0; /* OK */ 1038 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1039 return -1; 1040 } 1041 } 1042 1043 return 0; 1044 } 1045 1046 static int vhost_user_set_mem_table(struct vhost_dev *dev, 1047 struct vhost_memory *mem) 1048 { 1049 struct vhost_user *u = dev->opaque; 1050 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 1051 size_t fd_num = 0; 1052 bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; 1053 bool reply_supported = virtio_has_feature(dev->protocol_features, 1054 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1055 bool config_mem_slots = 1056 virtio_has_feature(dev->protocol_features, 1057 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS); 1058 1059 if (do_postcopy) { 1060 /* 1061 * Postcopy has enough differences that it's best done in it's own 1062 * version 1063 */ 1064 return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported, 1065 config_mem_slots); 1066 } 1067 1068 VhostUserMsg msg = { 1069 .hdr.flags = VHOST_USER_VERSION, 1070 }; 1071 1072 if (reply_supported) { 1073 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1074 } 1075 1076 if (config_mem_slots) { 1077 if (vhost_user_add_remove_regions(dev, &msg, reply_supported, 1078 false) < 0) { 1079 return -1; 1080 } 1081 } else { 1082 if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 1083 false) < 0) { 1084 return -1; 1085 } 1086 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 1087 return -1; 1088 } 1089 1090 if (reply_supported) { 1091 return process_message_reply(dev, &msg); 1092 } 1093 } 1094 1095 return 0; 1096 } 1097 1098 static int vhost_user_set_vring_endian(struct vhost_dev *dev, 1099 struct vhost_vring_state *ring) 1100 { 1101 bool cross_endian = virtio_has_feature(dev->protocol_features, 1102 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN); 1103 VhostUserMsg msg = { 1104 .hdr.request = VHOST_USER_SET_VRING_ENDIAN, 1105 .hdr.flags = VHOST_USER_VERSION, 1106 .payload.state = *ring, 1107 .hdr.size = sizeof(msg.payload.state), 1108 }; 1109 1110 if (!cross_endian) { 1111 error_report("vhost-user trying to send unhandled ioctl"); 1112 return -1; 1113 } 1114 1115 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1116 return -1; 1117 } 1118 1119 return 0; 1120 } 1121 1122 static int vhost_set_vring(struct vhost_dev *dev, 1123 unsigned long int request, 1124 struct vhost_vring_state *ring) 1125 { 1126 VhostUserMsg msg = { 1127 .hdr.request = request, 1128 .hdr.flags = VHOST_USER_VERSION, 1129 .payload.state = *ring, 1130 .hdr.size = sizeof(msg.payload.state), 1131 }; 1132 1133 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1134 return -1; 1135 } 1136 1137 return 0; 1138 } 1139 1140 static int vhost_user_set_vring_num(struct vhost_dev *dev, 1141 struct vhost_vring_state *ring) 1142 { 1143 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); 1144 } 1145 1146 static void vhost_user_host_notifier_restore(struct vhost_dev *dev, 1147 int queue_idx) 1148 { 1149 struct vhost_user *u = dev->opaque; 1150 VhostUserHostNotifier *n = &u->user->notifier[queue_idx]; 1151 VirtIODevice *vdev = dev->vdev; 1152 1153 if (n->addr && !n->set) { 1154 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true); 1155 n->set = true; 1156 } 1157 } 1158 1159 static void vhost_user_host_notifier_remove(struct vhost_dev *dev, 1160 int queue_idx) 1161 { 1162 struct vhost_user *u = dev->opaque; 1163 VhostUserHostNotifier *n = &u->user->notifier[queue_idx]; 1164 VirtIODevice *vdev = dev->vdev; 1165 1166 if (n->addr && n->set) { 1167 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false); 1168 n->set = false; 1169 } 1170 } 1171 1172 static int vhost_user_set_vring_base(struct vhost_dev *dev, 1173 struct vhost_vring_state *ring) 1174 { 1175 vhost_user_host_notifier_restore(dev, ring->index); 1176 1177 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); 1178 } 1179 1180 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) 1181 { 1182 int i; 1183 1184 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { 1185 return -1; 1186 } 1187 1188 for (i = 0; i < dev->nvqs; ++i) { 1189 struct vhost_vring_state state = { 1190 .index = dev->vq_index + i, 1191 .num = enable, 1192 }; 1193 1194 vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); 1195 } 1196 1197 return 0; 1198 } 1199 1200 static int vhost_user_get_vring_base(struct vhost_dev *dev, 1201 struct vhost_vring_state *ring) 1202 { 1203 VhostUserMsg msg = { 1204 .hdr.request = VHOST_USER_GET_VRING_BASE, 1205 .hdr.flags = VHOST_USER_VERSION, 1206 .payload.state = *ring, 1207 .hdr.size = sizeof(msg.payload.state), 1208 }; 1209 1210 vhost_user_host_notifier_remove(dev, ring->index); 1211 1212 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1213 return -1; 1214 } 1215 1216 if (vhost_user_read(dev, &msg) < 0) { 1217 return -1; 1218 } 1219 1220 if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) { 1221 error_report("Received unexpected msg type. Expected %d received %d", 1222 VHOST_USER_GET_VRING_BASE, msg.hdr.request); 1223 return -1; 1224 } 1225 1226 if (msg.hdr.size != sizeof(msg.payload.state)) { 1227 error_report("Received bad msg size."); 1228 return -1; 1229 } 1230 1231 *ring = msg.payload.state; 1232 1233 return 0; 1234 } 1235 1236 static int vhost_set_vring_file(struct vhost_dev *dev, 1237 VhostUserRequest request, 1238 struct vhost_vring_file *file) 1239 { 1240 int fds[VHOST_USER_MAX_RAM_SLOTS]; 1241 size_t fd_num = 0; 1242 VhostUserMsg msg = { 1243 .hdr.request = request, 1244 .hdr.flags = VHOST_USER_VERSION, 1245 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, 1246 .hdr.size = sizeof(msg.payload.u64), 1247 }; 1248 1249 if (ioeventfd_enabled() && file->fd > 0) { 1250 fds[fd_num++] = file->fd; 1251 } else { 1252 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 1253 } 1254 1255 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 1256 return -1; 1257 } 1258 1259 return 0; 1260 } 1261 1262 static int vhost_user_set_vring_kick(struct vhost_dev *dev, 1263 struct vhost_vring_file *file) 1264 { 1265 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); 1266 } 1267 1268 static int vhost_user_set_vring_call(struct vhost_dev *dev, 1269 struct vhost_vring_file *file) 1270 { 1271 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); 1272 } 1273 1274 1275 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) 1276 { 1277 VhostUserMsg msg = { 1278 .hdr.request = request, 1279 .hdr.flags = VHOST_USER_VERSION, 1280 }; 1281 1282 if (vhost_user_one_time_request(request) && dev->vq_index != 0) { 1283 return 0; 1284 } 1285 1286 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1287 return -1; 1288 } 1289 1290 if (vhost_user_read(dev, &msg) < 0) { 1291 return -1; 1292 } 1293 1294 if (msg.hdr.request != request) { 1295 error_report("Received unexpected msg type. Expected %d received %d", 1296 request, msg.hdr.request); 1297 return -1; 1298 } 1299 1300 if (msg.hdr.size != sizeof(msg.payload.u64)) { 1301 error_report("Received bad msg size."); 1302 return -1; 1303 } 1304 1305 *u64 = msg.payload.u64; 1306 1307 return 0; 1308 } 1309 1310 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) 1311 { 1312 if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) { 1313 return -EPROTO; 1314 } 1315 1316 return 0; 1317 } 1318 1319 static int enforce_reply(struct vhost_dev *dev, 1320 const VhostUserMsg *msg) 1321 { 1322 uint64_t dummy; 1323 1324 if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1325 return process_message_reply(dev, msg); 1326 } 1327 1328 /* 1329 * We need to wait for a reply but the backend does not 1330 * support replies for the command we just sent. 1331 * Send VHOST_USER_GET_FEATURES which makes all backends 1332 * send a reply. 1333 */ 1334 return vhost_user_get_features(dev, &dummy); 1335 } 1336 1337 static int vhost_user_set_vring_addr(struct vhost_dev *dev, 1338 struct vhost_vring_addr *addr) 1339 { 1340 VhostUserMsg msg = { 1341 .hdr.request = VHOST_USER_SET_VRING_ADDR, 1342 .hdr.flags = VHOST_USER_VERSION, 1343 .payload.addr = *addr, 1344 .hdr.size = sizeof(msg.payload.addr), 1345 }; 1346 1347 bool reply_supported = virtio_has_feature(dev->protocol_features, 1348 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1349 1350 /* 1351 * wait for a reply if logging is enabled to make sure 1352 * backend is actually logging changes 1353 */ 1354 bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG); 1355 1356 if (reply_supported && wait_for_reply) { 1357 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1358 } 1359 1360 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1361 return -1; 1362 } 1363 1364 if (wait_for_reply) { 1365 return enforce_reply(dev, &msg); 1366 } 1367 1368 return 0; 1369 } 1370 1371 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64, 1372 bool wait_for_reply) 1373 { 1374 VhostUserMsg msg = { 1375 .hdr.request = request, 1376 .hdr.flags = VHOST_USER_VERSION, 1377 .payload.u64 = u64, 1378 .hdr.size = sizeof(msg.payload.u64), 1379 }; 1380 1381 if (wait_for_reply) { 1382 bool reply_supported = virtio_has_feature(dev->protocol_features, 1383 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1384 if (reply_supported) { 1385 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1386 } 1387 } 1388 1389 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1390 return -1; 1391 } 1392 1393 if (wait_for_reply) { 1394 return enforce_reply(dev, &msg); 1395 } 1396 1397 return 0; 1398 } 1399 1400 static int vhost_user_set_features(struct vhost_dev *dev, 1401 uint64_t features) 1402 { 1403 /* 1404 * wait for a reply if logging is enabled to make sure 1405 * backend is actually logging changes 1406 */ 1407 bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL); 1408 1409 return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features, 1410 log_enabled); 1411 } 1412 1413 static int vhost_user_set_protocol_features(struct vhost_dev *dev, 1414 uint64_t features) 1415 { 1416 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features, 1417 false); 1418 } 1419 1420 static int vhost_user_set_owner(struct vhost_dev *dev) 1421 { 1422 VhostUserMsg msg = { 1423 .hdr.request = VHOST_USER_SET_OWNER, 1424 .hdr.flags = VHOST_USER_VERSION, 1425 }; 1426 1427 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1428 return -EPROTO; 1429 } 1430 1431 return 0; 1432 } 1433 1434 static int vhost_user_get_max_memslots(struct vhost_dev *dev, 1435 uint64_t *max_memslots) 1436 { 1437 uint64_t backend_max_memslots; 1438 int err; 1439 1440 err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS, 1441 &backend_max_memslots); 1442 if (err < 0) { 1443 return err; 1444 } 1445 1446 *max_memslots = backend_max_memslots; 1447 1448 return 0; 1449 } 1450 1451 static int vhost_user_reset_device(struct vhost_dev *dev) 1452 { 1453 VhostUserMsg msg = { 1454 .hdr.flags = VHOST_USER_VERSION, 1455 }; 1456 1457 msg.hdr.request = virtio_has_feature(dev->protocol_features, 1458 VHOST_USER_PROTOCOL_F_RESET_DEVICE) 1459 ? VHOST_USER_RESET_DEVICE 1460 : VHOST_USER_RESET_OWNER; 1461 1462 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1463 return -1; 1464 } 1465 1466 return 0; 1467 } 1468 1469 static int vhost_user_slave_handle_config_change(struct vhost_dev *dev) 1470 { 1471 int ret = -1; 1472 1473 if (!dev->config_ops) { 1474 return -1; 1475 } 1476 1477 if (dev->config_ops->vhost_dev_config_notifier) { 1478 ret = dev->config_ops->vhost_dev_config_notifier(dev); 1479 } 1480 1481 return ret; 1482 } 1483 1484 static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, 1485 VhostUserVringArea *area, 1486 int fd) 1487 { 1488 int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK; 1489 size_t page_size = qemu_real_host_page_size; 1490 struct vhost_user *u = dev->opaque; 1491 VhostUserState *user = u->user; 1492 VirtIODevice *vdev = dev->vdev; 1493 VhostUserHostNotifier *n; 1494 void *addr; 1495 char *name; 1496 1497 if (!virtio_has_feature(dev->protocol_features, 1498 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) || 1499 vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) { 1500 return -1; 1501 } 1502 1503 n = &user->notifier[queue_idx]; 1504 1505 if (n->addr) { 1506 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false); 1507 object_unparent(OBJECT(&n->mr)); 1508 munmap(n->addr, page_size); 1509 n->addr = NULL; 1510 } 1511 1512 if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { 1513 return 0; 1514 } 1515 1516 /* Sanity check. */ 1517 if (area->size != page_size) { 1518 return -1; 1519 } 1520 1521 addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, 1522 fd, area->offset); 1523 if (addr == MAP_FAILED) { 1524 return -1; 1525 } 1526 1527 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]", 1528 user, queue_idx); 1529 if (!n->mr.ram) /* Don't init again after suspend. */ 1530 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, 1531 page_size, addr); 1532 g_free(name); 1533 1534 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) { 1535 object_unparent(OBJECT(&n->mr)); 1536 munmap(addr, page_size); 1537 return -1; 1538 } 1539 1540 n->addr = addr; 1541 n->set = true; 1542 1543 return 0; 1544 } 1545 1546 static void close_slave_channel(struct vhost_user *u) 1547 { 1548 g_source_destroy(u->slave_src); 1549 g_source_unref(u->slave_src); 1550 u->slave_src = NULL; 1551 object_unref(OBJECT(u->slave_ioc)); 1552 u->slave_ioc = NULL; 1553 } 1554 1555 static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, 1556 gpointer opaque) 1557 { 1558 struct vhost_dev *dev = opaque; 1559 struct vhost_user *u = dev->opaque; 1560 VhostUserHeader hdr = { 0, }; 1561 VhostUserPayload payload = { 0, }; 1562 Error *local_err = NULL; 1563 gboolean rc = G_SOURCE_CONTINUE; 1564 int ret = 0; 1565 struct iovec iov; 1566 g_autofree int *fd = NULL; 1567 size_t fdsize = 0; 1568 int i; 1569 1570 /* Read header */ 1571 iov.iov_base = &hdr; 1572 iov.iov_len = VHOST_USER_HDR_SIZE; 1573 1574 if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) { 1575 error_report_err(local_err); 1576 goto err; 1577 } 1578 1579 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) { 1580 error_report("Failed to read msg header." 1581 " Size %d exceeds the maximum %zu.", hdr.size, 1582 VHOST_USER_PAYLOAD_SIZE); 1583 goto err; 1584 } 1585 1586 /* Read payload */ 1587 if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) { 1588 error_report_err(local_err); 1589 goto err; 1590 } 1591 1592 switch (hdr.request) { 1593 case VHOST_USER_SLAVE_IOTLB_MSG: 1594 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); 1595 break; 1596 case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG : 1597 ret = vhost_user_slave_handle_config_change(dev); 1598 break; 1599 case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: 1600 ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area, 1601 fd ? fd[0] : -1); 1602 break; 1603 default: 1604 error_report("Received unexpected msg type: %d.", hdr.request); 1605 ret = -EINVAL; 1606 } 1607 1608 /* 1609 * REPLY_ACK feature handling. Other reply types has to be managed 1610 * directly in their request handlers. 1611 */ 1612 if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1613 struct iovec iovec[2]; 1614 1615 1616 hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 1617 hdr.flags |= VHOST_USER_REPLY_MASK; 1618 1619 payload.u64 = !!ret; 1620 hdr.size = sizeof(payload.u64); 1621 1622 iovec[0].iov_base = &hdr; 1623 iovec[0].iov_len = VHOST_USER_HDR_SIZE; 1624 iovec[1].iov_base = &payload; 1625 iovec[1].iov_len = hdr.size; 1626 1627 if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) { 1628 error_report_err(local_err); 1629 goto err; 1630 } 1631 } 1632 1633 goto fdcleanup; 1634 1635 err: 1636 close_slave_channel(u); 1637 rc = G_SOURCE_REMOVE; 1638 1639 fdcleanup: 1640 if (fd) { 1641 for (i = 0; i < fdsize; i++) { 1642 close(fd[i]); 1643 } 1644 } 1645 return rc; 1646 } 1647 1648 static int vhost_setup_slave_channel(struct vhost_dev *dev) 1649 { 1650 VhostUserMsg msg = { 1651 .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD, 1652 .hdr.flags = VHOST_USER_VERSION, 1653 }; 1654 struct vhost_user *u = dev->opaque; 1655 int sv[2], ret = 0; 1656 bool reply_supported = virtio_has_feature(dev->protocol_features, 1657 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1658 Error *local_err = NULL; 1659 QIOChannel *ioc; 1660 1661 if (!virtio_has_feature(dev->protocol_features, 1662 VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { 1663 return 0; 1664 } 1665 1666 if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { 1667 error_report("socketpair() failed"); 1668 return -1; 1669 } 1670 1671 ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err)); 1672 if (!ioc) { 1673 error_report_err(local_err); 1674 return -1; 1675 } 1676 u->slave_ioc = ioc; 1677 slave_update_read_handler(dev, NULL); 1678 1679 if (reply_supported) { 1680 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1681 } 1682 1683 ret = vhost_user_write(dev, &msg, &sv[1], 1); 1684 if (ret) { 1685 goto out; 1686 } 1687 1688 if (reply_supported) { 1689 ret = process_message_reply(dev, &msg); 1690 } 1691 1692 out: 1693 close(sv[1]); 1694 if (ret) { 1695 close_slave_channel(u); 1696 } 1697 1698 return ret; 1699 } 1700 1701 #ifdef CONFIG_LINUX 1702 /* 1703 * Called back from the postcopy fault thread when a fault is received on our 1704 * ufd. 1705 * TODO: This is Linux specific 1706 */ 1707 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd, 1708 void *ufd) 1709 { 1710 struct vhost_dev *dev = pcfd->data; 1711 struct vhost_user *u = dev->opaque; 1712 struct uffd_msg *msg = ufd; 1713 uint64_t faultaddr = msg->arg.pagefault.address; 1714 RAMBlock *rb = NULL; 1715 uint64_t rb_offset; 1716 int i; 1717 1718 trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr, 1719 dev->mem->nregions); 1720 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1721 trace_vhost_user_postcopy_fault_handler_loop(i, 1722 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size); 1723 if (faultaddr >= u->postcopy_client_bases[i]) { 1724 /* Ofset of the fault address in the vhost region */ 1725 uint64_t region_offset = faultaddr - u->postcopy_client_bases[i]; 1726 if (region_offset < dev->mem->regions[i].memory_size) { 1727 rb_offset = region_offset + u->region_rb_offset[i]; 1728 trace_vhost_user_postcopy_fault_handler_found(i, 1729 region_offset, rb_offset); 1730 rb = u->region_rb[i]; 1731 return postcopy_request_shared_page(pcfd, rb, faultaddr, 1732 rb_offset); 1733 } 1734 } 1735 } 1736 error_report("%s: Failed to find region for fault %" PRIx64, 1737 __func__, faultaddr); 1738 return -1; 1739 } 1740 1741 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb, 1742 uint64_t offset) 1743 { 1744 struct vhost_dev *dev = pcfd->data; 1745 struct vhost_user *u = dev->opaque; 1746 int i; 1747 1748 trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset); 1749 1750 if (!u) { 1751 return 0; 1752 } 1753 /* Translate the offset into an address in the clients address space */ 1754 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1755 if (u->region_rb[i] == rb && 1756 offset >= u->region_rb_offset[i] && 1757 offset < (u->region_rb_offset[i] + 1758 dev->mem->regions[i].memory_size)) { 1759 uint64_t client_addr = (offset - u->region_rb_offset[i]) + 1760 u->postcopy_client_bases[i]; 1761 trace_vhost_user_postcopy_waker_found(client_addr); 1762 return postcopy_wake_shared(pcfd, client_addr, rb); 1763 } 1764 } 1765 1766 trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset); 1767 return 0; 1768 } 1769 #endif 1770 1771 /* 1772 * Called at the start of an inbound postcopy on reception of the 1773 * 'advise' command. 1774 */ 1775 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) 1776 { 1777 #ifdef CONFIG_LINUX 1778 struct vhost_user *u = dev->opaque; 1779 CharBackend *chr = u->user->chr; 1780 int ufd; 1781 VhostUserMsg msg = { 1782 .hdr.request = VHOST_USER_POSTCOPY_ADVISE, 1783 .hdr.flags = VHOST_USER_VERSION, 1784 }; 1785 1786 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1787 error_setg(errp, "Failed to send postcopy_advise to vhost"); 1788 return -1; 1789 } 1790 1791 if (vhost_user_read(dev, &msg) < 0) { 1792 error_setg(errp, "Failed to get postcopy_advise reply from vhost"); 1793 return -1; 1794 } 1795 1796 if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) { 1797 error_setg(errp, "Unexpected msg type. Expected %d received %d", 1798 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request); 1799 return -1; 1800 } 1801 1802 if (msg.hdr.size) { 1803 error_setg(errp, "Received bad msg size."); 1804 return -1; 1805 } 1806 ufd = qemu_chr_fe_get_msgfd(chr); 1807 if (ufd < 0) { 1808 error_setg(errp, "%s: Failed to get ufd", __func__); 1809 return -1; 1810 } 1811 qemu_set_nonblock(ufd); 1812 1813 /* register ufd with userfault thread */ 1814 u->postcopy_fd.fd = ufd; 1815 u->postcopy_fd.data = dev; 1816 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler; 1817 u->postcopy_fd.waker = vhost_user_postcopy_waker; 1818 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */ 1819 postcopy_register_shared_ufd(&u->postcopy_fd); 1820 return 0; 1821 #else 1822 error_setg(errp, "Postcopy not supported on non-Linux systems"); 1823 return -1; 1824 #endif 1825 } 1826 1827 /* 1828 * Called at the switch to postcopy on reception of the 'listen' command. 1829 */ 1830 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) 1831 { 1832 struct vhost_user *u = dev->opaque; 1833 int ret; 1834 VhostUserMsg msg = { 1835 .hdr.request = VHOST_USER_POSTCOPY_LISTEN, 1836 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1837 }; 1838 u->postcopy_listen = true; 1839 trace_vhost_user_postcopy_listen(); 1840 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1841 error_setg(errp, "Failed to send postcopy_listen to vhost"); 1842 return -1; 1843 } 1844 1845 ret = process_message_reply(dev, &msg); 1846 if (ret) { 1847 error_setg(errp, "Failed to receive reply to postcopy_listen"); 1848 return ret; 1849 } 1850 1851 return 0; 1852 } 1853 1854 /* 1855 * Called at the end of postcopy 1856 */ 1857 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp) 1858 { 1859 VhostUserMsg msg = { 1860 .hdr.request = VHOST_USER_POSTCOPY_END, 1861 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1862 }; 1863 int ret; 1864 struct vhost_user *u = dev->opaque; 1865 1866 trace_vhost_user_postcopy_end_entry(); 1867 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1868 error_setg(errp, "Failed to send postcopy_end to vhost"); 1869 return -1; 1870 } 1871 1872 ret = process_message_reply(dev, &msg); 1873 if (ret) { 1874 error_setg(errp, "Failed to receive reply to postcopy_end"); 1875 return ret; 1876 } 1877 postcopy_unregister_shared_ufd(&u->postcopy_fd); 1878 close(u->postcopy_fd.fd); 1879 u->postcopy_fd.handler = NULL; 1880 1881 trace_vhost_user_postcopy_end_exit(); 1882 1883 return 0; 1884 } 1885 1886 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, 1887 void *opaque) 1888 { 1889 struct PostcopyNotifyData *pnd = opaque; 1890 struct vhost_user *u = container_of(notifier, struct vhost_user, 1891 postcopy_notifier); 1892 struct vhost_dev *dev = u->dev; 1893 1894 switch (pnd->reason) { 1895 case POSTCOPY_NOTIFY_PROBE: 1896 if (!virtio_has_feature(dev->protocol_features, 1897 VHOST_USER_PROTOCOL_F_PAGEFAULT)) { 1898 /* TODO: Get the device name into this error somehow */ 1899 error_setg(pnd->errp, 1900 "vhost-user backend not capable of postcopy"); 1901 return -ENOENT; 1902 } 1903 break; 1904 1905 case POSTCOPY_NOTIFY_INBOUND_ADVISE: 1906 return vhost_user_postcopy_advise(dev, pnd->errp); 1907 1908 case POSTCOPY_NOTIFY_INBOUND_LISTEN: 1909 return vhost_user_postcopy_listen(dev, pnd->errp); 1910 1911 case POSTCOPY_NOTIFY_INBOUND_END: 1912 return vhost_user_postcopy_end(dev, pnd->errp); 1913 1914 default: 1915 /* We ignore notifications we don't know */ 1916 break; 1917 } 1918 1919 return 0; 1920 } 1921 1922 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, 1923 Error **errp) 1924 { 1925 uint64_t features, protocol_features, ram_slots; 1926 struct vhost_user *u; 1927 int err; 1928 1929 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1930 1931 u = g_new0(struct vhost_user, 1); 1932 u->user = opaque; 1933 u->dev = dev; 1934 dev->opaque = u; 1935 1936 err = vhost_user_get_features(dev, &features); 1937 if (err < 0) { 1938 error_setg_errno(errp, -err, "vhost_backend_init failed"); 1939 return err; 1940 } 1941 1942 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 1943 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 1944 1945 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, 1946 &protocol_features); 1947 if (err < 0) { 1948 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 1949 return -EPROTO; 1950 } 1951 1952 dev->protocol_features = 1953 protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK; 1954 1955 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { 1956 /* Don't acknowledge CONFIG feature if device doesn't support it */ 1957 dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); 1958 } else if (!(protocol_features & 1959 (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) { 1960 error_setg(errp, "Device expects VHOST_USER_PROTOCOL_F_CONFIG " 1961 "but backend does not support it."); 1962 return -EINVAL; 1963 } 1964 1965 err = vhost_user_set_protocol_features(dev, dev->protocol_features); 1966 if (err < 0) { 1967 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 1968 return -EPROTO; 1969 } 1970 1971 /* query the max queues we support if backend supports Multiple Queue */ 1972 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 1973 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, 1974 &dev->max_queues); 1975 if (err < 0) { 1976 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 1977 return -EPROTO; 1978 } 1979 } else { 1980 dev->max_queues = 1; 1981 } 1982 1983 if (dev->num_queues && dev->max_queues < dev->num_queues) { 1984 error_setg(errp, "The maximum number of queues supported by the " 1985 "backend is %" PRIu64, dev->max_queues); 1986 return -EINVAL; 1987 } 1988 1989 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && 1990 !(virtio_has_feature(dev->protocol_features, 1991 VHOST_USER_PROTOCOL_F_SLAVE_REQ) && 1992 virtio_has_feature(dev->protocol_features, 1993 VHOST_USER_PROTOCOL_F_REPLY_ACK))) { 1994 error_setg(errp, "IOMMU support requires reply-ack and " 1995 "slave-req protocol features."); 1996 return -EINVAL; 1997 } 1998 1999 /* get max memory regions if backend supports configurable RAM slots */ 2000 if (!virtio_has_feature(dev->protocol_features, 2001 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) { 2002 u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS; 2003 } else { 2004 err = vhost_user_get_max_memslots(dev, &ram_slots); 2005 if (err < 0) { 2006 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2007 return -EPROTO; 2008 } 2009 2010 if (ram_slots < u->user->memory_slots) { 2011 error_setg(errp, "The backend specified a max ram slots limit " 2012 "of %" PRIu64", when the prior validated limit was " 2013 "%d. This limit should never decrease.", ram_slots, 2014 u->user->memory_slots); 2015 return -EINVAL; 2016 } 2017 2018 u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS); 2019 } 2020 } 2021 2022 if (dev->migration_blocker == NULL && 2023 !virtio_has_feature(dev->protocol_features, 2024 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { 2025 error_setg(&dev->migration_blocker, 2026 "Migration disabled: vhost-user backend lacks " 2027 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); 2028 } 2029 2030 if (dev->vq_index == 0) { 2031 err = vhost_setup_slave_channel(dev); 2032 if (err < 0) { 2033 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2034 return -EPROTO; 2035 } 2036 } 2037 2038 u->postcopy_notifier.notify = vhost_user_postcopy_notifier; 2039 postcopy_add_notifier(&u->postcopy_notifier); 2040 2041 return 0; 2042 } 2043 2044 static int vhost_user_backend_cleanup(struct vhost_dev *dev) 2045 { 2046 struct vhost_user *u; 2047 2048 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2049 2050 u = dev->opaque; 2051 if (u->postcopy_notifier.notify) { 2052 postcopy_remove_notifier(&u->postcopy_notifier); 2053 u->postcopy_notifier.notify = NULL; 2054 } 2055 u->postcopy_listen = false; 2056 if (u->postcopy_fd.handler) { 2057 postcopy_unregister_shared_ufd(&u->postcopy_fd); 2058 close(u->postcopy_fd.fd); 2059 u->postcopy_fd.handler = NULL; 2060 } 2061 if (u->slave_ioc) { 2062 close_slave_channel(u); 2063 } 2064 g_free(u->region_rb); 2065 u->region_rb = NULL; 2066 g_free(u->region_rb_offset); 2067 u->region_rb_offset = NULL; 2068 u->region_rb_len = 0; 2069 g_free(u); 2070 dev->opaque = 0; 2071 2072 return 0; 2073 } 2074 2075 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 2076 { 2077 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 2078 2079 return idx; 2080 } 2081 2082 static int vhost_user_memslots_limit(struct vhost_dev *dev) 2083 { 2084 struct vhost_user *u = dev->opaque; 2085 2086 return u->user->memory_slots; 2087 } 2088 2089 static bool vhost_user_requires_shm_log(struct vhost_dev *dev) 2090 { 2091 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2092 2093 return virtio_has_feature(dev->protocol_features, 2094 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 2095 } 2096 2097 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) 2098 { 2099 VhostUserMsg msg = { }; 2100 2101 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2102 2103 /* If guest supports GUEST_ANNOUNCE do nothing */ 2104 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { 2105 return 0; 2106 } 2107 2108 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ 2109 if (virtio_has_feature(dev->protocol_features, 2110 VHOST_USER_PROTOCOL_F_RARP)) { 2111 msg.hdr.request = VHOST_USER_SEND_RARP; 2112 msg.hdr.flags = VHOST_USER_VERSION; 2113 memcpy((char *)&msg.payload.u64, mac_addr, 6); 2114 msg.hdr.size = sizeof(msg.payload.u64); 2115 2116 return vhost_user_write(dev, &msg, NULL, 0); 2117 } 2118 return -1; 2119 } 2120 2121 static bool vhost_user_can_merge(struct vhost_dev *dev, 2122 uint64_t start1, uint64_t size1, 2123 uint64_t start2, uint64_t size2) 2124 { 2125 ram_addr_t offset; 2126 int mfd, rfd; 2127 2128 (void)vhost_user_get_mr_data(start1, &offset, &mfd); 2129 (void)vhost_user_get_mr_data(start2, &offset, &rfd); 2130 2131 return mfd == rfd; 2132 } 2133 2134 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) 2135 { 2136 VhostUserMsg msg; 2137 bool reply_supported = virtio_has_feature(dev->protocol_features, 2138 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2139 2140 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) { 2141 return 0; 2142 } 2143 2144 msg.hdr.request = VHOST_USER_NET_SET_MTU; 2145 msg.payload.u64 = mtu; 2146 msg.hdr.size = sizeof(msg.payload.u64); 2147 msg.hdr.flags = VHOST_USER_VERSION; 2148 if (reply_supported) { 2149 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2150 } 2151 2152 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 2153 return -1; 2154 } 2155 2156 /* If reply_ack supported, slave has to ack specified MTU is valid */ 2157 if (reply_supported) { 2158 return process_message_reply(dev, &msg); 2159 } 2160 2161 return 0; 2162 } 2163 2164 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev, 2165 struct vhost_iotlb_msg *imsg) 2166 { 2167 VhostUserMsg msg = { 2168 .hdr.request = VHOST_USER_IOTLB_MSG, 2169 .hdr.size = sizeof(msg.payload.iotlb), 2170 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 2171 .payload.iotlb = *imsg, 2172 }; 2173 2174 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 2175 return -EFAULT; 2176 } 2177 2178 return process_message_reply(dev, &msg); 2179 } 2180 2181 2182 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled) 2183 { 2184 /* No-op as the receive channel is not dedicated to IOTLB messages. */ 2185 } 2186 2187 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, 2188 uint32_t config_len, Error **errp) 2189 { 2190 VhostUserMsg msg = { 2191 .hdr.request = VHOST_USER_GET_CONFIG, 2192 .hdr.flags = VHOST_USER_VERSION, 2193 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len, 2194 }; 2195 2196 if (!virtio_has_feature(dev->protocol_features, 2197 VHOST_USER_PROTOCOL_F_CONFIG)) { 2198 error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported"); 2199 return -EINVAL; 2200 } 2201 2202 assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE); 2203 2204 msg.payload.config.offset = 0; 2205 msg.payload.config.size = config_len; 2206 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 2207 error_setg_errno(errp, EPROTO, "vhost_get_config failed"); 2208 return -EPROTO; 2209 } 2210 2211 if (vhost_user_read(dev, &msg) < 0) { 2212 error_setg_errno(errp, EPROTO, "vhost_get_config failed"); 2213 return -EPROTO; 2214 } 2215 2216 if (msg.hdr.request != VHOST_USER_GET_CONFIG) { 2217 error_setg(errp, 2218 "Received unexpected msg type. Expected %d received %d", 2219 VHOST_USER_GET_CONFIG, msg.hdr.request); 2220 return -EINVAL; 2221 } 2222 2223 if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) { 2224 error_setg(errp, "Received bad msg size."); 2225 return -EINVAL; 2226 } 2227 2228 memcpy(config, msg.payload.config.region, config_len); 2229 2230 return 0; 2231 } 2232 2233 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data, 2234 uint32_t offset, uint32_t size, uint32_t flags) 2235 { 2236 uint8_t *p; 2237 bool reply_supported = virtio_has_feature(dev->protocol_features, 2238 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2239 2240 VhostUserMsg msg = { 2241 .hdr.request = VHOST_USER_SET_CONFIG, 2242 .hdr.flags = VHOST_USER_VERSION, 2243 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size, 2244 }; 2245 2246 if (!virtio_has_feature(dev->protocol_features, 2247 VHOST_USER_PROTOCOL_F_CONFIG)) { 2248 return -1; 2249 } 2250 2251 if (reply_supported) { 2252 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2253 } 2254 2255 if (size > VHOST_USER_MAX_CONFIG_SIZE) { 2256 return -1; 2257 } 2258 2259 msg.payload.config.offset = offset, 2260 msg.payload.config.size = size, 2261 msg.payload.config.flags = flags, 2262 p = msg.payload.config.region; 2263 memcpy(p, data, size); 2264 2265 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 2266 return -1; 2267 } 2268 2269 if (reply_supported) { 2270 return process_message_reply(dev, &msg); 2271 } 2272 2273 return 0; 2274 } 2275 2276 static int vhost_user_crypto_create_session(struct vhost_dev *dev, 2277 void *session_info, 2278 uint64_t *session_id) 2279 { 2280 bool crypto_session = virtio_has_feature(dev->protocol_features, 2281 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2282 CryptoDevBackendSymSessionInfo *sess_info = session_info; 2283 VhostUserMsg msg = { 2284 .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION, 2285 .hdr.flags = VHOST_USER_VERSION, 2286 .hdr.size = sizeof(msg.payload.session), 2287 }; 2288 2289 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2290 2291 if (!crypto_session) { 2292 error_report("vhost-user trying to send unhandled ioctl"); 2293 return -1; 2294 } 2295 2296 memcpy(&msg.payload.session.session_setup_data, sess_info, 2297 sizeof(CryptoDevBackendSymSessionInfo)); 2298 if (sess_info->key_len) { 2299 memcpy(&msg.payload.session.key, sess_info->cipher_key, 2300 sess_info->key_len); 2301 } 2302 if (sess_info->auth_key_len > 0) { 2303 memcpy(&msg.payload.session.auth_key, sess_info->auth_key, 2304 sess_info->auth_key_len); 2305 } 2306 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 2307 error_report("vhost_user_write() return -1, create session failed"); 2308 return -1; 2309 } 2310 2311 if (vhost_user_read(dev, &msg) < 0) { 2312 error_report("vhost_user_read() return -1, create session failed"); 2313 return -1; 2314 } 2315 2316 if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) { 2317 error_report("Received unexpected msg type. Expected %d received %d", 2318 VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request); 2319 return -1; 2320 } 2321 2322 if (msg.hdr.size != sizeof(msg.payload.session)) { 2323 error_report("Received bad msg size."); 2324 return -1; 2325 } 2326 2327 if (msg.payload.session.session_id < 0) { 2328 error_report("Bad session id: %" PRId64 "", 2329 msg.payload.session.session_id); 2330 return -1; 2331 } 2332 *session_id = msg.payload.session.session_id; 2333 2334 return 0; 2335 } 2336 2337 static int 2338 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id) 2339 { 2340 bool crypto_session = virtio_has_feature(dev->protocol_features, 2341 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2342 VhostUserMsg msg = { 2343 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION, 2344 .hdr.flags = VHOST_USER_VERSION, 2345 .hdr.size = sizeof(msg.payload.u64), 2346 }; 2347 msg.payload.u64 = session_id; 2348 2349 if (!crypto_session) { 2350 error_report("vhost-user trying to send unhandled ioctl"); 2351 return -1; 2352 } 2353 2354 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 2355 error_report("vhost_user_write() return -1, close session failed"); 2356 return -1; 2357 } 2358 2359 return 0; 2360 } 2361 2362 static bool vhost_user_mem_section_filter(struct vhost_dev *dev, 2363 MemoryRegionSection *section) 2364 { 2365 bool result; 2366 2367 result = memory_region_get_fd(section->mr) >= 0; 2368 2369 return result; 2370 } 2371 2372 static int vhost_user_get_inflight_fd(struct vhost_dev *dev, 2373 uint16_t queue_size, 2374 struct vhost_inflight *inflight) 2375 { 2376 void *addr; 2377 int fd; 2378 struct vhost_user *u = dev->opaque; 2379 CharBackend *chr = u->user->chr; 2380 VhostUserMsg msg = { 2381 .hdr.request = VHOST_USER_GET_INFLIGHT_FD, 2382 .hdr.flags = VHOST_USER_VERSION, 2383 .payload.inflight.num_queues = dev->nvqs, 2384 .payload.inflight.queue_size = queue_size, 2385 .hdr.size = sizeof(msg.payload.inflight), 2386 }; 2387 2388 if (!virtio_has_feature(dev->protocol_features, 2389 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2390 return 0; 2391 } 2392 2393 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 2394 return -1; 2395 } 2396 2397 if (vhost_user_read(dev, &msg) < 0) { 2398 return -1; 2399 } 2400 2401 if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) { 2402 error_report("Received unexpected msg type. " 2403 "Expected %d received %d", 2404 VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request); 2405 return -1; 2406 } 2407 2408 if (msg.hdr.size != sizeof(msg.payload.inflight)) { 2409 error_report("Received bad msg size."); 2410 return -1; 2411 } 2412 2413 if (!msg.payload.inflight.mmap_size) { 2414 return 0; 2415 } 2416 2417 fd = qemu_chr_fe_get_msgfd(chr); 2418 if (fd < 0) { 2419 error_report("Failed to get mem fd"); 2420 return -1; 2421 } 2422 2423 addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE, 2424 MAP_SHARED, fd, msg.payload.inflight.mmap_offset); 2425 2426 if (addr == MAP_FAILED) { 2427 error_report("Failed to mmap mem fd"); 2428 close(fd); 2429 return -1; 2430 } 2431 2432 inflight->addr = addr; 2433 inflight->fd = fd; 2434 inflight->size = msg.payload.inflight.mmap_size; 2435 inflight->offset = msg.payload.inflight.mmap_offset; 2436 inflight->queue_size = queue_size; 2437 2438 return 0; 2439 } 2440 2441 static int vhost_user_set_inflight_fd(struct vhost_dev *dev, 2442 struct vhost_inflight *inflight) 2443 { 2444 VhostUserMsg msg = { 2445 .hdr.request = VHOST_USER_SET_INFLIGHT_FD, 2446 .hdr.flags = VHOST_USER_VERSION, 2447 .payload.inflight.mmap_size = inflight->size, 2448 .payload.inflight.mmap_offset = inflight->offset, 2449 .payload.inflight.num_queues = dev->nvqs, 2450 .payload.inflight.queue_size = inflight->queue_size, 2451 .hdr.size = sizeof(msg.payload.inflight), 2452 }; 2453 2454 if (!virtio_has_feature(dev->protocol_features, 2455 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2456 return 0; 2457 } 2458 2459 if (vhost_user_write(dev, &msg, &inflight->fd, 1) < 0) { 2460 return -1; 2461 } 2462 2463 return 0; 2464 } 2465 2466 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) 2467 { 2468 if (user->chr) { 2469 error_setg(errp, "Cannot initialize vhost-user state"); 2470 return false; 2471 } 2472 user->chr = chr; 2473 user->memory_slots = 0; 2474 return true; 2475 } 2476 2477 void vhost_user_cleanup(VhostUserState *user) 2478 { 2479 int i; 2480 2481 if (!user->chr) { 2482 return; 2483 } 2484 memory_region_transaction_begin(); 2485 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 2486 if (user->notifier[i].addr) { 2487 object_unparent(OBJECT(&user->notifier[i].mr)); 2488 munmap(user->notifier[i].addr, qemu_real_host_page_size); 2489 user->notifier[i].addr = NULL; 2490 } 2491 } 2492 memory_region_transaction_commit(); 2493 user->chr = NULL; 2494 } 2495 2496 const VhostOps user_ops = { 2497 .backend_type = VHOST_BACKEND_TYPE_USER, 2498 .vhost_backend_init = vhost_user_backend_init, 2499 .vhost_backend_cleanup = vhost_user_backend_cleanup, 2500 .vhost_backend_memslots_limit = vhost_user_memslots_limit, 2501 .vhost_set_log_base = vhost_user_set_log_base, 2502 .vhost_set_mem_table = vhost_user_set_mem_table, 2503 .vhost_set_vring_addr = vhost_user_set_vring_addr, 2504 .vhost_set_vring_endian = vhost_user_set_vring_endian, 2505 .vhost_set_vring_num = vhost_user_set_vring_num, 2506 .vhost_set_vring_base = vhost_user_set_vring_base, 2507 .vhost_get_vring_base = vhost_user_get_vring_base, 2508 .vhost_set_vring_kick = vhost_user_set_vring_kick, 2509 .vhost_set_vring_call = vhost_user_set_vring_call, 2510 .vhost_set_features = vhost_user_set_features, 2511 .vhost_get_features = vhost_user_get_features, 2512 .vhost_set_owner = vhost_user_set_owner, 2513 .vhost_reset_device = vhost_user_reset_device, 2514 .vhost_get_vq_index = vhost_user_get_vq_index, 2515 .vhost_set_vring_enable = vhost_user_set_vring_enable, 2516 .vhost_requires_shm_log = vhost_user_requires_shm_log, 2517 .vhost_migration_done = vhost_user_migration_done, 2518 .vhost_backend_can_merge = vhost_user_can_merge, 2519 .vhost_net_set_mtu = vhost_user_net_set_mtu, 2520 .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback, 2521 .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg, 2522 .vhost_get_config = vhost_user_get_config, 2523 .vhost_set_config = vhost_user_set_config, 2524 .vhost_crypto_create_session = vhost_user_crypto_create_session, 2525 .vhost_crypto_close_session = vhost_user_crypto_close_session, 2526 .vhost_backend_mem_section_filter = vhost_user_mem_section_filter, 2527 .vhost_get_inflight_fd = vhost_user_get_inflight_fd, 2528 .vhost_set_inflight_fd = vhost_user_set_inflight_fd, 2529 }; 2530