1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "qemu/osdep.h" 12 #include "qapi/error.h" 13 #include "hw/virtio/vhost.h" 14 #include "hw/virtio/vhost-user.h" 15 #include "hw/virtio/vhost-backend.h" 16 #include "hw/virtio/virtio.h" 17 #include "hw/virtio/virtio-net.h" 18 #include "chardev/char-fe.h" 19 #include "io/channel-socket.h" 20 #include "sysemu/kvm.h" 21 #include "qemu/error-report.h" 22 #include "qemu/main-loop.h" 23 #include "qemu/sockets.h" 24 #include "sysemu/cryptodev.h" 25 #include "migration/migration.h" 26 #include "migration/postcopy-ram.h" 27 #include "trace.h" 28 29 #include <sys/ioctl.h> 30 #include <sys/socket.h> 31 #include <sys/un.h> 32 33 #include "standard-headers/linux/vhost_types.h" 34 35 #ifdef CONFIG_LINUX 36 #include <linux/userfaultfd.h> 37 #endif 38 39 #define VHOST_MEMORY_BASELINE_NREGIONS 8 40 #define VHOST_USER_F_PROTOCOL_FEATURES 30 41 #define VHOST_USER_SLAVE_MAX_FDS 8 42 43 /* 44 * Set maximum number of RAM slots supported to 45 * the maximum number supported by the target 46 * hardware plaform. 47 */ 48 #if defined(TARGET_X86) || defined(TARGET_X86_64) || \ 49 defined(TARGET_ARM) || defined(TARGET_ARM_64) 50 #include "hw/acpi/acpi.h" 51 #define VHOST_USER_MAX_RAM_SLOTS ACPI_MAX_RAM_SLOTS 52 53 #elif defined(TARGET_PPC) || defined(TARGET_PPC_64) 54 #include "hw/ppc/spapr.h" 55 #define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS 56 57 #else 58 #define VHOST_USER_MAX_RAM_SLOTS 512 59 #endif 60 61 /* 62 * Maximum size of virtio device config space 63 */ 64 #define VHOST_USER_MAX_CONFIG_SIZE 256 65 66 enum VhostUserProtocolFeature { 67 VHOST_USER_PROTOCOL_F_MQ = 0, 68 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, 69 VHOST_USER_PROTOCOL_F_RARP = 2, 70 VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, 71 VHOST_USER_PROTOCOL_F_NET_MTU = 4, 72 VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, 73 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, 74 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, 75 VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, 76 VHOST_USER_PROTOCOL_F_CONFIG = 9, 77 VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, 78 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, 79 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, 80 VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, 81 /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */ 82 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, 83 VHOST_USER_PROTOCOL_F_MAX 84 }; 85 86 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) 87 88 typedef enum VhostUserRequest { 89 VHOST_USER_NONE = 0, 90 VHOST_USER_GET_FEATURES = 1, 91 VHOST_USER_SET_FEATURES = 2, 92 VHOST_USER_SET_OWNER = 3, 93 VHOST_USER_RESET_OWNER = 4, 94 VHOST_USER_SET_MEM_TABLE = 5, 95 VHOST_USER_SET_LOG_BASE = 6, 96 VHOST_USER_SET_LOG_FD = 7, 97 VHOST_USER_SET_VRING_NUM = 8, 98 VHOST_USER_SET_VRING_ADDR = 9, 99 VHOST_USER_SET_VRING_BASE = 10, 100 VHOST_USER_GET_VRING_BASE = 11, 101 VHOST_USER_SET_VRING_KICK = 12, 102 VHOST_USER_SET_VRING_CALL = 13, 103 VHOST_USER_SET_VRING_ERR = 14, 104 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 105 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 106 VHOST_USER_GET_QUEUE_NUM = 17, 107 VHOST_USER_SET_VRING_ENABLE = 18, 108 VHOST_USER_SEND_RARP = 19, 109 VHOST_USER_NET_SET_MTU = 20, 110 VHOST_USER_SET_SLAVE_REQ_FD = 21, 111 VHOST_USER_IOTLB_MSG = 22, 112 VHOST_USER_SET_VRING_ENDIAN = 23, 113 VHOST_USER_GET_CONFIG = 24, 114 VHOST_USER_SET_CONFIG = 25, 115 VHOST_USER_CREATE_CRYPTO_SESSION = 26, 116 VHOST_USER_CLOSE_CRYPTO_SESSION = 27, 117 VHOST_USER_POSTCOPY_ADVISE = 28, 118 VHOST_USER_POSTCOPY_LISTEN = 29, 119 VHOST_USER_POSTCOPY_END = 30, 120 VHOST_USER_GET_INFLIGHT_FD = 31, 121 VHOST_USER_SET_INFLIGHT_FD = 32, 122 VHOST_USER_GPU_SET_SOCKET = 33, 123 VHOST_USER_RESET_DEVICE = 34, 124 /* Message number 35 reserved for VHOST_USER_VRING_KICK. */ 125 VHOST_USER_GET_MAX_MEM_SLOTS = 36, 126 VHOST_USER_ADD_MEM_REG = 37, 127 VHOST_USER_REM_MEM_REG = 38, 128 VHOST_USER_MAX 129 } VhostUserRequest; 130 131 typedef enum VhostUserSlaveRequest { 132 VHOST_USER_SLAVE_NONE = 0, 133 VHOST_USER_SLAVE_IOTLB_MSG = 1, 134 VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, 135 VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, 136 VHOST_USER_SLAVE_MAX 137 } VhostUserSlaveRequest; 138 139 typedef struct VhostUserMemoryRegion { 140 uint64_t guest_phys_addr; 141 uint64_t memory_size; 142 uint64_t userspace_addr; 143 uint64_t mmap_offset; 144 } VhostUserMemoryRegion; 145 146 typedef struct VhostUserMemory { 147 uint32_t nregions; 148 uint32_t padding; 149 VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS]; 150 } VhostUserMemory; 151 152 typedef struct VhostUserMemRegMsg { 153 uint64_t padding; 154 VhostUserMemoryRegion region; 155 } VhostUserMemRegMsg; 156 157 typedef struct VhostUserLog { 158 uint64_t mmap_size; 159 uint64_t mmap_offset; 160 } VhostUserLog; 161 162 typedef struct VhostUserConfig { 163 uint32_t offset; 164 uint32_t size; 165 uint32_t flags; 166 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; 167 } VhostUserConfig; 168 169 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512 170 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64 171 172 typedef struct VhostUserCryptoSession { 173 /* session id for success, -1 on errors */ 174 int64_t session_id; 175 CryptoDevBackendSymSessionInfo session_setup_data; 176 uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN]; 177 uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN]; 178 } VhostUserCryptoSession; 179 180 static VhostUserConfig c __attribute__ ((unused)); 181 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \ 182 + sizeof(c.size) \ 183 + sizeof(c.flags)) 184 185 typedef struct VhostUserVringArea { 186 uint64_t u64; 187 uint64_t size; 188 uint64_t offset; 189 } VhostUserVringArea; 190 191 typedef struct VhostUserInflight { 192 uint64_t mmap_size; 193 uint64_t mmap_offset; 194 uint16_t num_queues; 195 uint16_t queue_size; 196 } VhostUserInflight; 197 198 typedef struct { 199 VhostUserRequest request; 200 201 #define VHOST_USER_VERSION_MASK (0x3) 202 #define VHOST_USER_REPLY_MASK (0x1<<2) 203 #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) 204 uint32_t flags; 205 uint32_t size; /* the following payload size */ 206 } QEMU_PACKED VhostUserHeader; 207 208 typedef union { 209 #define VHOST_USER_VRING_IDX_MASK (0xff) 210 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) 211 uint64_t u64; 212 struct vhost_vring_state state; 213 struct vhost_vring_addr addr; 214 VhostUserMemory memory; 215 VhostUserMemRegMsg mem_reg; 216 VhostUserLog log; 217 struct vhost_iotlb_msg iotlb; 218 VhostUserConfig config; 219 VhostUserCryptoSession session; 220 VhostUserVringArea area; 221 VhostUserInflight inflight; 222 } VhostUserPayload; 223 224 typedef struct VhostUserMsg { 225 VhostUserHeader hdr; 226 VhostUserPayload payload; 227 } QEMU_PACKED VhostUserMsg; 228 229 static VhostUserMsg m __attribute__ ((unused)); 230 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader)) 231 232 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload)) 233 234 /* The version of the protocol we support */ 235 #define VHOST_USER_VERSION (0x1) 236 237 struct vhost_user { 238 struct vhost_dev *dev; 239 /* Shared between vhost devs of the same virtio device */ 240 VhostUserState *user; 241 QIOChannel *slave_ioc; 242 GSource *slave_src; 243 NotifierWithReturn postcopy_notifier; 244 struct PostCopyFD postcopy_fd; 245 uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS]; 246 /* Length of the region_rb and region_rb_offset arrays */ 247 size_t region_rb_len; 248 /* RAMBlock associated with a given region */ 249 RAMBlock **region_rb; 250 /* The offset from the start of the RAMBlock to the start of the 251 * vhost region. 252 */ 253 ram_addr_t *region_rb_offset; 254 255 /* True once we've entered postcopy_listen */ 256 bool postcopy_listen; 257 258 /* Our current regions */ 259 int num_shadow_regions; 260 struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS]; 261 }; 262 263 struct scrub_regions { 264 struct vhost_memory_region *region; 265 int reg_idx; 266 int fd_idx; 267 }; 268 269 static bool ioeventfd_enabled(void) 270 { 271 return !kvm_enabled() || kvm_eventfds_enabled(); 272 } 273 274 static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg) 275 { 276 struct vhost_user *u = dev->opaque; 277 CharBackend *chr = u->user->chr; 278 uint8_t *p = (uint8_t *) msg; 279 int r, size = VHOST_USER_HDR_SIZE; 280 281 r = qemu_chr_fe_read_all(chr, p, size); 282 if (r != size) { 283 int saved_errno = errno; 284 error_report("Failed to read msg header. Read %d instead of %d." 285 " Original request %d.", r, size, msg->hdr.request); 286 return r < 0 ? -saved_errno : -EIO; 287 } 288 289 /* validate received flags */ 290 if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 291 error_report("Failed to read msg header." 292 " Flags 0x%x instead of 0x%x.", msg->hdr.flags, 293 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 294 return -EPROTO; 295 } 296 297 return 0; 298 } 299 300 struct vhost_user_read_cb_data { 301 struct vhost_dev *dev; 302 VhostUserMsg *msg; 303 GMainLoop *loop; 304 int ret; 305 }; 306 307 static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, 308 gpointer opaque) 309 { 310 struct vhost_user_read_cb_data *data = opaque; 311 struct vhost_dev *dev = data->dev; 312 VhostUserMsg *msg = data->msg; 313 struct vhost_user *u = dev->opaque; 314 CharBackend *chr = u->user->chr; 315 uint8_t *p = (uint8_t *) msg; 316 int r, size; 317 318 r = vhost_user_read_header(dev, msg); 319 if (r < 0) { 320 data->ret = r; 321 goto end; 322 } 323 324 /* validate message size is sane */ 325 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) { 326 error_report("Failed to read msg header." 327 " Size %d exceeds the maximum %zu.", msg->hdr.size, 328 VHOST_USER_PAYLOAD_SIZE); 329 data->ret = -EPROTO; 330 goto end; 331 } 332 333 if (msg->hdr.size) { 334 p += VHOST_USER_HDR_SIZE; 335 size = msg->hdr.size; 336 r = qemu_chr_fe_read_all(chr, p, size); 337 if (r != size) { 338 int saved_errno = errno; 339 error_report("Failed to read msg payload." 340 " Read %d instead of %d.", r, msg->hdr.size); 341 data->ret = r < 0 ? -saved_errno : -EIO; 342 goto end; 343 } 344 } 345 346 end: 347 g_main_loop_quit(data->loop); 348 return G_SOURCE_REMOVE; 349 } 350 351 static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, 352 gpointer opaque); 353 354 /* 355 * This updates the read handler to use a new event loop context. 356 * Event sources are removed from the previous context : this ensures 357 * that events detected in the previous context are purged. They will 358 * be re-detected and processed in the new context. 359 */ 360 static void slave_update_read_handler(struct vhost_dev *dev, 361 GMainContext *ctxt) 362 { 363 struct vhost_user *u = dev->opaque; 364 365 if (!u->slave_ioc) { 366 return; 367 } 368 369 if (u->slave_src) { 370 g_source_destroy(u->slave_src); 371 g_source_unref(u->slave_src); 372 } 373 374 u->slave_src = qio_channel_add_watch_source(u->slave_ioc, 375 G_IO_IN | G_IO_HUP, 376 slave_read, dev, NULL, 377 ctxt); 378 } 379 380 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 381 { 382 struct vhost_user *u = dev->opaque; 383 CharBackend *chr = u->user->chr; 384 GMainContext *prev_ctxt = chr->chr->gcontext; 385 GMainContext *ctxt = g_main_context_new(); 386 GMainLoop *loop = g_main_loop_new(ctxt, FALSE); 387 struct vhost_user_read_cb_data data = { 388 .dev = dev, 389 .loop = loop, 390 .msg = msg, 391 .ret = 0 392 }; 393 394 /* 395 * We want to be able to monitor the slave channel fd while waiting 396 * for chr I/O. This requires an event loop, but we can't nest the 397 * one to which chr is currently attached : its fd handlers might not 398 * be prepared for re-entrancy. So we create a new one and switch chr 399 * to use it. 400 */ 401 slave_update_read_handler(dev, ctxt); 402 qemu_chr_be_update_read_handlers(chr->chr, ctxt); 403 qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data); 404 405 g_main_loop_run(loop); 406 407 /* 408 * Restore the previous event loop context. This also destroys/recreates 409 * event sources : this guarantees that all pending events in the original 410 * context that have been processed by the nested loop are purged. 411 */ 412 qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt); 413 slave_update_read_handler(dev, NULL); 414 415 g_main_loop_unref(loop); 416 g_main_context_unref(ctxt); 417 418 return data.ret; 419 } 420 421 static int process_message_reply(struct vhost_dev *dev, 422 const VhostUserMsg *msg) 423 { 424 int ret; 425 VhostUserMsg msg_reply; 426 427 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { 428 return 0; 429 } 430 431 ret = vhost_user_read(dev, &msg_reply); 432 if (ret < 0) { 433 return ret; 434 } 435 436 if (msg_reply.hdr.request != msg->hdr.request) { 437 error_report("Received unexpected msg type. " 438 "Expected %d received %d", 439 msg->hdr.request, msg_reply.hdr.request); 440 return -EPROTO; 441 } 442 443 return msg_reply.payload.u64 ? -EIO : 0; 444 } 445 446 static bool vhost_user_one_time_request(VhostUserRequest request) 447 { 448 switch (request) { 449 case VHOST_USER_SET_OWNER: 450 case VHOST_USER_RESET_OWNER: 451 case VHOST_USER_SET_MEM_TABLE: 452 case VHOST_USER_GET_QUEUE_NUM: 453 case VHOST_USER_NET_SET_MTU: 454 return true; 455 default: 456 return false; 457 } 458 } 459 460 /* most non-init callers ignore the error */ 461 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 462 int *fds, int fd_num) 463 { 464 struct vhost_user *u = dev->opaque; 465 CharBackend *chr = u->user->chr; 466 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size; 467 468 /* 469 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, 470 * we just need send it once in the first time. For later such 471 * request, we just ignore it. 472 */ 473 if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) { 474 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 475 return 0; 476 } 477 478 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) { 479 error_report("Failed to set msg fds."); 480 return -EINVAL; 481 } 482 483 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size); 484 if (ret != size) { 485 int saved_errno = errno; 486 error_report("Failed to write msg." 487 " Wrote %d instead of %d.", ret, size); 488 return ret < 0 ? -saved_errno : -EIO; 489 } 490 491 return 0; 492 } 493 494 int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd) 495 { 496 VhostUserMsg msg = { 497 .hdr.request = VHOST_USER_GPU_SET_SOCKET, 498 .hdr.flags = VHOST_USER_VERSION, 499 }; 500 501 return vhost_user_write(dev, &msg, &fd, 1); 502 } 503 504 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, 505 struct vhost_log *log) 506 { 507 int fds[VHOST_USER_MAX_RAM_SLOTS]; 508 size_t fd_num = 0; 509 bool shmfd = virtio_has_feature(dev->protocol_features, 510 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 511 int ret; 512 VhostUserMsg msg = { 513 .hdr.request = VHOST_USER_SET_LOG_BASE, 514 .hdr.flags = VHOST_USER_VERSION, 515 .payload.log.mmap_size = log->size * sizeof(*(log->log)), 516 .payload.log.mmap_offset = 0, 517 .hdr.size = sizeof(msg.payload.log), 518 }; 519 520 if (shmfd && log->fd != -1) { 521 fds[fd_num++] = log->fd; 522 } 523 524 ret = vhost_user_write(dev, &msg, fds, fd_num); 525 if (ret < 0) { 526 return ret; 527 } 528 529 if (shmfd) { 530 msg.hdr.size = 0; 531 ret = vhost_user_read(dev, &msg); 532 if (ret < 0) { 533 return ret; 534 } 535 536 if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) { 537 error_report("Received unexpected msg type. " 538 "Expected %d received %d", 539 VHOST_USER_SET_LOG_BASE, msg.hdr.request); 540 return -EPROTO; 541 } 542 } 543 544 return 0; 545 } 546 547 static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset, 548 int *fd) 549 { 550 MemoryRegion *mr; 551 552 assert((uintptr_t)addr == addr); 553 mr = memory_region_from_host((void *)(uintptr_t)addr, offset); 554 *fd = memory_region_get_fd(mr); 555 556 return mr; 557 } 558 559 static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst, 560 struct vhost_memory_region *src, 561 uint64_t mmap_offset) 562 { 563 assert(src != NULL && dst != NULL); 564 dst->userspace_addr = src->userspace_addr; 565 dst->memory_size = src->memory_size; 566 dst->guest_phys_addr = src->guest_phys_addr; 567 dst->mmap_offset = mmap_offset; 568 } 569 570 static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, 571 struct vhost_dev *dev, 572 VhostUserMsg *msg, 573 int *fds, size_t *fd_num, 574 bool track_ramblocks) 575 { 576 int i, fd; 577 ram_addr_t offset; 578 MemoryRegion *mr; 579 struct vhost_memory_region *reg; 580 VhostUserMemoryRegion region_buffer; 581 582 msg->hdr.request = VHOST_USER_SET_MEM_TABLE; 583 584 for (i = 0; i < dev->mem->nregions; ++i) { 585 reg = dev->mem->regions + i; 586 587 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 588 if (fd > 0) { 589 if (track_ramblocks) { 590 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS); 591 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name, 592 reg->memory_size, 593 reg->guest_phys_addr, 594 reg->userspace_addr, 595 offset); 596 u->region_rb_offset[i] = offset; 597 u->region_rb[i] = mr->ram_block; 598 } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) { 599 error_report("Failed preparing vhost-user memory table msg"); 600 return -ENOBUFS; 601 } 602 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 603 msg->payload.memory.regions[*fd_num] = region_buffer; 604 fds[(*fd_num)++] = fd; 605 } else if (track_ramblocks) { 606 u->region_rb_offset[i] = 0; 607 u->region_rb[i] = NULL; 608 } 609 } 610 611 msg->payload.memory.nregions = *fd_num; 612 613 if (!*fd_num) { 614 error_report("Failed initializing vhost-user memory map, " 615 "consider using -object memory-backend-file share=on"); 616 return -EINVAL; 617 } 618 619 msg->hdr.size = sizeof(msg->payload.memory.nregions); 620 msg->hdr.size += sizeof(msg->payload.memory.padding); 621 msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion); 622 623 return 0; 624 } 625 626 static inline bool reg_equal(struct vhost_memory_region *shadow_reg, 627 struct vhost_memory_region *vdev_reg) 628 { 629 return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr && 630 shadow_reg->userspace_addr == vdev_reg->userspace_addr && 631 shadow_reg->memory_size == vdev_reg->memory_size; 632 } 633 634 static void scrub_shadow_regions(struct vhost_dev *dev, 635 struct scrub_regions *add_reg, 636 int *nr_add_reg, 637 struct scrub_regions *rem_reg, 638 int *nr_rem_reg, uint64_t *shadow_pcb, 639 bool track_ramblocks) 640 { 641 struct vhost_user *u = dev->opaque; 642 bool found[VHOST_USER_MAX_RAM_SLOTS] = {}; 643 struct vhost_memory_region *reg, *shadow_reg; 644 int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0; 645 ram_addr_t offset; 646 MemoryRegion *mr; 647 bool matching; 648 649 /* 650 * Find memory regions present in our shadow state which are not in 651 * the device's current memory state. 652 * 653 * Mark regions in both the shadow and device state as "found". 654 */ 655 for (i = 0; i < u->num_shadow_regions; i++) { 656 shadow_reg = &u->shadow_regions[i]; 657 matching = false; 658 659 for (j = 0; j < dev->mem->nregions; j++) { 660 reg = &dev->mem->regions[j]; 661 662 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 663 664 if (reg_equal(shadow_reg, reg)) { 665 matching = true; 666 found[j] = true; 667 if (track_ramblocks) { 668 /* 669 * Reset postcopy client bases, region_rb, and 670 * region_rb_offset in case regions are removed. 671 */ 672 if (fd > 0) { 673 u->region_rb_offset[j] = offset; 674 u->region_rb[j] = mr->ram_block; 675 shadow_pcb[j] = u->postcopy_client_bases[i]; 676 } else { 677 u->region_rb_offset[j] = 0; 678 u->region_rb[j] = NULL; 679 } 680 } 681 break; 682 } 683 } 684 685 /* 686 * If the region was not found in the current device memory state 687 * create an entry for it in the removed list. 688 */ 689 if (!matching) { 690 rem_reg[rm_idx].region = shadow_reg; 691 rem_reg[rm_idx++].reg_idx = i; 692 } 693 } 694 695 /* 696 * For regions not marked "found", create entries in the added list. 697 * 698 * Note their indexes in the device memory state and the indexes of their 699 * file descriptors. 700 */ 701 for (i = 0; i < dev->mem->nregions; i++) { 702 reg = &dev->mem->regions[i]; 703 vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 704 if (fd > 0) { 705 ++fd_num; 706 } 707 708 /* 709 * If the region was in both the shadow and device state we don't 710 * need to send a VHOST_USER_ADD_MEM_REG message for it. 711 */ 712 if (found[i]) { 713 continue; 714 } 715 716 add_reg[add_idx].region = reg; 717 add_reg[add_idx].reg_idx = i; 718 add_reg[add_idx++].fd_idx = fd_num; 719 } 720 *nr_rem_reg = rm_idx; 721 *nr_add_reg = add_idx; 722 723 return; 724 } 725 726 static int send_remove_regions(struct vhost_dev *dev, 727 struct scrub_regions *remove_reg, 728 int nr_rem_reg, VhostUserMsg *msg, 729 bool reply_supported) 730 { 731 struct vhost_user *u = dev->opaque; 732 struct vhost_memory_region *shadow_reg; 733 int i, fd, shadow_reg_idx, ret; 734 ram_addr_t offset; 735 VhostUserMemoryRegion region_buffer; 736 737 /* 738 * The regions in remove_reg appear in the same order they do in the 739 * shadow table. Therefore we can minimize memory copies by iterating 740 * through remove_reg backwards. 741 */ 742 for (i = nr_rem_reg - 1; i >= 0; i--) { 743 shadow_reg = remove_reg[i].region; 744 shadow_reg_idx = remove_reg[i].reg_idx; 745 746 vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd); 747 748 if (fd > 0) { 749 msg->hdr.request = VHOST_USER_REM_MEM_REG; 750 vhost_user_fill_msg_region(®ion_buffer, shadow_reg, 0); 751 msg->payload.mem_reg.region = region_buffer; 752 753 ret = vhost_user_write(dev, msg, &fd, 1); 754 if (ret < 0) { 755 return ret; 756 } 757 758 if (reply_supported) { 759 ret = process_message_reply(dev, msg); 760 if (ret) { 761 return ret; 762 } 763 } 764 } 765 766 /* 767 * At this point we know the backend has unmapped the region. It is now 768 * safe to remove it from the shadow table. 769 */ 770 memmove(&u->shadow_regions[shadow_reg_idx], 771 &u->shadow_regions[shadow_reg_idx + 1], 772 sizeof(struct vhost_memory_region) * 773 (u->num_shadow_regions - shadow_reg_idx - 1)); 774 u->num_shadow_regions--; 775 } 776 777 return 0; 778 } 779 780 static int send_add_regions(struct vhost_dev *dev, 781 struct scrub_regions *add_reg, int nr_add_reg, 782 VhostUserMsg *msg, uint64_t *shadow_pcb, 783 bool reply_supported, bool track_ramblocks) 784 { 785 struct vhost_user *u = dev->opaque; 786 int i, fd, ret, reg_idx, reg_fd_idx; 787 struct vhost_memory_region *reg; 788 MemoryRegion *mr; 789 ram_addr_t offset; 790 VhostUserMsg msg_reply; 791 VhostUserMemoryRegion region_buffer; 792 793 for (i = 0; i < nr_add_reg; i++) { 794 reg = add_reg[i].region; 795 reg_idx = add_reg[i].reg_idx; 796 reg_fd_idx = add_reg[i].fd_idx; 797 798 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 799 800 if (fd > 0) { 801 if (track_ramblocks) { 802 trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name, 803 reg->memory_size, 804 reg->guest_phys_addr, 805 reg->userspace_addr, 806 offset); 807 u->region_rb_offset[reg_idx] = offset; 808 u->region_rb[reg_idx] = mr->ram_block; 809 } 810 msg->hdr.request = VHOST_USER_ADD_MEM_REG; 811 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 812 msg->payload.mem_reg.region = region_buffer; 813 814 ret = vhost_user_write(dev, msg, &fd, 1); 815 if (ret < 0) { 816 return ret; 817 } 818 819 if (track_ramblocks) { 820 uint64_t reply_gpa; 821 822 ret = vhost_user_read(dev, &msg_reply); 823 if (ret < 0) { 824 return ret; 825 } 826 827 reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr; 828 829 if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) { 830 error_report("%s: Received unexpected msg type." 831 "Expected %d received %d", __func__, 832 VHOST_USER_ADD_MEM_REG, 833 msg_reply.hdr.request); 834 return -EPROTO; 835 } 836 837 /* 838 * We're using the same structure, just reusing one of the 839 * fields, so it should be the same size. 840 */ 841 if (msg_reply.hdr.size != msg->hdr.size) { 842 error_report("%s: Unexpected size for postcopy reply " 843 "%d vs %d", __func__, msg_reply.hdr.size, 844 msg->hdr.size); 845 return -EPROTO; 846 } 847 848 /* Get the postcopy client base from the backend's reply. */ 849 if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) { 850 shadow_pcb[reg_idx] = 851 msg_reply.payload.mem_reg.region.userspace_addr; 852 trace_vhost_user_set_mem_table_postcopy( 853 msg_reply.payload.mem_reg.region.userspace_addr, 854 msg->payload.mem_reg.region.userspace_addr, 855 reg_fd_idx, reg_idx); 856 } else { 857 error_report("%s: invalid postcopy reply for region. " 858 "Got guest physical address %" PRIX64 ", expected " 859 "%" PRIX64, __func__, reply_gpa, 860 dev->mem->regions[reg_idx].guest_phys_addr); 861 return -EPROTO; 862 } 863 } else if (reply_supported) { 864 ret = process_message_reply(dev, msg); 865 if (ret) { 866 return ret; 867 } 868 } 869 } else if (track_ramblocks) { 870 u->region_rb_offset[reg_idx] = 0; 871 u->region_rb[reg_idx] = NULL; 872 } 873 874 /* 875 * At this point, we know the backend has mapped in the new 876 * region, if the region has a valid file descriptor. 877 * 878 * The region should now be added to the shadow table. 879 */ 880 u->shadow_regions[u->num_shadow_regions].guest_phys_addr = 881 reg->guest_phys_addr; 882 u->shadow_regions[u->num_shadow_regions].userspace_addr = 883 reg->userspace_addr; 884 u->shadow_regions[u->num_shadow_regions].memory_size = 885 reg->memory_size; 886 u->num_shadow_regions++; 887 } 888 889 return 0; 890 } 891 892 static int vhost_user_add_remove_regions(struct vhost_dev *dev, 893 VhostUserMsg *msg, 894 bool reply_supported, 895 bool track_ramblocks) 896 { 897 struct vhost_user *u = dev->opaque; 898 struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS]; 899 struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS]; 900 uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {}; 901 int nr_add_reg, nr_rem_reg; 902 int ret; 903 904 msg->hdr.size = sizeof(msg->payload.mem_reg); 905 906 /* Find the regions which need to be removed or added. */ 907 scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg, 908 shadow_pcb, track_ramblocks); 909 910 if (nr_rem_reg) { 911 ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg, 912 reply_supported); 913 if (ret < 0) { 914 goto err; 915 } 916 } 917 918 if (nr_add_reg) { 919 ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb, 920 reply_supported, track_ramblocks); 921 if (ret < 0) { 922 goto err; 923 } 924 } 925 926 if (track_ramblocks) { 927 memcpy(u->postcopy_client_bases, shadow_pcb, 928 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 929 /* 930 * Now we've registered this with the postcopy code, we ack to the 931 * client, because now we're in the position to be able to deal with 932 * any faults it generates. 933 */ 934 /* TODO: Use this for failure cases as well with a bad value. */ 935 msg->hdr.size = sizeof(msg->payload.u64); 936 msg->payload.u64 = 0; /* OK */ 937 938 ret = vhost_user_write(dev, msg, NULL, 0); 939 if (ret < 0) { 940 return ret; 941 } 942 } 943 944 return 0; 945 946 err: 947 if (track_ramblocks) { 948 memcpy(u->postcopy_client_bases, shadow_pcb, 949 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 950 } 951 952 return ret; 953 } 954 955 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, 956 struct vhost_memory *mem, 957 bool reply_supported, 958 bool config_mem_slots) 959 { 960 struct vhost_user *u = dev->opaque; 961 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 962 size_t fd_num = 0; 963 VhostUserMsg msg_reply; 964 int region_i, msg_i; 965 int ret; 966 967 VhostUserMsg msg = { 968 .hdr.flags = VHOST_USER_VERSION, 969 }; 970 971 if (u->region_rb_len < dev->mem->nregions) { 972 u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions); 973 u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset, 974 dev->mem->nregions); 975 memset(&(u->region_rb[u->region_rb_len]), '\0', 976 sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len)); 977 memset(&(u->region_rb_offset[u->region_rb_len]), '\0', 978 sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len)); 979 u->region_rb_len = dev->mem->nregions; 980 } 981 982 if (config_mem_slots) { 983 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true); 984 if (ret < 0) { 985 return ret; 986 } 987 } else { 988 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 989 true); 990 if (ret < 0) { 991 return ret; 992 } 993 994 ret = vhost_user_write(dev, &msg, fds, fd_num); 995 if (ret < 0) { 996 return ret; 997 } 998 999 ret = vhost_user_read(dev, &msg_reply); 1000 if (ret < 0) { 1001 return ret; 1002 } 1003 1004 if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { 1005 error_report("%s: Received unexpected msg type." 1006 "Expected %d received %d", __func__, 1007 VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); 1008 return -EPROTO; 1009 } 1010 1011 /* 1012 * We're using the same structure, just reusing one of the 1013 * fields, so it should be the same size. 1014 */ 1015 if (msg_reply.hdr.size != msg.hdr.size) { 1016 error_report("%s: Unexpected size for postcopy reply " 1017 "%d vs %d", __func__, msg_reply.hdr.size, 1018 msg.hdr.size); 1019 return -EPROTO; 1020 } 1021 1022 memset(u->postcopy_client_bases, 0, 1023 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 1024 1025 /* 1026 * They're in the same order as the regions that were sent 1027 * but some of the regions were skipped (above) if they 1028 * didn't have fd's 1029 */ 1030 for (msg_i = 0, region_i = 0; 1031 region_i < dev->mem->nregions; 1032 region_i++) { 1033 if (msg_i < fd_num && 1034 msg_reply.payload.memory.regions[msg_i].guest_phys_addr == 1035 dev->mem->regions[region_i].guest_phys_addr) { 1036 u->postcopy_client_bases[region_i] = 1037 msg_reply.payload.memory.regions[msg_i].userspace_addr; 1038 trace_vhost_user_set_mem_table_postcopy( 1039 msg_reply.payload.memory.regions[msg_i].userspace_addr, 1040 msg.payload.memory.regions[msg_i].userspace_addr, 1041 msg_i, region_i); 1042 msg_i++; 1043 } 1044 } 1045 if (msg_i != fd_num) { 1046 error_report("%s: postcopy reply not fully consumed " 1047 "%d vs %zd", 1048 __func__, msg_i, fd_num); 1049 return -EIO; 1050 } 1051 1052 /* 1053 * Now we've registered this with the postcopy code, we ack to the 1054 * client, because now we're in the position to be able to deal 1055 * with any faults it generates. 1056 */ 1057 /* TODO: Use this for failure cases as well with a bad value. */ 1058 msg.hdr.size = sizeof(msg.payload.u64); 1059 msg.payload.u64 = 0; /* OK */ 1060 ret = vhost_user_write(dev, &msg, NULL, 0); 1061 if (ret < 0) { 1062 return ret; 1063 } 1064 } 1065 1066 return 0; 1067 } 1068 1069 static int vhost_user_set_mem_table(struct vhost_dev *dev, 1070 struct vhost_memory *mem) 1071 { 1072 struct vhost_user *u = dev->opaque; 1073 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 1074 size_t fd_num = 0; 1075 bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; 1076 bool reply_supported = virtio_has_feature(dev->protocol_features, 1077 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1078 bool config_mem_slots = 1079 virtio_has_feature(dev->protocol_features, 1080 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS); 1081 int ret; 1082 1083 if (do_postcopy) { 1084 /* 1085 * Postcopy has enough differences that it's best done in it's own 1086 * version 1087 */ 1088 return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported, 1089 config_mem_slots); 1090 } 1091 1092 VhostUserMsg msg = { 1093 .hdr.flags = VHOST_USER_VERSION, 1094 }; 1095 1096 if (reply_supported) { 1097 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1098 } 1099 1100 if (config_mem_slots) { 1101 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false); 1102 if (ret < 0) { 1103 return ret; 1104 } 1105 } else { 1106 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 1107 false); 1108 if (ret < 0) { 1109 return ret; 1110 } 1111 1112 ret = vhost_user_write(dev, &msg, fds, fd_num); 1113 if (ret < 0) { 1114 return ret; 1115 } 1116 1117 if (reply_supported) { 1118 return process_message_reply(dev, &msg); 1119 } 1120 } 1121 1122 return 0; 1123 } 1124 1125 static int vhost_user_set_vring_endian(struct vhost_dev *dev, 1126 struct vhost_vring_state *ring) 1127 { 1128 bool cross_endian = virtio_has_feature(dev->protocol_features, 1129 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN); 1130 VhostUserMsg msg = { 1131 .hdr.request = VHOST_USER_SET_VRING_ENDIAN, 1132 .hdr.flags = VHOST_USER_VERSION, 1133 .payload.state = *ring, 1134 .hdr.size = sizeof(msg.payload.state), 1135 }; 1136 1137 if (!cross_endian) { 1138 error_report("vhost-user trying to send unhandled ioctl"); 1139 return -ENOTSUP; 1140 } 1141 1142 return vhost_user_write(dev, &msg, NULL, 0); 1143 } 1144 1145 static int vhost_set_vring(struct vhost_dev *dev, 1146 unsigned long int request, 1147 struct vhost_vring_state *ring) 1148 { 1149 VhostUserMsg msg = { 1150 .hdr.request = request, 1151 .hdr.flags = VHOST_USER_VERSION, 1152 .payload.state = *ring, 1153 .hdr.size = sizeof(msg.payload.state), 1154 }; 1155 1156 return vhost_user_write(dev, &msg, NULL, 0); 1157 } 1158 1159 static int vhost_user_set_vring_num(struct vhost_dev *dev, 1160 struct vhost_vring_state *ring) 1161 { 1162 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); 1163 } 1164 1165 static void vhost_user_host_notifier_restore(struct vhost_dev *dev, 1166 int queue_idx) 1167 { 1168 struct vhost_user *u = dev->opaque; 1169 VhostUserHostNotifier *n = &u->user->notifier[queue_idx]; 1170 VirtIODevice *vdev = dev->vdev; 1171 1172 if (n->addr && !n->set) { 1173 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true); 1174 n->set = true; 1175 } 1176 } 1177 1178 static void vhost_user_host_notifier_remove(struct vhost_dev *dev, 1179 int queue_idx) 1180 { 1181 struct vhost_user *u = dev->opaque; 1182 VhostUserHostNotifier *n = &u->user->notifier[queue_idx]; 1183 VirtIODevice *vdev = dev->vdev; 1184 1185 if (n->addr && n->set) { 1186 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false); 1187 n->set = false; 1188 } 1189 } 1190 1191 static int vhost_user_set_vring_base(struct vhost_dev *dev, 1192 struct vhost_vring_state *ring) 1193 { 1194 vhost_user_host_notifier_restore(dev, ring->index); 1195 1196 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); 1197 } 1198 1199 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) 1200 { 1201 int i; 1202 1203 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { 1204 return -EINVAL; 1205 } 1206 1207 for (i = 0; i < dev->nvqs; ++i) { 1208 int ret; 1209 struct vhost_vring_state state = { 1210 .index = dev->vq_index + i, 1211 .num = enable, 1212 }; 1213 1214 ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); 1215 if (ret < 0) { 1216 /* 1217 * Restoring the previous state is likely infeasible, as well as 1218 * proceeding regardless the error, so just bail out and hope for 1219 * the device-level recovery. 1220 */ 1221 return ret; 1222 } 1223 } 1224 1225 return 0; 1226 } 1227 1228 static int vhost_user_get_vring_base(struct vhost_dev *dev, 1229 struct vhost_vring_state *ring) 1230 { 1231 int ret; 1232 VhostUserMsg msg = { 1233 .hdr.request = VHOST_USER_GET_VRING_BASE, 1234 .hdr.flags = VHOST_USER_VERSION, 1235 .payload.state = *ring, 1236 .hdr.size = sizeof(msg.payload.state), 1237 }; 1238 1239 vhost_user_host_notifier_remove(dev, ring->index); 1240 1241 ret = vhost_user_write(dev, &msg, NULL, 0); 1242 if (ret < 0) { 1243 return ret; 1244 } 1245 1246 ret = vhost_user_read(dev, &msg); 1247 if (ret < 0) { 1248 return ret; 1249 } 1250 1251 if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) { 1252 error_report("Received unexpected msg type. Expected %d received %d", 1253 VHOST_USER_GET_VRING_BASE, msg.hdr.request); 1254 return -EPROTO; 1255 } 1256 1257 if (msg.hdr.size != sizeof(msg.payload.state)) { 1258 error_report("Received bad msg size."); 1259 return -EPROTO; 1260 } 1261 1262 *ring = msg.payload.state; 1263 1264 return 0; 1265 } 1266 1267 static int vhost_set_vring_file(struct vhost_dev *dev, 1268 VhostUserRequest request, 1269 struct vhost_vring_file *file) 1270 { 1271 int fds[VHOST_USER_MAX_RAM_SLOTS]; 1272 size_t fd_num = 0; 1273 VhostUserMsg msg = { 1274 .hdr.request = request, 1275 .hdr.flags = VHOST_USER_VERSION, 1276 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, 1277 .hdr.size = sizeof(msg.payload.u64), 1278 }; 1279 1280 if (ioeventfd_enabled() && file->fd > 0) { 1281 fds[fd_num++] = file->fd; 1282 } else { 1283 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 1284 } 1285 1286 return vhost_user_write(dev, &msg, fds, fd_num); 1287 } 1288 1289 static int vhost_user_set_vring_kick(struct vhost_dev *dev, 1290 struct vhost_vring_file *file) 1291 { 1292 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); 1293 } 1294 1295 static int vhost_user_set_vring_call(struct vhost_dev *dev, 1296 struct vhost_vring_file *file) 1297 { 1298 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); 1299 } 1300 1301 1302 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) 1303 { 1304 int ret; 1305 VhostUserMsg msg = { 1306 .hdr.request = request, 1307 .hdr.flags = VHOST_USER_VERSION, 1308 }; 1309 1310 if (vhost_user_one_time_request(request) && dev->vq_index != 0) { 1311 return 0; 1312 } 1313 1314 ret = vhost_user_write(dev, &msg, NULL, 0); 1315 if (ret < 0) { 1316 return ret; 1317 } 1318 1319 ret = vhost_user_read(dev, &msg); 1320 if (ret < 0) { 1321 return ret; 1322 } 1323 1324 if (msg.hdr.request != request) { 1325 error_report("Received unexpected msg type. Expected %d received %d", 1326 request, msg.hdr.request); 1327 return -EPROTO; 1328 } 1329 1330 if (msg.hdr.size != sizeof(msg.payload.u64)) { 1331 error_report("Received bad msg size."); 1332 return -EPROTO; 1333 } 1334 1335 *u64 = msg.payload.u64; 1336 1337 return 0; 1338 } 1339 1340 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) 1341 { 1342 if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) { 1343 return -EPROTO; 1344 } 1345 1346 return 0; 1347 } 1348 1349 static int enforce_reply(struct vhost_dev *dev, 1350 const VhostUserMsg *msg) 1351 { 1352 uint64_t dummy; 1353 1354 if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1355 return process_message_reply(dev, msg); 1356 } 1357 1358 /* 1359 * We need to wait for a reply but the backend does not 1360 * support replies for the command we just sent. 1361 * Send VHOST_USER_GET_FEATURES which makes all backends 1362 * send a reply. 1363 */ 1364 return vhost_user_get_features(dev, &dummy); 1365 } 1366 1367 static int vhost_user_set_vring_addr(struct vhost_dev *dev, 1368 struct vhost_vring_addr *addr) 1369 { 1370 int ret; 1371 VhostUserMsg msg = { 1372 .hdr.request = VHOST_USER_SET_VRING_ADDR, 1373 .hdr.flags = VHOST_USER_VERSION, 1374 .payload.addr = *addr, 1375 .hdr.size = sizeof(msg.payload.addr), 1376 }; 1377 1378 bool reply_supported = virtio_has_feature(dev->protocol_features, 1379 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1380 1381 /* 1382 * wait for a reply if logging is enabled to make sure 1383 * backend is actually logging changes 1384 */ 1385 bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG); 1386 1387 if (reply_supported && wait_for_reply) { 1388 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1389 } 1390 1391 ret = vhost_user_write(dev, &msg, NULL, 0); 1392 if (ret < 0) { 1393 return ret; 1394 } 1395 1396 if (wait_for_reply) { 1397 return enforce_reply(dev, &msg); 1398 } 1399 1400 return 0; 1401 } 1402 1403 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64, 1404 bool wait_for_reply) 1405 { 1406 VhostUserMsg msg = { 1407 .hdr.request = request, 1408 .hdr.flags = VHOST_USER_VERSION, 1409 .payload.u64 = u64, 1410 .hdr.size = sizeof(msg.payload.u64), 1411 }; 1412 int ret; 1413 1414 if (wait_for_reply) { 1415 bool reply_supported = virtio_has_feature(dev->protocol_features, 1416 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1417 if (reply_supported) { 1418 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1419 } 1420 } 1421 1422 ret = vhost_user_write(dev, &msg, NULL, 0); 1423 if (ret < 0) { 1424 return ret; 1425 } 1426 1427 if (wait_for_reply) { 1428 return enforce_reply(dev, &msg); 1429 } 1430 1431 return 0; 1432 } 1433 1434 static int vhost_user_set_features(struct vhost_dev *dev, 1435 uint64_t features) 1436 { 1437 /* 1438 * wait for a reply if logging is enabled to make sure 1439 * backend is actually logging changes 1440 */ 1441 bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL); 1442 1443 return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features, 1444 log_enabled); 1445 } 1446 1447 static int vhost_user_set_protocol_features(struct vhost_dev *dev, 1448 uint64_t features) 1449 { 1450 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features, 1451 false); 1452 } 1453 1454 static int vhost_user_set_owner(struct vhost_dev *dev) 1455 { 1456 VhostUserMsg msg = { 1457 .hdr.request = VHOST_USER_SET_OWNER, 1458 .hdr.flags = VHOST_USER_VERSION, 1459 }; 1460 1461 return vhost_user_write(dev, &msg, NULL, 0); 1462 } 1463 1464 static int vhost_user_get_max_memslots(struct vhost_dev *dev, 1465 uint64_t *max_memslots) 1466 { 1467 uint64_t backend_max_memslots; 1468 int err; 1469 1470 err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS, 1471 &backend_max_memslots); 1472 if (err < 0) { 1473 return err; 1474 } 1475 1476 *max_memslots = backend_max_memslots; 1477 1478 return 0; 1479 } 1480 1481 static int vhost_user_reset_device(struct vhost_dev *dev) 1482 { 1483 VhostUserMsg msg = { 1484 .hdr.flags = VHOST_USER_VERSION, 1485 }; 1486 1487 msg.hdr.request = virtio_has_feature(dev->protocol_features, 1488 VHOST_USER_PROTOCOL_F_RESET_DEVICE) 1489 ? VHOST_USER_RESET_DEVICE 1490 : VHOST_USER_RESET_OWNER; 1491 1492 return vhost_user_write(dev, &msg, NULL, 0); 1493 } 1494 1495 static int vhost_user_slave_handle_config_change(struct vhost_dev *dev) 1496 { 1497 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { 1498 return -ENOSYS; 1499 } 1500 1501 return dev->config_ops->vhost_dev_config_notifier(dev); 1502 } 1503 1504 static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, 1505 VhostUserVringArea *area, 1506 int fd) 1507 { 1508 int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK; 1509 size_t page_size = qemu_real_host_page_size; 1510 struct vhost_user *u = dev->opaque; 1511 VhostUserState *user = u->user; 1512 VirtIODevice *vdev = dev->vdev; 1513 VhostUserHostNotifier *n; 1514 void *addr; 1515 char *name; 1516 1517 if (!virtio_has_feature(dev->protocol_features, 1518 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) || 1519 vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) { 1520 return -EINVAL; 1521 } 1522 1523 n = &user->notifier[queue_idx]; 1524 1525 if (n->addr) { 1526 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false); 1527 object_unparent(OBJECT(&n->mr)); 1528 munmap(n->addr, page_size); 1529 n->addr = NULL; 1530 } 1531 1532 if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { 1533 return 0; 1534 } 1535 1536 /* Sanity check. */ 1537 if (area->size != page_size) { 1538 return -EINVAL; 1539 } 1540 1541 addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, 1542 fd, area->offset); 1543 if (addr == MAP_FAILED) { 1544 return -EFAULT; 1545 } 1546 1547 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]", 1548 user, queue_idx); 1549 if (!n->mr.ram) /* Don't init again after suspend. */ 1550 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, 1551 page_size, addr); 1552 g_free(name); 1553 1554 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) { 1555 object_unparent(OBJECT(&n->mr)); 1556 munmap(addr, page_size); 1557 return -ENXIO; 1558 } 1559 1560 n->addr = addr; 1561 n->set = true; 1562 1563 return 0; 1564 } 1565 1566 static void close_slave_channel(struct vhost_user *u) 1567 { 1568 g_source_destroy(u->slave_src); 1569 g_source_unref(u->slave_src); 1570 u->slave_src = NULL; 1571 object_unref(OBJECT(u->slave_ioc)); 1572 u->slave_ioc = NULL; 1573 } 1574 1575 static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, 1576 gpointer opaque) 1577 { 1578 struct vhost_dev *dev = opaque; 1579 struct vhost_user *u = dev->opaque; 1580 VhostUserHeader hdr = { 0, }; 1581 VhostUserPayload payload = { 0, }; 1582 Error *local_err = NULL; 1583 gboolean rc = G_SOURCE_CONTINUE; 1584 int ret = 0; 1585 struct iovec iov; 1586 g_autofree int *fd = NULL; 1587 size_t fdsize = 0; 1588 int i; 1589 1590 /* Read header */ 1591 iov.iov_base = &hdr; 1592 iov.iov_len = VHOST_USER_HDR_SIZE; 1593 1594 if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) { 1595 error_report_err(local_err); 1596 goto err; 1597 } 1598 1599 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) { 1600 error_report("Failed to read msg header." 1601 " Size %d exceeds the maximum %zu.", hdr.size, 1602 VHOST_USER_PAYLOAD_SIZE); 1603 goto err; 1604 } 1605 1606 /* Read payload */ 1607 if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) { 1608 error_report_err(local_err); 1609 goto err; 1610 } 1611 1612 switch (hdr.request) { 1613 case VHOST_USER_SLAVE_IOTLB_MSG: 1614 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); 1615 break; 1616 case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG : 1617 ret = vhost_user_slave_handle_config_change(dev); 1618 break; 1619 case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: 1620 ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area, 1621 fd ? fd[0] : -1); 1622 break; 1623 default: 1624 error_report("Received unexpected msg type: %d.", hdr.request); 1625 ret = -EINVAL; 1626 } 1627 1628 /* 1629 * REPLY_ACK feature handling. Other reply types has to be managed 1630 * directly in their request handlers. 1631 */ 1632 if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1633 struct iovec iovec[2]; 1634 1635 1636 hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 1637 hdr.flags |= VHOST_USER_REPLY_MASK; 1638 1639 payload.u64 = !!ret; 1640 hdr.size = sizeof(payload.u64); 1641 1642 iovec[0].iov_base = &hdr; 1643 iovec[0].iov_len = VHOST_USER_HDR_SIZE; 1644 iovec[1].iov_base = &payload; 1645 iovec[1].iov_len = hdr.size; 1646 1647 if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) { 1648 error_report_err(local_err); 1649 goto err; 1650 } 1651 } 1652 1653 goto fdcleanup; 1654 1655 err: 1656 close_slave_channel(u); 1657 rc = G_SOURCE_REMOVE; 1658 1659 fdcleanup: 1660 if (fd) { 1661 for (i = 0; i < fdsize; i++) { 1662 close(fd[i]); 1663 } 1664 } 1665 return rc; 1666 } 1667 1668 static int vhost_setup_slave_channel(struct vhost_dev *dev) 1669 { 1670 VhostUserMsg msg = { 1671 .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD, 1672 .hdr.flags = VHOST_USER_VERSION, 1673 }; 1674 struct vhost_user *u = dev->opaque; 1675 int sv[2], ret = 0; 1676 bool reply_supported = virtio_has_feature(dev->protocol_features, 1677 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1678 Error *local_err = NULL; 1679 QIOChannel *ioc; 1680 1681 if (!virtio_has_feature(dev->protocol_features, 1682 VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { 1683 return 0; 1684 } 1685 1686 if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { 1687 int saved_errno = errno; 1688 error_report("socketpair() failed"); 1689 return -saved_errno; 1690 } 1691 1692 ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err)); 1693 if (!ioc) { 1694 error_report_err(local_err); 1695 return -ECONNREFUSED; 1696 } 1697 u->slave_ioc = ioc; 1698 slave_update_read_handler(dev, NULL); 1699 1700 if (reply_supported) { 1701 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1702 } 1703 1704 ret = vhost_user_write(dev, &msg, &sv[1], 1); 1705 if (ret) { 1706 goto out; 1707 } 1708 1709 if (reply_supported) { 1710 ret = process_message_reply(dev, &msg); 1711 } 1712 1713 out: 1714 close(sv[1]); 1715 if (ret) { 1716 close_slave_channel(u); 1717 } 1718 1719 return ret; 1720 } 1721 1722 #ifdef CONFIG_LINUX 1723 /* 1724 * Called back from the postcopy fault thread when a fault is received on our 1725 * ufd. 1726 * TODO: This is Linux specific 1727 */ 1728 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd, 1729 void *ufd) 1730 { 1731 struct vhost_dev *dev = pcfd->data; 1732 struct vhost_user *u = dev->opaque; 1733 struct uffd_msg *msg = ufd; 1734 uint64_t faultaddr = msg->arg.pagefault.address; 1735 RAMBlock *rb = NULL; 1736 uint64_t rb_offset; 1737 int i; 1738 1739 trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr, 1740 dev->mem->nregions); 1741 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1742 trace_vhost_user_postcopy_fault_handler_loop(i, 1743 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size); 1744 if (faultaddr >= u->postcopy_client_bases[i]) { 1745 /* Ofset of the fault address in the vhost region */ 1746 uint64_t region_offset = faultaddr - u->postcopy_client_bases[i]; 1747 if (region_offset < dev->mem->regions[i].memory_size) { 1748 rb_offset = region_offset + u->region_rb_offset[i]; 1749 trace_vhost_user_postcopy_fault_handler_found(i, 1750 region_offset, rb_offset); 1751 rb = u->region_rb[i]; 1752 return postcopy_request_shared_page(pcfd, rb, faultaddr, 1753 rb_offset); 1754 } 1755 } 1756 } 1757 error_report("%s: Failed to find region for fault %" PRIx64, 1758 __func__, faultaddr); 1759 return -1; 1760 } 1761 1762 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb, 1763 uint64_t offset) 1764 { 1765 struct vhost_dev *dev = pcfd->data; 1766 struct vhost_user *u = dev->opaque; 1767 int i; 1768 1769 trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset); 1770 1771 if (!u) { 1772 return 0; 1773 } 1774 /* Translate the offset into an address in the clients address space */ 1775 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1776 if (u->region_rb[i] == rb && 1777 offset >= u->region_rb_offset[i] && 1778 offset < (u->region_rb_offset[i] + 1779 dev->mem->regions[i].memory_size)) { 1780 uint64_t client_addr = (offset - u->region_rb_offset[i]) + 1781 u->postcopy_client_bases[i]; 1782 trace_vhost_user_postcopy_waker_found(client_addr); 1783 return postcopy_wake_shared(pcfd, client_addr, rb); 1784 } 1785 } 1786 1787 trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset); 1788 return 0; 1789 } 1790 #endif 1791 1792 /* 1793 * Called at the start of an inbound postcopy on reception of the 1794 * 'advise' command. 1795 */ 1796 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) 1797 { 1798 #ifdef CONFIG_LINUX 1799 struct vhost_user *u = dev->opaque; 1800 CharBackend *chr = u->user->chr; 1801 int ufd; 1802 int ret; 1803 VhostUserMsg msg = { 1804 .hdr.request = VHOST_USER_POSTCOPY_ADVISE, 1805 .hdr.flags = VHOST_USER_VERSION, 1806 }; 1807 1808 ret = vhost_user_write(dev, &msg, NULL, 0); 1809 if (ret < 0) { 1810 error_setg(errp, "Failed to send postcopy_advise to vhost"); 1811 return ret; 1812 } 1813 1814 ret = vhost_user_read(dev, &msg); 1815 if (ret < 0) { 1816 error_setg(errp, "Failed to get postcopy_advise reply from vhost"); 1817 return ret; 1818 } 1819 1820 if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) { 1821 error_setg(errp, "Unexpected msg type. Expected %d received %d", 1822 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request); 1823 return -EPROTO; 1824 } 1825 1826 if (msg.hdr.size) { 1827 error_setg(errp, "Received bad msg size."); 1828 return -EPROTO; 1829 } 1830 ufd = qemu_chr_fe_get_msgfd(chr); 1831 if (ufd < 0) { 1832 error_setg(errp, "%s: Failed to get ufd", __func__); 1833 return -EIO; 1834 } 1835 qemu_set_nonblock(ufd); 1836 1837 /* register ufd with userfault thread */ 1838 u->postcopy_fd.fd = ufd; 1839 u->postcopy_fd.data = dev; 1840 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler; 1841 u->postcopy_fd.waker = vhost_user_postcopy_waker; 1842 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */ 1843 postcopy_register_shared_ufd(&u->postcopy_fd); 1844 return 0; 1845 #else 1846 error_setg(errp, "Postcopy not supported on non-Linux systems"); 1847 return -ENOSYS; 1848 #endif 1849 } 1850 1851 /* 1852 * Called at the switch to postcopy on reception of the 'listen' command. 1853 */ 1854 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) 1855 { 1856 struct vhost_user *u = dev->opaque; 1857 int ret; 1858 VhostUserMsg msg = { 1859 .hdr.request = VHOST_USER_POSTCOPY_LISTEN, 1860 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1861 }; 1862 u->postcopy_listen = true; 1863 1864 trace_vhost_user_postcopy_listen(); 1865 1866 ret = vhost_user_write(dev, &msg, NULL, 0); 1867 if (ret < 0) { 1868 error_setg(errp, "Failed to send postcopy_listen to vhost"); 1869 return ret; 1870 } 1871 1872 ret = process_message_reply(dev, &msg); 1873 if (ret) { 1874 error_setg(errp, "Failed to receive reply to postcopy_listen"); 1875 return ret; 1876 } 1877 1878 return 0; 1879 } 1880 1881 /* 1882 * Called at the end of postcopy 1883 */ 1884 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp) 1885 { 1886 VhostUserMsg msg = { 1887 .hdr.request = VHOST_USER_POSTCOPY_END, 1888 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1889 }; 1890 int ret; 1891 struct vhost_user *u = dev->opaque; 1892 1893 trace_vhost_user_postcopy_end_entry(); 1894 1895 ret = vhost_user_write(dev, &msg, NULL, 0); 1896 if (ret < 0) { 1897 error_setg(errp, "Failed to send postcopy_end to vhost"); 1898 return ret; 1899 } 1900 1901 ret = process_message_reply(dev, &msg); 1902 if (ret) { 1903 error_setg(errp, "Failed to receive reply to postcopy_end"); 1904 return ret; 1905 } 1906 postcopy_unregister_shared_ufd(&u->postcopy_fd); 1907 close(u->postcopy_fd.fd); 1908 u->postcopy_fd.handler = NULL; 1909 1910 trace_vhost_user_postcopy_end_exit(); 1911 1912 return 0; 1913 } 1914 1915 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, 1916 void *opaque) 1917 { 1918 struct PostcopyNotifyData *pnd = opaque; 1919 struct vhost_user *u = container_of(notifier, struct vhost_user, 1920 postcopy_notifier); 1921 struct vhost_dev *dev = u->dev; 1922 1923 switch (pnd->reason) { 1924 case POSTCOPY_NOTIFY_PROBE: 1925 if (!virtio_has_feature(dev->protocol_features, 1926 VHOST_USER_PROTOCOL_F_PAGEFAULT)) { 1927 /* TODO: Get the device name into this error somehow */ 1928 error_setg(pnd->errp, 1929 "vhost-user backend not capable of postcopy"); 1930 return -ENOENT; 1931 } 1932 break; 1933 1934 case POSTCOPY_NOTIFY_INBOUND_ADVISE: 1935 return vhost_user_postcopy_advise(dev, pnd->errp); 1936 1937 case POSTCOPY_NOTIFY_INBOUND_LISTEN: 1938 return vhost_user_postcopy_listen(dev, pnd->errp); 1939 1940 case POSTCOPY_NOTIFY_INBOUND_END: 1941 return vhost_user_postcopy_end(dev, pnd->errp); 1942 1943 default: 1944 /* We ignore notifications we don't know */ 1945 break; 1946 } 1947 1948 return 0; 1949 } 1950 1951 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, 1952 Error **errp) 1953 { 1954 uint64_t features, protocol_features, ram_slots; 1955 struct vhost_user *u; 1956 int err; 1957 1958 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1959 1960 u = g_new0(struct vhost_user, 1); 1961 u->user = opaque; 1962 u->dev = dev; 1963 dev->opaque = u; 1964 1965 err = vhost_user_get_features(dev, &features); 1966 if (err < 0) { 1967 error_setg_errno(errp, -err, "vhost_backend_init failed"); 1968 return err; 1969 } 1970 1971 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 1972 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 1973 1974 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, 1975 &protocol_features); 1976 if (err < 0) { 1977 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 1978 return -EPROTO; 1979 } 1980 1981 dev->protocol_features = 1982 protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK; 1983 1984 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { 1985 /* Don't acknowledge CONFIG feature if device doesn't support it */ 1986 dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); 1987 } else if (!(protocol_features & 1988 (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) { 1989 error_setg(errp, "Device expects VHOST_USER_PROTOCOL_F_CONFIG " 1990 "but backend does not support it."); 1991 return -EINVAL; 1992 } 1993 1994 err = vhost_user_set_protocol_features(dev, dev->protocol_features); 1995 if (err < 0) { 1996 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 1997 return -EPROTO; 1998 } 1999 2000 /* query the max queues we support if backend supports Multiple Queue */ 2001 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 2002 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, 2003 &dev->max_queues); 2004 if (err < 0) { 2005 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2006 return -EPROTO; 2007 } 2008 } else { 2009 dev->max_queues = 1; 2010 } 2011 2012 if (dev->num_queues && dev->max_queues < dev->num_queues) { 2013 error_setg(errp, "The maximum number of queues supported by the " 2014 "backend is %" PRIu64, dev->max_queues); 2015 return -EINVAL; 2016 } 2017 2018 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && 2019 !(virtio_has_feature(dev->protocol_features, 2020 VHOST_USER_PROTOCOL_F_SLAVE_REQ) && 2021 virtio_has_feature(dev->protocol_features, 2022 VHOST_USER_PROTOCOL_F_REPLY_ACK))) { 2023 error_setg(errp, "IOMMU support requires reply-ack and " 2024 "slave-req protocol features."); 2025 return -EINVAL; 2026 } 2027 2028 /* get max memory regions if backend supports configurable RAM slots */ 2029 if (!virtio_has_feature(dev->protocol_features, 2030 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) { 2031 u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS; 2032 } else { 2033 err = vhost_user_get_max_memslots(dev, &ram_slots); 2034 if (err < 0) { 2035 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2036 return -EPROTO; 2037 } 2038 2039 if (ram_slots < u->user->memory_slots) { 2040 error_setg(errp, "The backend specified a max ram slots limit " 2041 "of %" PRIu64", when the prior validated limit was " 2042 "%d. This limit should never decrease.", ram_slots, 2043 u->user->memory_slots); 2044 return -EINVAL; 2045 } 2046 2047 u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS); 2048 } 2049 } 2050 2051 if (dev->migration_blocker == NULL && 2052 !virtio_has_feature(dev->protocol_features, 2053 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { 2054 error_setg(&dev->migration_blocker, 2055 "Migration disabled: vhost-user backend lacks " 2056 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); 2057 } 2058 2059 if (dev->vq_index == 0) { 2060 err = vhost_setup_slave_channel(dev); 2061 if (err < 0) { 2062 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2063 return -EPROTO; 2064 } 2065 } 2066 2067 u->postcopy_notifier.notify = vhost_user_postcopy_notifier; 2068 postcopy_add_notifier(&u->postcopy_notifier); 2069 2070 return 0; 2071 } 2072 2073 static int vhost_user_backend_cleanup(struct vhost_dev *dev) 2074 { 2075 struct vhost_user *u; 2076 2077 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2078 2079 u = dev->opaque; 2080 if (u->postcopy_notifier.notify) { 2081 postcopy_remove_notifier(&u->postcopy_notifier); 2082 u->postcopy_notifier.notify = NULL; 2083 } 2084 u->postcopy_listen = false; 2085 if (u->postcopy_fd.handler) { 2086 postcopy_unregister_shared_ufd(&u->postcopy_fd); 2087 close(u->postcopy_fd.fd); 2088 u->postcopy_fd.handler = NULL; 2089 } 2090 if (u->slave_ioc) { 2091 close_slave_channel(u); 2092 } 2093 g_free(u->region_rb); 2094 u->region_rb = NULL; 2095 g_free(u->region_rb_offset); 2096 u->region_rb_offset = NULL; 2097 u->region_rb_len = 0; 2098 g_free(u); 2099 dev->opaque = 0; 2100 2101 return 0; 2102 } 2103 2104 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 2105 { 2106 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 2107 2108 return idx; 2109 } 2110 2111 static int vhost_user_memslots_limit(struct vhost_dev *dev) 2112 { 2113 struct vhost_user *u = dev->opaque; 2114 2115 return u->user->memory_slots; 2116 } 2117 2118 static bool vhost_user_requires_shm_log(struct vhost_dev *dev) 2119 { 2120 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2121 2122 return virtio_has_feature(dev->protocol_features, 2123 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 2124 } 2125 2126 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) 2127 { 2128 VhostUserMsg msg = { }; 2129 2130 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2131 2132 /* If guest supports GUEST_ANNOUNCE do nothing */ 2133 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { 2134 return 0; 2135 } 2136 2137 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ 2138 if (virtio_has_feature(dev->protocol_features, 2139 VHOST_USER_PROTOCOL_F_RARP)) { 2140 msg.hdr.request = VHOST_USER_SEND_RARP; 2141 msg.hdr.flags = VHOST_USER_VERSION; 2142 memcpy((char *)&msg.payload.u64, mac_addr, 6); 2143 msg.hdr.size = sizeof(msg.payload.u64); 2144 2145 return vhost_user_write(dev, &msg, NULL, 0); 2146 } 2147 return -ENOTSUP; 2148 } 2149 2150 static bool vhost_user_can_merge(struct vhost_dev *dev, 2151 uint64_t start1, uint64_t size1, 2152 uint64_t start2, uint64_t size2) 2153 { 2154 ram_addr_t offset; 2155 int mfd, rfd; 2156 2157 (void)vhost_user_get_mr_data(start1, &offset, &mfd); 2158 (void)vhost_user_get_mr_data(start2, &offset, &rfd); 2159 2160 return mfd == rfd; 2161 } 2162 2163 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) 2164 { 2165 VhostUserMsg msg; 2166 bool reply_supported = virtio_has_feature(dev->protocol_features, 2167 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2168 int ret; 2169 2170 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) { 2171 return 0; 2172 } 2173 2174 msg.hdr.request = VHOST_USER_NET_SET_MTU; 2175 msg.payload.u64 = mtu; 2176 msg.hdr.size = sizeof(msg.payload.u64); 2177 msg.hdr.flags = VHOST_USER_VERSION; 2178 if (reply_supported) { 2179 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2180 } 2181 2182 ret = vhost_user_write(dev, &msg, NULL, 0); 2183 if (ret < 0) { 2184 return ret; 2185 } 2186 2187 /* If reply_ack supported, slave has to ack specified MTU is valid */ 2188 if (reply_supported) { 2189 return process_message_reply(dev, &msg); 2190 } 2191 2192 return 0; 2193 } 2194 2195 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev, 2196 struct vhost_iotlb_msg *imsg) 2197 { 2198 int ret; 2199 VhostUserMsg msg = { 2200 .hdr.request = VHOST_USER_IOTLB_MSG, 2201 .hdr.size = sizeof(msg.payload.iotlb), 2202 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 2203 .payload.iotlb = *imsg, 2204 }; 2205 2206 ret = vhost_user_write(dev, &msg, NULL, 0); 2207 if (ret < 0) { 2208 return ret; 2209 } 2210 2211 return process_message_reply(dev, &msg); 2212 } 2213 2214 2215 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled) 2216 { 2217 /* No-op as the receive channel is not dedicated to IOTLB messages. */ 2218 } 2219 2220 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, 2221 uint32_t config_len, Error **errp) 2222 { 2223 int ret; 2224 VhostUserMsg msg = { 2225 .hdr.request = VHOST_USER_GET_CONFIG, 2226 .hdr.flags = VHOST_USER_VERSION, 2227 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len, 2228 }; 2229 2230 if (!virtio_has_feature(dev->protocol_features, 2231 VHOST_USER_PROTOCOL_F_CONFIG)) { 2232 error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported"); 2233 return -EINVAL; 2234 } 2235 2236 assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE); 2237 2238 msg.payload.config.offset = 0; 2239 msg.payload.config.size = config_len; 2240 ret = vhost_user_write(dev, &msg, NULL, 0); 2241 if (ret < 0) { 2242 error_setg_errno(errp, -ret, "vhost_get_config failed"); 2243 return ret; 2244 } 2245 2246 ret = vhost_user_read(dev, &msg); 2247 if (ret < 0) { 2248 error_setg_errno(errp, -ret, "vhost_get_config failed"); 2249 return ret; 2250 } 2251 2252 if (msg.hdr.request != VHOST_USER_GET_CONFIG) { 2253 error_setg(errp, 2254 "Received unexpected msg type. Expected %d received %d", 2255 VHOST_USER_GET_CONFIG, msg.hdr.request); 2256 return -EPROTO; 2257 } 2258 2259 if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) { 2260 error_setg(errp, "Received bad msg size."); 2261 return -EPROTO; 2262 } 2263 2264 memcpy(config, msg.payload.config.region, config_len); 2265 2266 return 0; 2267 } 2268 2269 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data, 2270 uint32_t offset, uint32_t size, uint32_t flags) 2271 { 2272 int ret; 2273 uint8_t *p; 2274 bool reply_supported = virtio_has_feature(dev->protocol_features, 2275 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2276 2277 VhostUserMsg msg = { 2278 .hdr.request = VHOST_USER_SET_CONFIG, 2279 .hdr.flags = VHOST_USER_VERSION, 2280 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size, 2281 }; 2282 2283 if (!virtio_has_feature(dev->protocol_features, 2284 VHOST_USER_PROTOCOL_F_CONFIG)) { 2285 return -ENOTSUP; 2286 } 2287 2288 if (reply_supported) { 2289 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2290 } 2291 2292 if (size > VHOST_USER_MAX_CONFIG_SIZE) { 2293 return -EINVAL; 2294 } 2295 2296 msg.payload.config.offset = offset, 2297 msg.payload.config.size = size, 2298 msg.payload.config.flags = flags, 2299 p = msg.payload.config.region; 2300 memcpy(p, data, size); 2301 2302 ret = vhost_user_write(dev, &msg, NULL, 0); 2303 if (ret < 0) { 2304 return ret; 2305 } 2306 2307 if (reply_supported) { 2308 return process_message_reply(dev, &msg); 2309 } 2310 2311 return 0; 2312 } 2313 2314 static int vhost_user_crypto_create_session(struct vhost_dev *dev, 2315 void *session_info, 2316 uint64_t *session_id) 2317 { 2318 int ret; 2319 bool crypto_session = virtio_has_feature(dev->protocol_features, 2320 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2321 CryptoDevBackendSymSessionInfo *sess_info = session_info; 2322 VhostUserMsg msg = { 2323 .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION, 2324 .hdr.flags = VHOST_USER_VERSION, 2325 .hdr.size = sizeof(msg.payload.session), 2326 }; 2327 2328 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2329 2330 if (!crypto_session) { 2331 error_report("vhost-user trying to send unhandled ioctl"); 2332 return -ENOTSUP; 2333 } 2334 2335 memcpy(&msg.payload.session.session_setup_data, sess_info, 2336 sizeof(CryptoDevBackendSymSessionInfo)); 2337 if (sess_info->key_len) { 2338 memcpy(&msg.payload.session.key, sess_info->cipher_key, 2339 sess_info->key_len); 2340 } 2341 if (sess_info->auth_key_len > 0) { 2342 memcpy(&msg.payload.session.auth_key, sess_info->auth_key, 2343 sess_info->auth_key_len); 2344 } 2345 ret = vhost_user_write(dev, &msg, NULL, 0); 2346 if (ret < 0) { 2347 error_report("vhost_user_write() return %d, create session failed", 2348 ret); 2349 return ret; 2350 } 2351 2352 ret = vhost_user_read(dev, &msg); 2353 if (ret < 0) { 2354 error_report("vhost_user_read() return %d, create session failed", 2355 ret); 2356 return ret; 2357 } 2358 2359 if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) { 2360 error_report("Received unexpected msg type. Expected %d received %d", 2361 VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request); 2362 return -EPROTO; 2363 } 2364 2365 if (msg.hdr.size != sizeof(msg.payload.session)) { 2366 error_report("Received bad msg size."); 2367 return -EPROTO; 2368 } 2369 2370 if (msg.payload.session.session_id < 0) { 2371 error_report("Bad session id: %" PRId64 "", 2372 msg.payload.session.session_id); 2373 return -EINVAL; 2374 } 2375 *session_id = msg.payload.session.session_id; 2376 2377 return 0; 2378 } 2379 2380 static int 2381 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id) 2382 { 2383 int ret; 2384 bool crypto_session = virtio_has_feature(dev->protocol_features, 2385 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2386 VhostUserMsg msg = { 2387 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION, 2388 .hdr.flags = VHOST_USER_VERSION, 2389 .hdr.size = sizeof(msg.payload.u64), 2390 }; 2391 msg.payload.u64 = session_id; 2392 2393 if (!crypto_session) { 2394 error_report("vhost-user trying to send unhandled ioctl"); 2395 return -ENOTSUP; 2396 } 2397 2398 ret = vhost_user_write(dev, &msg, NULL, 0); 2399 if (ret < 0) { 2400 error_report("vhost_user_write() return %d, close session failed", 2401 ret); 2402 return ret; 2403 } 2404 2405 return 0; 2406 } 2407 2408 static bool vhost_user_mem_section_filter(struct vhost_dev *dev, 2409 MemoryRegionSection *section) 2410 { 2411 bool result; 2412 2413 result = memory_region_get_fd(section->mr) >= 0; 2414 2415 return result; 2416 } 2417 2418 static int vhost_user_get_inflight_fd(struct vhost_dev *dev, 2419 uint16_t queue_size, 2420 struct vhost_inflight *inflight) 2421 { 2422 void *addr; 2423 int fd; 2424 int ret; 2425 struct vhost_user *u = dev->opaque; 2426 CharBackend *chr = u->user->chr; 2427 VhostUserMsg msg = { 2428 .hdr.request = VHOST_USER_GET_INFLIGHT_FD, 2429 .hdr.flags = VHOST_USER_VERSION, 2430 .payload.inflight.num_queues = dev->nvqs, 2431 .payload.inflight.queue_size = queue_size, 2432 .hdr.size = sizeof(msg.payload.inflight), 2433 }; 2434 2435 if (!virtio_has_feature(dev->protocol_features, 2436 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2437 return 0; 2438 } 2439 2440 ret = vhost_user_write(dev, &msg, NULL, 0); 2441 if (ret < 0) { 2442 return ret; 2443 } 2444 2445 ret = vhost_user_read(dev, &msg); 2446 if (ret < 0) { 2447 return ret; 2448 } 2449 2450 if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) { 2451 error_report("Received unexpected msg type. " 2452 "Expected %d received %d", 2453 VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request); 2454 return -EPROTO; 2455 } 2456 2457 if (msg.hdr.size != sizeof(msg.payload.inflight)) { 2458 error_report("Received bad msg size."); 2459 return -EPROTO; 2460 } 2461 2462 if (!msg.payload.inflight.mmap_size) { 2463 return 0; 2464 } 2465 2466 fd = qemu_chr_fe_get_msgfd(chr); 2467 if (fd < 0) { 2468 error_report("Failed to get mem fd"); 2469 return -EIO; 2470 } 2471 2472 addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE, 2473 MAP_SHARED, fd, msg.payload.inflight.mmap_offset); 2474 2475 if (addr == MAP_FAILED) { 2476 error_report("Failed to mmap mem fd"); 2477 close(fd); 2478 return -EFAULT; 2479 } 2480 2481 inflight->addr = addr; 2482 inflight->fd = fd; 2483 inflight->size = msg.payload.inflight.mmap_size; 2484 inflight->offset = msg.payload.inflight.mmap_offset; 2485 inflight->queue_size = queue_size; 2486 2487 return 0; 2488 } 2489 2490 static int vhost_user_set_inflight_fd(struct vhost_dev *dev, 2491 struct vhost_inflight *inflight) 2492 { 2493 VhostUserMsg msg = { 2494 .hdr.request = VHOST_USER_SET_INFLIGHT_FD, 2495 .hdr.flags = VHOST_USER_VERSION, 2496 .payload.inflight.mmap_size = inflight->size, 2497 .payload.inflight.mmap_offset = inflight->offset, 2498 .payload.inflight.num_queues = dev->nvqs, 2499 .payload.inflight.queue_size = inflight->queue_size, 2500 .hdr.size = sizeof(msg.payload.inflight), 2501 }; 2502 2503 if (!virtio_has_feature(dev->protocol_features, 2504 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2505 return 0; 2506 } 2507 2508 return vhost_user_write(dev, &msg, &inflight->fd, 1); 2509 } 2510 2511 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) 2512 { 2513 if (user->chr) { 2514 error_setg(errp, "Cannot initialize vhost-user state"); 2515 return false; 2516 } 2517 user->chr = chr; 2518 user->memory_slots = 0; 2519 return true; 2520 } 2521 2522 void vhost_user_cleanup(VhostUserState *user) 2523 { 2524 int i; 2525 2526 if (!user->chr) { 2527 return; 2528 } 2529 memory_region_transaction_begin(); 2530 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 2531 if (user->notifier[i].addr) { 2532 object_unparent(OBJECT(&user->notifier[i].mr)); 2533 munmap(user->notifier[i].addr, qemu_real_host_page_size); 2534 user->notifier[i].addr = NULL; 2535 } 2536 } 2537 memory_region_transaction_commit(); 2538 user->chr = NULL; 2539 } 2540 2541 const VhostOps user_ops = { 2542 .backend_type = VHOST_BACKEND_TYPE_USER, 2543 .vhost_backend_init = vhost_user_backend_init, 2544 .vhost_backend_cleanup = vhost_user_backend_cleanup, 2545 .vhost_backend_memslots_limit = vhost_user_memslots_limit, 2546 .vhost_set_log_base = vhost_user_set_log_base, 2547 .vhost_set_mem_table = vhost_user_set_mem_table, 2548 .vhost_set_vring_addr = vhost_user_set_vring_addr, 2549 .vhost_set_vring_endian = vhost_user_set_vring_endian, 2550 .vhost_set_vring_num = vhost_user_set_vring_num, 2551 .vhost_set_vring_base = vhost_user_set_vring_base, 2552 .vhost_get_vring_base = vhost_user_get_vring_base, 2553 .vhost_set_vring_kick = vhost_user_set_vring_kick, 2554 .vhost_set_vring_call = vhost_user_set_vring_call, 2555 .vhost_set_features = vhost_user_set_features, 2556 .vhost_get_features = vhost_user_get_features, 2557 .vhost_set_owner = vhost_user_set_owner, 2558 .vhost_reset_device = vhost_user_reset_device, 2559 .vhost_get_vq_index = vhost_user_get_vq_index, 2560 .vhost_set_vring_enable = vhost_user_set_vring_enable, 2561 .vhost_requires_shm_log = vhost_user_requires_shm_log, 2562 .vhost_migration_done = vhost_user_migration_done, 2563 .vhost_backend_can_merge = vhost_user_can_merge, 2564 .vhost_net_set_mtu = vhost_user_net_set_mtu, 2565 .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback, 2566 .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg, 2567 .vhost_get_config = vhost_user_get_config, 2568 .vhost_set_config = vhost_user_set_config, 2569 .vhost_crypto_create_session = vhost_user_crypto_create_session, 2570 .vhost_crypto_close_session = vhost_user_crypto_close_session, 2571 .vhost_backend_mem_section_filter = vhost_user_mem_section_filter, 2572 .vhost_get_inflight_fd = vhost_user_get_inflight_fd, 2573 .vhost_set_inflight_fd = vhost_user_set_inflight_fd, 2574 }; 2575