/*
 * vhost-user
 *
 * Copyright (c) 2013 Virtual Open Systems Sarl.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/virtio/virtio-dmabuf.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/virtio-crypto.h"
#include "hw/virtio/vhost-user.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-net.h"
#include "chardev/char-fe.h"
#include "io/channel-socket.h"
#include "sysemu/kvm.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "qemu/uuid.h"
#include "qemu/sockets.h"
#include "sysemu/runstate.h"
#include "sysemu/cryptodev.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "trace.h"
#include "exec/ramblock.h"

#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/un.h>

#include "standard-headers/linux/vhost_types.h"

#ifdef CONFIG_LINUX
#include <linux/userfaultfd.h>
#endif

#define VHOST_MEMORY_BASELINE_NREGIONS 8
#define VHOST_USER_F_PROTOCOL_FEATURES 30
#define VHOST_USER_BACKEND_MAX_FDS 8

#if defined(TARGET_PPC) || defined(TARGET_PPC64)
#include "hw/ppc/spapr.h"
#define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS

#else
#define VHOST_USER_MAX_RAM_SLOTS 512
#endif

/*
 * Maximum size of virtio device config space
 */
#define VHOST_USER_MAX_CONFIG_SIZE 256

#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)

typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_NET_SET_MTU = 20,
    VHOST_USER_SET_BACKEND_REQ_FD = 21,
    VHOST_USER_IOTLB_MSG = 22,
    VHOST_USER_SET_VRING_ENDIAN = 23,
    VHOST_USER_GET_CONFIG = 24,
    VHOST_USER_SET_CONFIG = 25,
    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
    VHOST_USER_POSTCOPY_ADVISE = 28,
    VHOST_USER_POSTCOPY_LISTEN = 29,
    VHOST_USER_POSTCOPY_END = 30,
    VHOST_USER_GET_INFLIGHT_FD = 31,
    VHOST_USER_SET_INFLIGHT_FD = 32,
    VHOST_USER_GPU_SET_SOCKET = 33,
    VHOST_USER_RESET_DEVICE = 34,
    /* Message number 35 reserved for VHOST_USER_VRING_KICK. */
    VHOST_USER_GET_MAX_MEM_SLOTS = 36,
    VHOST_USER_ADD_MEM_REG = 37,
    VHOST_USER_REM_MEM_REG = 38,
    VHOST_USER_SET_STATUS = 39,
    VHOST_USER_GET_STATUS = 40,
    VHOST_USER_GET_SHARED_OBJECT = 41,
    VHOST_USER_MAX
} VhostUserRequest;

typedef enum VhostUserBackendRequest {
    VHOST_USER_BACKEND_NONE = 0,
    VHOST_USER_BACKEND_IOTLB_MSG = 1,
    VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2,
    VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3,
    VHOST_USER_BACKEND_SHARED_OBJECT_ADD = 6,
    VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE = 7,
    VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8,
    VHOST_USER_BACKEND_MAX
} VhostUserBackendRequest;

typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;
} VhostUserMemoryRegion;

typedef struct VhostUserMemory {
    uint32_t nregions;
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS];
} VhostUserMemory;

typedef struct VhostUserMemRegMsg {
    uint64_t padding;
    VhostUserMemoryRegion region;
} VhostUserMemRegMsg;

typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;

typedef struct VhostUserConfig {
    uint32_t offset;
    uint32_t size;
    uint32_t flags;
    uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
} VhostUserConfig;

#define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512
#define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64
#define VHOST_CRYPTO_ASYM_MAX_KEY_LEN 1024

typedef struct VhostUserCryptoSession {
    uint64_t op_code;
    union {
        struct {
            CryptoDevBackendSymSessionInfo session_setup_data;
            uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
            uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
        } sym;
        struct {
            CryptoDevBackendAsymSessionInfo session_setup_data;
            uint8_t key[VHOST_CRYPTO_ASYM_MAX_KEY_LEN];
        } asym;
    } u;

    /* session id for success, -1 on errors */
    int64_t session_id;
} VhostUserCryptoSession;

static VhostUserConfig c __attribute__ ((unused));
#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
                                   + sizeof(c.size) \
                                   + sizeof(c.flags))

typedef struct VhostUserVringArea {
    uint64_t u64;
    uint64_t size;
    uint64_t offset;
} VhostUserVringArea;

typedef struct VhostUserInflight {
    uint64_t mmap_size;
    uint64_t mmap_offset;
    uint16_t num_queues;
    uint16_t queue_size;
} VhostUserInflight;

typedef struct VhostUserShared {
    unsigned char uuid[16];
} VhostUserShared;

typedef struct {
    VhostUserRequest request;

#define VHOST_USER_VERSION_MASK     (0x3)
#define VHOST_USER_REPLY_MASK       (0x1 << 2)
#define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
    uint32_t flags;
    uint32_t size; /* the following payload size */
} QEMU_PACKED VhostUserHeader;

typedef union {
#define VHOST_USER_VRING_IDX_MASK   (0xff)
#define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
    uint64_t u64;
    struct vhost_vring_state state;
    struct vhost_vring_addr addr;
    VhostUserMemory memory;
    VhostUserMemRegMsg mem_reg;
    VhostUserLog log;
    struct vhost_iotlb_msg iotlb;
    VhostUserConfig config;
    VhostUserCryptoSession session;
    VhostUserVringArea area;
    VhostUserInflight inflight;
    VhostUserShared object;
} VhostUserPayload;

typedef struct VhostUserMsg {
    VhostUserHeader hdr;
    VhostUserPayload payload;
} QEMU_PACKED VhostUserMsg;

static VhostUserMsg m __attribute__ ((unused));
#define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))

#define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))

/* The version of the protocol we support */
#define VHOST_USER_VERSION (0x1)

struct vhost_user {
    struct vhost_dev *dev;
    /* Shared between vhost devs of the same virtio device */
    VhostUserState *user;
    QIOChannel *backend_ioc;
    GSource *backend_src;
    NotifierWithReturn postcopy_notifier;
    struct PostCopyFD postcopy_fd;
    uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS];
    /* Length of the region_rb and region_rb_offset arrays */
    size_t region_rb_len;
    /* RAMBlock associated with a given region */
    RAMBlock **region_rb;
    /*
     * The offset from the start of the RAMBlock to the start of the
     * vhost region.
     */
    ram_addr_t *region_rb_offset;

    /* True once we've entered postcopy_listen */
    bool postcopy_listen;

    /* Our current regions */
    int num_shadow_regions;
    struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS];
};

struct scrub_regions {
    struct vhost_memory_region *region;
    int reg_idx;
    int fd_idx;
};

static bool ioeventfd_enabled(void)
{
    return !kvm_enabled() || kvm_eventfds_enabled();
}

static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    uint8_t *p = (uint8_t *) msg;
    int r, size = VHOST_USER_HDR_SIZE;

    r = qemu_chr_fe_read_all(chr, p, size);
    if (r != size) {
        int saved_errno = errno;
        error_report("Failed to read msg header. Read %d instead of %d."
                     " Original request %d.", r, size, msg->hdr.request);
        return r < 0 ? -saved_errno : -EIO;
    }

    /* validate received flags */
    if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
        error_report("Failed to read msg header."
                     " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
                     VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
        return -EPROTO;
    }

    trace_vhost_user_read(msg->hdr.request, msg->hdr.flags);

    return 0;
}
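
/*
 * Illustrative sketch only (not used by the code): every vhost-user message
 * starts with the 12-byte packed VhostUserHeader, followed by hdr.size bytes
 * of payload.  Assuming protocol version 1, a VHOST_USER_GET_FEATURES
 * exchange would look roughly like:
 *
 *   request:  { .request = VHOST_USER_GET_FEATURES, .flags = 0x1, .size = 0 }
 *   reply:    { .request = VHOST_USER_GET_FEATURES, .flags = 0x5, .size = 8 }
 *             followed by a uint64_t feature bitmap
 *
 * vhost_user_read_header() above enforces exactly that reply shape: the
 * flags field must carry VHOST_USER_REPLY_MASK | VHOST_USER_VERSION (0x5).
 */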

static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    uint8_t *p = (uint8_t *) msg;
    int r, size;

    r = vhost_user_read_header(dev, msg);
    if (r < 0) {
        return r;
    }

    /* validate message size is sane */
    if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
        error_report("Failed to read msg header."
                     " Size %d exceeds the maximum %zu.", msg->hdr.size,
                     VHOST_USER_PAYLOAD_SIZE);
        return -EPROTO;
    }

    if (msg->hdr.size) {
        p += VHOST_USER_HDR_SIZE;
        size = msg->hdr.size;
        r = qemu_chr_fe_read_all(chr, p, size);
        if (r != size) {
            int saved_errno = errno;
            error_report("Failed to read msg payload."
                         " Read %d instead of %d.", r, msg->hdr.size);
            return r < 0 ? -saved_errno : -EIO;
        }
    }

    return 0;
}

static int process_message_reply(struct vhost_dev *dev,
                                 const VhostUserMsg *msg)
{
    int ret;
    VhostUserMsg msg_reply;

    if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
        return 0;
    }

    ret = vhost_user_read(dev, &msg_reply);
    if (ret < 0) {
        return ret;
    }

    if (msg_reply.hdr.request != msg->hdr.request) {
        error_report("Received unexpected msg type. "
                     "Expected %d received %d",
                     msg->hdr.request, msg_reply.hdr.request);
        return -EPROTO;
    }

    return msg_reply.payload.u64 ? -EIO : 0;
}

static bool vhost_user_per_device_request(VhostUserRequest request)
{
    switch (request) {
    case VHOST_USER_SET_OWNER:
    case VHOST_USER_RESET_OWNER:
    case VHOST_USER_SET_MEM_TABLE:
    case VHOST_USER_GET_QUEUE_NUM:
    case VHOST_USER_NET_SET_MTU:
    case VHOST_USER_RESET_DEVICE:
    case VHOST_USER_ADD_MEM_REG:
    case VHOST_USER_REM_MEM_REG:
        return true;
    default:
        return false;
    }
}

/* most non-init callers ignore the error */
static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
                            int *fds, int fd_num)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;

    /*
     * Some devices, like virtio-scsi, are implemented as a single vhost_dev,
     * while others, like virtio-net, contain multiple vhost_devs. For
     * operations such as configuring device memory mappings or issuing device
     * resets, which affect the whole device instead of individual VQs,
     * vhost-user messages should only be sent once.
     *
     * Devices with multiple vhost_devs are given an associated dev->vq_index
     * so per_device requests are only sent if vq_index is 0.
     */
    if (vhost_user_per_device_request(msg->hdr.request)
        && dev->vq_index != 0) {
        msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
        return 0;
    }

    if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
        error_report("Failed to set msg fds.");
        return -EINVAL;
    }

    ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
    if (ret != size) {
        int saved_errno = errno;
        error_report("Failed to write msg."
                     " Wrote %d instead of %d.", ret, size);
        return ret < 0 ? -saved_errno : -EIO;
    }

    trace_vhost_user_write(msg->hdr.request, msg->hdr.flags);

    return 0;
}
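
/*
 * Hypothetical example: a multiqueue virtio-net device with two queue pairs
 * has two vhost_dev instances, with vq_index 0 and 2.  A device-wide request
 * such as VHOST_USER_SET_MEM_TABLE sent through vhost_user_write() is only
 * emitted by the vq_index == 0 instance; the other instance returns early
 * above and clears NEED_REPLY so its callers do not block waiting for a
 * reply that will never arrive.
 */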

int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GPU_SET_SOCKET,
        .hdr.flags = VHOST_USER_VERSION,
    };

    return vhost_user_write(dev, &msg, &fd, 1);
}

static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    int fds[VHOST_USER_MAX_RAM_SLOTS];
    size_t fd_num = 0;
    bool shmfd = virtio_has_feature(dev->protocol_features,
                                    VHOST_USER_PROTOCOL_F_LOG_SHMFD);
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_LOG_BASE,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.log.mmap_size = log->size * sizeof(*(log->log)),
        .payload.log.mmap_offset = 0,
        .hdr.size = sizeof(msg.payload.log),
    };

    /* Send only once with first queue pair */
    if (dev->vq_index != 0) {
        return 0;
    }

    if (shmfd && log->fd != -1) {
        fds[fd_num++] = log->fd;
    }

    ret = vhost_user_write(dev, &msg, fds, fd_num);
    if (ret < 0) {
        return ret;
    }

    if (shmfd) {
        msg.hdr.size = 0;
        ret = vhost_user_read(dev, &msg);
        if (ret < 0) {
            return ret;
        }

        if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
            error_report("Received unexpected msg type. "
                         "Expected %d received %d",
                         VHOST_USER_SET_LOG_BASE, msg.hdr.request);
            return -EPROTO;
        }
    }

    return 0;
}

static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset,
                                            int *fd)
{
    MemoryRegion *mr;

    assert((uintptr_t)addr == addr);
    mr = memory_region_from_host((void *)(uintptr_t)addr, offset);
    *fd = memory_region_get_fd(mr);
    *offset += mr->ram_block->fd_offset;

    return mr;
}

static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst,
                                       struct vhost_memory_region *src,
                                       uint64_t mmap_offset)
{
    assert(src != NULL && dst != NULL);
    dst->userspace_addr = src->userspace_addr;
    dst->memory_size = src->memory_size;
    dst->guest_phys_addr = src->guest_phys_addr;
    dst->mmap_offset = mmap_offset;
}

static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u,
                                             struct vhost_dev *dev,
                                             VhostUserMsg *msg,
                                             int *fds, size_t *fd_num,
                                             bool track_ramblocks)
{
    int i, fd;
    ram_addr_t offset;
    MemoryRegion *mr;
    struct vhost_memory_region *reg;
    VhostUserMemoryRegion region_buffer;

    msg->hdr.request = VHOST_USER_SET_MEM_TABLE;

    for (i = 0; i < dev->mem->nregions; ++i) {
        reg = dev->mem->regions + i;

        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
        if (fd > 0) {
            if (track_ramblocks) {
                assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS);
                trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name,
                                                      reg->memory_size,
                                                      reg->guest_phys_addr,
                                                      reg->userspace_addr,
                                                      offset);
                u->region_rb_offset[i] = offset;
                u->region_rb[i] = mr->ram_block;
            } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) {
                error_report("Failed preparing vhost-user memory table msg");
                return -ENOBUFS;
            }
            vhost_user_fill_msg_region(&region_buffer, reg, offset);
            msg->payload.memory.regions[*fd_num] = region_buffer;
            fds[(*fd_num)++] = fd;
        } else if (track_ramblocks) {
            u->region_rb_offset[i] = 0;
            u->region_rb[i] = NULL;
        }
    }

    msg->payload.memory.nregions = *fd_num;

    if (!*fd_num) {
        error_report("Failed initializing vhost-user memory map, "
                     "consider using -object memory-backend-file share=on");
        return -EINVAL;
    }

    msg->hdr.size = sizeof(msg->payload.memory.nregions);
    msg->hdr.size += sizeof(msg->payload.memory.padding);
    msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion);

    return 0;
}
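
/*
 * Minimal sketch of what the helper above produces for a guest with a single
 * fd-backed RAM region (the values are made up for illustration):
 *
 *   msg.payload.memory.nregions = 1;
 *   msg.payload.memory.regions[0] = (VhostUserMemoryRegion) {
 *       .guest_phys_addr = 0x0,
 *       .memory_size     = 0x80000000,
 *       .userspace_addr  = <QEMU's mapping of the RAMBlock>,
 *       .mmap_offset     = <offset into the memory backend's fd>,
 *   };
 *   fds[0] = <fd of the RAMBlock>;  fd_num = 1;
 *
 * The file descriptors travel as SCM_RIGHTS ancillary data alongside the
 * message, which is why regions without an fd cannot be described to the
 * backend and trigger the error above.
 */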

static inline bool reg_equal(struct vhost_memory_region *shadow_reg,
                             struct vhost_memory_region *vdev_reg)
{
    return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr &&
        shadow_reg->userspace_addr == vdev_reg->userspace_addr &&
        shadow_reg->memory_size == vdev_reg->memory_size;
}

static void scrub_shadow_regions(struct vhost_dev *dev,
                                 struct scrub_regions *add_reg,
                                 int *nr_add_reg,
                                 struct scrub_regions *rem_reg,
                                 int *nr_rem_reg, uint64_t *shadow_pcb,
                                 bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    bool found[VHOST_USER_MAX_RAM_SLOTS] = {};
    struct vhost_memory_region *reg, *shadow_reg;
    int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0;
    ram_addr_t offset;
    MemoryRegion *mr;
    bool matching;

    /*
     * Find memory regions present in our shadow state which are not in
     * the device's current memory state.
     *
     * Mark regions in both the shadow and device state as "found".
     */
    for (i = 0; i < u->num_shadow_regions; i++) {
        shadow_reg = &u->shadow_regions[i];
        matching = false;

        for (j = 0; j < dev->mem->nregions; j++) {
            reg = &dev->mem->regions[j];

            mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);

            if (reg_equal(shadow_reg, reg)) {
                matching = true;
                found[j] = true;
                if (track_ramblocks) {
                    /*
                     * Reset postcopy client bases, region_rb, and
                     * region_rb_offset in case regions are removed.
                     */
                    if (fd > 0) {
                        u->region_rb_offset[j] = offset;
                        u->region_rb[j] = mr->ram_block;
                        shadow_pcb[j] = u->postcopy_client_bases[i];
                    } else {
                        u->region_rb_offset[j] = 0;
                        u->region_rb[j] = NULL;
                    }
                }
                break;
            }
        }

        /*
         * If the region was not found in the current device memory state
         * create an entry for it in the removed list.
         */
        if (!matching) {
            rem_reg[rm_idx].region = shadow_reg;
            rem_reg[rm_idx++].reg_idx = i;
        }
    }

    /*
     * For regions not marked "found", create entries in the added list.
     *
     * Note their indexes in the device memory state and the indexes of their
     * file descriptors.
     */
    for (i = 0; i < dev->mem->nregions; i++) {
        reg = &dev->mem->regions[i];
        vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
        if (fd > 0) {
            ++fd_num;
        }

        /*
         * If the region was in both the shadow and device state we don't
         * need to send a VHOST_USER_ADD_MEM_REG message for it.
         */
        if (found[i]) {
            continue;
        }

        add_reg[add_idx].region = reg;
        add_reg[add_idx].reg_idx = i;
        add_reg[add_idx++].fd_idx = fd_num;
    }
    *nr_rem_reg = rm_idx;
    *nr_add_reg = add_idx;

    return;
}
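
/*
 * Worked example (hypothetical): if the shadow table holds regions {A, B}
 * and the device now reports {B, C}, scrub_shadow_regions() returns A in
 * rem_reg and C in add_reg, while B is marked "found" and left untouched.
 * The helpers below then turn that diff into one VHOST_USER_REM_MEM_REG
 * message for A followed by one VHOST_USER_ADD_MEM_REG message for C.
 */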

static int send_remove_regions(struct vhost_dev *dev,
                               struct scrub_regions *remove_reg,
                               int nr_rem_reg, VhostUserMsg *msg,
                               bool reply_supported)
{
    struct vhost_user *u = dev->opaque;
    struct vhost_memory_region *shadow_reg;
    int i, fd, shadow_reg_idx, ret;
    ram_addr_t offset;
    VhostUserMemoryRegion region_buffer;

    /*
     * The regions in remove_reg appear in the same order they do in the
     * shadow table. Therefore we can minimize memory copies by iterating
     * through remove_reg backwards.
     */
    for (i = nr_rem_reg - 1; i >= 0; i--) {
        shadow_reg = remove_reg[i].region;
        shadow_reg_idx = remove_reg[i].reg_idx;

        vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd);

        if (fd > 0) {
            msg->hdr.request = VHOST_USER_REM_MEM_REG;
            vhost_user_fill_msg_region(&region_buffer, shadow_reg, 0);
            msg->payload.mem_reg.region = region_buffer;

            ret = vhost_user_write(dev, msg, NULL, 0);
            if (ret < 0) {
                return ret;
            }

            if (reply_supported) {
                ret = process_message_reply(dev, msg);
                if (ret) {
                    return ret;
                }
            }
        }

        /*
         * At this point we know the backend has unmapped the region. It is now
         * safe to remove it from the shadow table.
         */
        memmove(&u->shadow_regions[shadow_reg_idx],
                &u->shadow_regions[shadow_reg_idx + 1],
                sizeof(struct vhost_memory_region) *
                (u->num_shadow_regions - shadow_reg_idx - 1));
        u->num_shadow_regions--;
    }

    return 0;
}

static int send_add_regions(struct vhost_dev *dev,
                            struct scrub_regions *add_reg, int nr_add_reg,
                            VhostUserMsg *msg, uint64_t *shadow_pcb,
                            bool reply_supported, bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    int i, fd, ret, reg_idx, reg_fd_idx;
    struct vhost_memory_region *reg;
    MemoryRegion *mr;
    ram_addr_t offset;
    VhostUserMsg msg_reply;
    VhostUserMemoryRegion region_buffer;

    for (i = 0; i < nr_add_reg; i++) {
        reg = add_reg[i].region;
        reg_idx = add_reg[i].reg_idx;
        reg_fd_idx = add_reg[i].fd_idx;

        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);

        if (fd > 0) {
            if (track_ramblocks) {
                trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name,
                                                      reg->memory_size,
                                                      reg->guest_phys_addr,
                                                      reg->userspace_addr,
                                                      offset);
                u->region_rb_offset[reg_idx] = offset;
                u->region_rb[reg_idx] = mr->ram_block;
            }
            msg->hdr.request = VHOST_USER_ADD_MEM_REG;
            vhost_user_fill_msg_region(&region_buffer, reg, offset);
            msg->payload.mem_reg.region = region_buffer;

            ret = vhost_user_write(dev, msg, &fd, 1);
            if (ret < 0) {
                return ret;
            }

            if (track_ramblocks) {
                uint64_t reply_gpa;

                ret = vhost_user_read(dev, &msg_reply);
                if (ret < 0) {
                    return ret;
                }

                reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr;

                if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) {
                    error_report("%s: Received unexpected msg type."
                                 "Expected %d received %d", __func__,
                                 VHOST_USER_ADD_MEM_REG,
                                 msg_reply.hdr.request);
                    return -EPROTO;
                }

                /*
                 * We're using the same structure, just reusing one of the
                 * fields, so it should be the same size.
                 */
                if (msg_reply.hdr.size != msg->hdr.size) {
                    error_report("%s: Unexpected size for postcopy reply "
                                 "%d vs %d", __func__, msg_reply.hdr.size,
                                 msg->hdr.size);
                    return -EPROTO;
                }

                /* Get the postcopy client base from the backend's reply. */
                if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) {
                    shadow_pcb[reg_idx] =
                        msg_reply.payload.mem_reg.region.userspace_addr;
                    trace_vhost_user_set_mem_table_postcopy(
                        msg_reply.payload.mem_reg.region.userspace_addr,
                        msg->payload.mem_reg.region.userspace_addr,
                        reg_fd_idx, reg_idx);
                } else {
                    error_report("%s: invalid postcopy reply for region. "
                                 "Got guest physical address %" PRIX64 ", expected "
                                 "%" PRIX64, __func__, reply_gpa,
                                 dev->mem->regions[reg_idx].guest_phys_addr);
                    return -EPROTO;
                }
            } else if (reply_supported) {
                ret = process_message_reply(dev, msg);
                if (ret) {
                    return ret;
                }
            }
        } else if (track_ramblocks) {
            u->region_rb_offset[reg_idx] = 0;
            u->region_rb[reg_idx] = NULL;
        }

        /*
         * At this point, we know the backend has mapped in the new
         * region, if the region has a valid file descriptor.
         *
         * The region should now be added to the shadow table.
         */
        u->shadow_regions[u->num_shadow_regions].guest_phys_addr =
            reg->guest_phys_addr;
        u->shadow_regions[u->num_shadow_regions].userspace_addr =
            reg->userspace_addr;
        u->shadow_regions[u->num_shadow_regions].memory_size =
            reg->memory_size;
        u->num_shadow_regions++;
    }

    return 0;
}

static int vhost_user_add_remove_regions(struct vhost_dev *dev,
                                         VhostUserMsg *msg,
                                         bool reply_supported,
                                         bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS];
    struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS];
    uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {};
    int nr_add_reg, nr_rem_reg;
    int ret;

    msg->hdr.size = sizeof(msg->payload.mem_reg);

    /* Find the regions which need to be removed or added. */
    scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg,
                         shadow_pcb, track_ramblocks);

    if (nr_rem_reg) {
        ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg,
                                  reply_supported);
        if (ret < 0) {
            goto err;
        }
    }

    if (nr_add_reg) {
        ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb,
                               reply_supported, track_ramblocks);
        if (ret < 0) {
            goto err;
        }
    }

    if (track_ramblocks) {
        memcpy(u->postcopy_client_bases, shadow_pcb,
               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
        /*
         * Now we've registered this with the postcopy code, we ack to the
         * client, because now we're in the position to be able to deal with
         * any faults it generates.
         */
        /* TODO: Use this for failure cases as well with a bad value. */
        msg->hdr.size = sizeof(msg->payload.u64);
        msg->payload.u64 = 0; /* OK */

        ret = vhost_user_write(dev, msg, NULL, 0);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;

err:
    if (track_ramblocks) {
        memcpy(u->postcopy_client_bases, shadow_pcb,
               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
    }

    return ret;
}

static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
                                             struct vhost_memory *mem,
                                             bool reply_supported,
                                             bool config_mem_slots)
{
    struct vhost_user *u = dev->opaque;
    int fds[VHOST_MEMORY_BASELINE_NREGIONS];
    size_t fd_num = 0;
    VhostUserMsg msg_reply;
    int region_i, msg_i;
    int ret;

    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (u->region_rb_len < dev->mem->nregions) {
        u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
        u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
                                      dev->mem->nregions);
        memset(&(u->region_rb[u->region_rb_len]), '\0',
               sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
        memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
               sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
        u->region_rb_len = dev->mem->nregions;
    }

    if (config_mem_slots) {
        ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true);
        if (ret < 0) {
            return ret;
        }
    } else {
        ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
                                                true);
        if (ret < 0) {
            return ret;
        }

        ret = vhost_user_write(dev, &msg, fds, fd_num);
        if (ret < 0) {
            return ret;
        }

        ret = vhost_user_read(dev, &msg_reply);
        if (ret < 0) {
            return ret;
        }

        if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
            error_report("%s: Received unexpected msg type."
                         "Expected %d received %d", __func__,
                         VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
            return -EPROTO;
        }

        /*
         * We're using the same structure, just reusing one of the
         * fields, so it should be the same size.
         */
        if (msg_reply.hdr.size != msg.hdr.size) {
            error_report("%s: Unexpected size for postcopy reply "
                         "%d vs %d", __func__, msg_reply.hdr.size,
                         msg.hdr.size);
            return -EPROTO;
        }

        memset(u->postcopy_client_bases, 0,
               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);

        /*
         * They're in the same order as the regions that were sent
         * but some of the regions were skipped (above) if they
         * didn't have fd's
         */
        for (msg_i = 0, region_i = 0;
             region_i < dev->mem->nregions;
             region_i++) {
            if (msg_i < fd_num &&
                msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
                dev->mem->regions[region_i].guest_phys_addr) {
                u->postcopy_client_bases[region_i] =
                    msg_reply.payload.memory.regions[msg_i].userspace_addr;
                trace_vhost_user_set_mem_table_postcopy(
                    msg_reply.payload.memory.regions[msg_i].userspace_addr,
                    msg.payload.memory.regions[msg_i].userspace_addr,
                    msg_i, region_i);
                msg_i++;
            }
        }
        if (msg_i != fd_num) {
            error_report("%s: postcopy reply not fully consumed "
                         "%d vs %zd",
                         __func__, msg_i, fd_num);
            return -EIO;
        }

        /*
         * Now we've registered this with the postcopy code, we ack to the
         * client, because now we're in the position to be able to deal
         * with any faults it generates.
         */
        /* TODO: Use this for failure cases as well with a bad value. */
        msg.hdr.size = sizeof(msg.payload.u64);
        msg.payload.u64 = 0; /* OK */
        ret = vhost_user_write(dev, &msg, NULL, 0);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

static int vhost_user_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    struct vhost_user *u = dev->opaque;
    int fds[VHOST_MEMORY_BASELINE_NREGIONS];
    size_t fd_num = 0;
    bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
    bool config_mem_slots =
        virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS);
    int ret;

    if (do_postcopy) {
        /*
         * Postcopy has enough differences that it's best done in its own
         * version
         */
        return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported,
                                                 config_mem_slots);
    }

    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    if (config_mem_slots) {
        ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false);
        if (ret < 0) {
            return ret;
        }
    } else {
        ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
                                                false);
        if (ret < 0) {
            return ret;
        }

        ret = vhost_user_write(dev, &msg, fds, fd_num);
        if (ret < 0) {
            return ret;
        }

        if (reply_supported) {
            return process_message_reply(dev, &msg);
        }
    }

    return 0;
}

static int vhost_user_set_vring_endian(struct vhost_dev *dev,
                                       struct vhost_vring_state *ring)
{
    bool cross_endian = virtio_has_feature(dev->protocol_features,
                                           VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };

    if (!cross_endian) {
        error_report("vhost-user trying to send unhandled ioctl");
        return -ENOTSUP;
    }

    return vhost_user_write(dev, &msg, NULL, 0);
}

static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (vhost_user_per_device_request(request) && dev->vq_index != 0) {
        return 0;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        return ret;
    }

    if (msg.hdr.request != request) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     request, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != sizeof(msg.payload.u64)) {
        error_report("Received bad msg size.");
        return -EPROTO;
    }

    *u64 = msg.payload.u64;

    return 0;
}

static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
{
    if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) {
        return -EPROTO;
    }

    return 0;
}
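
/*
 * Illustrative exchange for the GET-style helpers above (field values assume
 * protocol version 1):
 *
 *   ->  { .request = VHOST_USER_GET_STATUS, .flags = 0x1, .size = 0 }
 *   <-  { .request = VHOST_USER_GET_STATUS, .flags = 0x5, .size = 8 } + u64
 *
 * vhost_user_get_u64() rejects replies whose request code or payload size do
 * not match, so a backend answering with the wrong message type fails with
 * -EPROTO instead of silently corrupting state.
 */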

/* Note: "msg->hdr.flags" may be modified. */
static int vhost_user_write_sync(struct vhost_dev *dev, VhostUserMsg *msg,
                                 bool wait_for_reply)
{
    int ret;

    if (wait_for_reply) {
        bool reply_supported = virtio_has_feature(dev->protocol_features,
                                          VHOST_USER_PROTOCOL_F_REPLY_ACK);
        if (reply_supported) {
            msg->hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
        }
    }

    ret = vhost_user_write(dev, msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    if (wait_for_reply) {
        uint64_t dummy;

        if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
            return process_message_reply(dev, msg);
        }

        /*
         * We need to wait for a reply but the backend does not
         * support replies for the command we just sent.
         * Send VHOST_USER_GET_FEATURES which makes all backends
         * send a reply.
         */
        return vhost_user_get_features(dev, &dummy);
    }

    return 0;
}

static int vhost_set_vring(struct vhost_dev *dev,
                           unsigned long int request,
                           struct vhost_vring_state *ring,
                           bool wait_for_reply)
{
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };

    return vhost_user_write_sync(dev, &msg, wait_for_reply);
}

static int vhost_user_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring, false);
}

static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
{
    assert(n && n->unmap_addr);
    munmap(n->unmap_addr, qemu_real_host_page_size());
    n->unmap_addr = NULL;
}

/*
 * clean-up function for notifier, will finally free the structure
 * under rcu.
 */
static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
                                            VirtIODevice *vdev)
{
    if (n->addr) {
        if (vdev) {
            virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false);
        }
        assert(!n->unmap_addr);
        n->unmap_addr = n->addr;
        n->addr = NULL;
        call_rcu(n, vhost_user_host_notifier_free, rcu);
    }
}

static int vhost_user_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring, false);
}

static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
{
    int i;

    if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
        return -EINVAL;
    }

    for (i = 0; i < dev->nvqs; ++i) {
        int ret;
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = enable,
        };

        /*
         * SET_VRING_ENABLE travels from guest to QEMU to vhost-user backend /
         * control plane thread via unix domain socket. Virtio requests travel
         * from guest to vhost-user backend / data plane thread via eventfd.
         * Even if the guest enables the ring first, and pushes its first virtio
         * request second (conforming to the virtio spec), the data plane thread
         * in the backend may see the virtio request before the control plane
         * thread sees the queue enablement. This causes (in fact, requires) the
         * data plane thread to discard the virtio request (it arrived on a
         * seemingly disabled queue).
         * To prevent this out-of-order delivery,
         * don't let the guest proceed to pushing the virtio request until the
         * backend control plane acknowledges enabling the queue -- IOW, pass
         * wait_for_reply=true below.
         */
        ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state, true);
        if (ret < 0) {
            /*
             * Restoring the previous state is likely infeasible, as is
             * proceeding regardless of the error, so just bail out and hope
             * for device-level recovery.
             */
            return ret;
        }
    }

    return 0;
}

static VhostUserHostNotifier *fetch_notifier(VhostUserState *u,
                                             int idx)
{
    if (idx >= u->notifiers->len) {
        return NULL;
    }
    return g_ptr_array_index(u->notifiers, idx);
}

static int vhost_user_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_VRING_BASE,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };
    struct vhost_user *u = dev->opaque;

    VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
    if (n) {
        vhost_user_host_notifier_remove(n, dev->vdev);
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_GET_VRING_BASE, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != sizeof(msg.payload.state)) {
        error_report("Received bad msg size.");
        return -EPROTO;
    }

    *ring = msg.payload.state;

    return 0;
}

static int vhost_set_vring_file(struct vhost_dev *dev,
                                VhostUserRequest request,
                                struct vhost_vring_file *file)
{
    int fds[VHOST_USER_MAX_RAM_SLOTS];
    size_t fd_num = 0;
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
        .hdr.size = sizeof(msg.payload.u64),
    };

    if (ioeventfd_enabled() && file->fd > 0) {
        fds[fd_num++] = file->fd;
    } else {
        msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
    }

    return vhost_user_write(dev, &msg, fds, fd_num);
}

static int vhost_user_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
}

static int vhost_user_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
}

static int vhost_user_set_vring_err(struct vhost_dev *dev,
                                    struct vhost_vring_file *file)
{
    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_ERR, file);
}

static int vhost_user_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_VRING_ADDR,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.addr = *addr,
        .hdr.size = sizeof(msg.payload.addr),
    };

    /*
     * wait for a reply if logging is enabled to make sure
     * backend is actually logging changes
     */
    bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG);

    return vhost_user_write_sync(dev, &msg, wait_for_reply);
}
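
/*
 * Reminder on the kick/call/err encoding used by vhost_set_vring_file()
 * above (illustrative): payload.u64 carries the ring index in its low byte,
 * and VHOST_USER_VRING_NOFD_MASK (bit 8) is set when no eventfd accompanies
 * the message.  For example, detaching the call eventfd of ring 3 sends
 * u64 = 0x103 with no ancillary fd, while attaching one sends u64 = 0x003
 * plus the eventfd as SCM_RIGHTS data.
 */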

static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64,
                              bool wait_for_reply)
{
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.u64 = u64,
        .hdr.size = sizeof(msg.payload.u64),
    };

    return vhost_user_write_sync(dev, &msg, wait_for_reply);
}

static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status)
{
    return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status, false);
}

static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status)
{
    uint64_t value;
    int ret;

    ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &value);
    if (ret < 0) {
        return ret;
    }
    *status = value;

    return 0;
}

static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;
    int ret;

    ret = vhost_user_get_status(dev, &s);
    if (ret < 0) {
        return ret;
    }

    if ((s & status) == status) {
        return 0;
    }
    s |= status;

    return vhost_user_set_status(dev, s);
}

static int vhost_user_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    /*
     * wait for a reply if logging is enabled to make sure
     * backend is actually logging changes
     */
    bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL);
    int ret;

    /*
     * We need to include any extra backend only feature bits that
     * might be needed by our device. Currently this includes the
     * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol
     * features.
     */
    ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES,
                             features | dev->backend_features,
                             log_enabled);

    if (virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_STATUS)) {
        if (!ret) {
            return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
        }
    }

    return ret;
}

static int vhost_user_set_protocol_features(struct vhost_dev *dev,
                                            uint64_t features)
{
    return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features,
                              false);
}

static int vhost_user_set_owner(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_OWNER,
        .hdr.flags = VHOST_USER_VERSION,
    };

    return vhost_user_write(dev, &msg, NULL, 0);
}

static int vhost_user_get_max_memslots(struct vhost_dev *dev,
                                       uint64_t *max_memslots)
{
    uint64_t backend_max_memslots;
    int err;

    err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS,
                             &backend_max_memslots);
    if (err < 0) {
        return err;
    }

    *max_memslots = backend_max_memslots;

    return 0;
}

static int vhost_user_reset_device(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.request = VHOST_USER_RESET_DEVICE,
    };

    /*
     * Historically, reset was not implemented so only reset devices
     * that are expecting it.
     */
    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_RESET_DEVICE)) {
        return -ENOSYS;
    }

    return vhost_user_write(dev, &msg, NULL, 0);
}

static int vhost_user_backend_handle_config_change(struct vhost_dev *dev)
{
    if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
        return -ENOSYS;
    }

    return dev->config_ops->vhost_dev_config_notifier(dev);
}

/*
 * Fetch or create the notifier for a given idx. Newly created
 * notifiers are added to the pointer array that tracks them.
 */
static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u,
                                                       int idx)
{
    VhostUserHostNotifier *n = NULL;
    if (idx >= u->notifiers->len) {
        g_ptr_array_set_size(u->notifiers, idx + 1);
    }

    n = g_ptr_array_index(u->notifiers, idx);
    if (!n) {
        /*
         * In case notifications arrive out-of-order,
         * make room for the current index.
         */
        g_ptr_array_remove_index(u->notifiers, idx);
        n = g_new0(VhostUserHostNotifier, 1);
        n->idx = idx;
        g_ptr_array_insert(u->notifiers, idx, n);
        trace_vhost_user_create_notifier(idx, n);
    }

    return n;
}

static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev,
                                                         VhostUserVringArea *area,
                                                         int fd)
{
    int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
    size_t page_size = qemu_real_host_page_size();
    struct vhost_user *u = dev->opaque;
    VhostUserState *user = u->user;
    VirtIODevice *vdev = dev->vdev;
    VhostUserHostNotifier *n;
    void *addr;
    char *name;

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
        vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
        return -EINVAL;
    }

    /*
     * Fetch notifier and invalidate any old data before setting up
     * new mapped address.
     */
    n = fetch_or_create_notifier(user, queue_idx);
    vhost_user_host_notifier_remove(n, vdev);

    if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
        return 0;
    }

    /* Sanity check. */
    if (area->size != page_size) {
        return -EINVAL;
    }

    addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                fd, area->offset);
    if (addr == MAP_FAILED) {
        return -EFAULT;
    }

    name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
                           user, queue_idx);
    if (!n->mr.ram) { /* Don't init again after suspend. */
        memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
                                          page_size, addr);
    } else {
        n->mr.ram_block->host = addr;
    }
    g_free(name);

    if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
        object_unparent(OBJECT(&n->mr));
        munmap(addr, page_size);
        return -ENXIO;
    }

    n->addr = addr;

    return 0;
}
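
/*
 * Rough picture of the host-notifier path handled above (illustrative): the
 * backend sends VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG with an fd and a
 * page-sized {offset, size} window.  QEMU mmap()s that window and wraps it
 * in a ram-device MemoryRegion, so a guest write to the queue's notify
 * address lands directly in the backend's mapping instead of going through
 * the ioeventfd path.  Sending the message again with
 * VHOST_USER_VRING_NOFD_MASK set in area->u64 tears the mapping down.
 */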

static int
vhost_user_backend_handle_shared_object_add(struct vhost_dev *dev,
                                            VhostUserShared *object)
{
    QemuUUID uuid;

    memcpy(uuid.data, object->uuid, sizeof(object->uuid));
    return virtio_add_vhost_device(&uuid, dev);
}

static int
vhost_user_backend_handle_shared_object_remove(VhostUserShared *object)
{
    QemuUUID uuid;

    memcpy(uuid.data, object->uuid, sizeof(object->uuid));
    return virtio_remove_resource(&uuid);
}

static bool vhost_user_send_resp(QIOChannel *ioc, VhostUserHeader *hdr,
                                 VhostUserPayload *payload, Error **errp)
{
    struct iovec iov[] = {
        { .iov_base = hdr, .iov_len = VHOST_USER_HDR_SIZE },
        { .iov_base = payload, .iov_len = hdr->size },
    };

    hdr->flags &= ~VHOST_USER_NEED_REPLY_MASK;
    hdr->flags |= VHOST_USER_REPLY_MASK;

    return !qio_channel_writev_all(ioc, iov, ARRAY_SIZE(iov), errp);
}

static bool
vhost_user_backend_send_dmabuf_fd(QIOChannel *ioc, VhostUserHeader *hdr,
                                  VhostUserPayload *payload, Error **errp)
{
    hdr->size = sizeof(payload->u64);
    return vhost_user_send_resp(ioc, hdr, payload, errp);
}

int vhost_user_get_shared_object(struct vhost_dev *dev, unsigned char *uuid,
                                 int *dmabuf_fd)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_SHARED_OBJECT,
        .hdr.flags = VHOST_USER_VERSION,
    };
    memcpy(msg.payload.object.uuid, uuid, sizeof(msg.payload.object.uuid));

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_GET_SHARED_OBJECT) {
        error_report("Received unexpected msg type. "
                     "Expected %d received %d",
                     VHOST_USER_GET_SHARED_OBJECT, msg.hdr.request);
        return -EPROTO;
    }

    *dmabuf_fd = qemu_chr_fe_get_msgfd(chr);
    if (*dmabuf_fd < 0) {
        error_report("Failed to get dmabuf fd");
        return -EIO;
    }

    return 0;
}

static int
vhost_user_backend_handle_shared_object_lookup(struct vhost_user *u,
                                               QIOChannel *ioc,
                                               VhostUserHeader *hdr,
                                               VhostUserPayload *payload)
{
    QemuUUID uuid;
    CharBackend *chr = u->user->chr;
    Error *local_err = NULL;
    int dmabuf_fd = -1;
    int fd_num = 0;

    memcpy(uuid.data, payload->object.uuid, sizeof(payload->object.uuid));

    payload->u64 = 0;
    switch (virtio_object_type(&uuid)) {
    case TYPE_DMABUF:
        dmabuf_fd = virtio_lookup_dmabuf(&uuid);
        break;
    case TYPE_VHOST_DEV:
    {
        struct vhost_dev *dev = virtio_lookup_vhost_device(&uuid);
        if (dev == NULL) {
            payload->u64 = -EINVAL;
            break;
        }
        int ret = vhost_user_get_shared_object(dev, uuid.data, &dmabuf_fd);
        if (ret < 0) {
            payload->u64 = ret;
        }
        break;
    }
    case TYPE_INVALID:
        payload->u64 = -EINVAL;
        break;
    }

    if (dmabuf_fd != -1) {
        fd_num++;
    }

    if (qemu_chr_fe_set_msgfds(chr, &dmabuf_fd, fd_num) < 0) {
        error_report("Failed to set msg fds.");
        payload->u64 = -EINVAL;
    }

    if (!vhost_user_backend_send_dmabuf_fd(ioc, hdr, payload, &local_err)) {
        error_report_err(local_err);
        return -EINVAL;
    }

    return 0;
}

static void close_backend_channel(struct vhost_user *u)
{
    g_source_destroy(u->backend_src);
    g_source_unref(u->backend_src);
    u->backend_src = NULL;
    object_unref(OBJECT(u->backend_ioc));
    u->backend_ioc = NULL;
}

static gboolean backend_read(QIOChannel *ioc, GIOCondition condition,
                             gpointer opaque)
{
    struct vhost_dev *dev = opaque;
    struct vhost_user *u = dev->opaque;
    VhostUserHeader hdr = { 0, };
    VhostUserPayload payload = { 0, };
    Error *local_err = NULL;
    gboolean rc = G_SOURCE_CONTINUE;
    int ret = 0;
    struct iovec iov;
    g_autofree int *fd = NULL;
    size_t fdsize = 0;
    int i;

    /* Read header */
    iov.iov_base = &hdr;
    iov.iov_len = VHOST_USER_HDR_SIZE;

    if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) {
        error_report_err(local_err);
        goto err;
    }

    if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
        error_report("Failed to read msg header."
                     " Size %d exceeds the maximum %zu.", hdr.size,
                     VHOST_USER_PAYLOAD_SIZE);
        goto err;
    }

    /* Read payload */
    if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) {
        error_report_err(local_err);
        goto err;
    }

    switch (hdr.request) {
    case VHOST_USER_BACKEND_IOTLB_MSG:
        ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
        break;
    case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG:
        ret = vhost_user_backend_handle_config_change(dev);
        break;
    case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG:
        ret = vhost_user_backend_handle_vring_host_notifier(dev, &payload.area,
                                                            fd ? fd[0] : -1);
        break;
    case VHOST_USER_BACKEND_SHARED_OBJECT_ADD:
        ret = vhost_user_backend_handle_shared_object_add(dev, &payload.object);
        break;
    case VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE:
        ret = vhost_user_backend_handle_shared_object_remove(&payload.object);
        break;
    case VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP:
        ret = vhost_user_backend_handle_shared_object_lookup(dev->opaque, ioc,
                                                             &hdr, &payload);
        break;
    default:
        error_report("Received unexpected msg type: %d.", hdr.request);
        ret = -EINVAL;
    }

    /*
     * REPLY_ACK feature handling. Other reply types have to be managed
     * directly in their request handlers.
     */
    if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
        payload.u64 = !!ret;
        hdr.size = sizeof(payload.u64);

        if (!vhost_user_send_resp(ioc, &hdr, &payload, &local_err)) {
            error_report_err(local_err);
            goto err;
        }
    }

    goto fdcleanup;

err:
    close_backend_channel(u);
    rc = G_SOURCE_REMOVE;

fdcleanup:
    if (fd) {
        for (i = 0; i < fdsize; i++) {
            close(fd[i]);
        }
    }
    return rc;
}

static int vhost_setup_backend_channel(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD,
        .hdr.flags = VHOST_USER_VERSION,
    };
    struct vhost_user *u = dev->opaque;
    int sv[2], ret = 0;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
    Error *local_err = NULL;
    QIOChannel *ioc;

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_BACKEND_REQ)) {
        return 0;
    }

    if (qemu_socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
        int saved_errno = errno;
        error_report("socketpair() failed");
        return -saved_errno;
    }

    ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err));
    if (!ioc) {
        error_report_err(local_err);
        return -ECONNREFUSED;
    }
    u->backend_ioc = ioc;
    u->backend_src = qio_channel_add_watch_source(u->backend_ioc,
                                                  G_IO_IN | G_IO_HUP,
                                                  backend_read, dev, NULL, NULL);

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    ret = vhost_user_write(dev, &msg, &sv[1], 1);
    if (ret) {
        goto out;
    }

    if (reply_supported) {
        ret = process_message_reply(dev, &msg);
    }

out:
    close(sv[1]);
    if (ret) {
        close_backend_channel(u);
    }

    return ret;
}
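
/*
 * Note on direction (illustrative summary): the socketpair created above
 * gives the backend its own channel for backend-to-QEMU requests such as
 * IOTLB misses, config-change notifications and host-notifier setup.
 * backend_read() services those requests and, when the backend set
 * VHOST_USER_NEED_REPLY_MASK, answers through vhost_user_send_resp() with
 * payload.u64 == 0 on success or 1 on failure.
 */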

#ifdef CONFIG_LINUX
/*
 * Called back from the postcopy fault thread when a fault is received on our
 * ufd.
 * TODO: This is Linux specific
 */
static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
                                             void *ufd)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    struct uffd_msg *msg = ufd;
    uint64_t faultaddr = msg->arg.pagefault.address;
    RAMBlock *rb = NULL;
    uint64_t rb_offset;
    int i;

    trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
                                            dev->mem->nregions);
    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
        trace_vhost_user_postcopy_fault_handler_loop(i,
                u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
        if (faultaddr >= u->postcopy_client_bases[i]) {
            /* Offset of the fault address in the vhost region */
            uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
            if (region_offset < dev->mem->regions[i].memory_size) {
                rb_offset = region_offset + u->region_rb_offset[i];
                trace_vhost_user_postcopy_fault_handler_found(i,
                        region_offset, rb_offset);
                rb = u->region_rb[i];
                return postcopy_request_shared_page(pcfd, rb, faultaddr,
                                                    rb_offset);
            }
        }
    }
    error_report("%s: Failed to find region for fault %" PRIx64,
                 __func__, faultaddr);
    return -1;
}

static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
                                     uint64_t offset)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    int i;

    trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);

    if (!u) {
        return 0;
    }
    /* Translate the offset into an address in the client's address space */
    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
        if (u->region_rb[i] == rb &&
            offset >= u->region_rb_offset[i] &&
            offset < (u->region_rb_offset[i] +
                      dev->mem->regions[i].memory_size)) {
            uint64_t client_addr = (offset - u->region_rb_offset[i]) +
                                   u->postcopy_client_bases[i];
            trace_vhost_user_postcopy_waker_found(client_addr);
            return postcopy_wake_shared(pcfd, client_addr, rb);
        }
    }

    trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
    return 0;
}
#endif

/*
 * Called at the start of an inbound postcopy on reception of the
 * 'advise' command.
 */
static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
{
#ifdef CONFIG_LINUX
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    int ufd;
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
        .hdr.flags = VHOST_USER_VERSION,
    };

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_setg(errp, "Failed to send postcopy_advise to vhost");
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        error_setg(errp, "Failed to get postcopy_advise reply from vhost");
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
        error_setg(errp, "Unexpected msg type. Expected %d received %d",
                   VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size) {
        error_setg(errp, "Received bad msg size.");
        return -EPROTO;
    }
    ufd = qemu_chr_fe_get_msgfd(chr);
    if (ufd < 0) {
        error_setg(errp, "%s: Failed to get ufd", __func__);
        return -EIO;
    }
    qemu_socket_set_nonblock(ufd);

    /* register ufd with userfault thread */
    u->postcopy_fd.fd = ufd;
    u->postcopy_fd.data = dev;
    u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
    u->postcopy_fd.waker = vhost_user_postcopy_waker;
    u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
    postcopy_register_shared_ufd(&u->postcopy_fd);
    return 0;
#else
    error_setg(errp, "Postcopy not supported on non-Linux systems");
    return -ENOSYS;
#endif
}

/*
 * Called at the switch to postcopy on reception of the 'listen' command.
 */
static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
{
    struct vhost_user *u = dev->opaque;
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
    };
    u->postcopy_listen = true;

    trace_vhost_user_postcopy_listen();

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_setg(errp, "Failed to send postcopy_listen to vhost");
        return ret;
    }

    ret = process_message_reply(dev, &msg);
    if (ret) {
        error_setg(errp, "Failed to receive reply to postcopy_listen");
        return ret;
    }

    return 0;
}

/*
 * Called at the end of postcopy
 */
static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_END,
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
    };
    int ret;
    struct vhost_user *u = dev->opaque;

    trace_vhost_user_postcopy_end_entry();

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_setg(errp, "Failed to send postcopy_end to vhost");
        return ret;
    }

    ret = process_message_reply(dev, &msg);
    if (ret) {
        error_setg(errp, "Failed to receive reply to postcopy_end");
        return ret;
    }
    postcopy_unregister_shared_ufd(&u->postcopy_fd);
    close(u->postcopy_fd.fd);
    u->postcopy_fd.handler = NULL;

    trace_vhost_user_postcopy_end_exit();

    return 0;
}

static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
                                        void *opaque)
{
    struct PostcopyNotifyData *pnd = opaque;
    struct vhost_user *u = container_of(notifier, struct vhost_user,
                                        postcopy_notifier);
    struct vhost_dev *dev = u->dev;

    switch (pnd->reason) {
    case POSTCOPY_NOTIFY_PROBE:
        if (!virtio_has_feature(dev->protocol_features,
                                VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
            /* TODO: Get the device name into this error somehow */
            error_setg(pnd->errp,
                       "vhost-user backend not capable of postcopy");
            return -ENOENT;
        }
        break;

    case POSTCOPY_NOTIFY_INBOUND_ADVISE:
        return vhost_user_postcopy_advise(dev, pnd->errp);

    case POSTCOPY_NOTIFY_INBOUND_LISTEN:
        return vhost_user_postcopy_listen(dev, pnd->errp);

    case POSTCOPY_NOTIFY_INBOUND_END:
        return vhost_user_postcopy_end(dev, pnd->errp);

    default:
2111 /* We ignore notifications we don't know about */ 2112 break; 2113 } 2114 2115 return 0; 2116 } 2117 2118 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, 2119 Error **errp) 2120 { 2121 uint64_t features, ram_slots; 2122 struct vhost_user *u; 2123 VhostUserState *vus = (VhostUserState *) opaque; 2124 int err; 2125 2126 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2127 2128 u = g_new0(struct vhost_user, 1); 2129 u->user = vus; 2130 u->dev = dev; 2131 dev->opaque = u; 2132 2133 err = vhost_user_get_features(dev, &features); 2134 if (err < 0) { 2135 error_setg_errno(errp, -err, "vhost_backend_init failed"); 2136 return err; 2137 } 2138 2139 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 2140 bool supports_f_config = vus->supports_config || 2141 (dev->config_ops && dev->config_ops->vhost_dev_config_notifier); 2142 uint64_t protocol_features; 2143 2144 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 2145 2146 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, 2147 &protocol_features); 2148 if (err < 0) { 2149 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2150 return -EPROTO; 2151 } 2152 2153 /* 2154 * We will use all the protocol features we support - although 2155 * we suppress F_CONFIG if we know QEMU's internal code cannot support 2156 * it. 2157 */ 2158 protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK; 2159 2160 if (supports_f_config) { 2161 if (!virtio_has_feature(protocol_features, 2162 VHOST_USER_PROTOCOL_F_CONFIG)) { 2163 error_setg(errp, "vhost-user device expecting " 2164 "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user backend does " 2165 "not support it."); 2166 return -EPROTO; 2167 } 2168 } else { 2169 if (virtio_has_feature(protocol_features, 2170 VHOST_USER_PROTOCOL_F_CONFIG)) { 2171 warn_report("vhost-user backend supports " 2172 "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not."); 2173 protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); 2174 } 2175 } 2176 2177 /* final set of protocol features */ 2178 dev->protocol_features = protocol_features; 2179 err = vhost_user_set_protocol_features(dev, dev->protocol_features); 2180 if (err < 0) { 2181 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2182 return -EPROTO; 2183 } 2184 2185 /* query the max queues we support if the backend supports multiple queues */ 2186 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 2187 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, 2188 &dev->max_queues); 2189 if (err < 0) { 2190 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2191 return -EPROTO; 2192 } 2193 } else { 2194 dev->max_queues = 1; 2195 } 2196 2197 if (dev->num_queues && dev->max_queues < dev->num_queues) { 2198 error_setg(errp, "The maximum number of queues supported by the " 2199 "backend is %" PRIu64, dev->max_queues); 2200 return -EINVAL; 2201 } 2202 2203 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && 2204 !(virtio_has_feature(dev->protocol_features, 2205 VHOST_USER_PROTOCOL_F_BACKEND_REQ) && 2206 virtio_has_feature(dev->protocol_features, 2207 VHOST_USER_PROTOCOL_F_REPLY_ACK))) { 2208 error_setg(errp, "IOMMU support requires reply-ack and " 2209 "backend-req protocol features."); 2210 return -EINVAL; 2211 } 2212 2213 /* get the max memory regions if the backend supports configurable RAM slots */ 2214 if (!virtio_has_feature(dev->protocol_features, 2215 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) { 2216 u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS; 2217 }
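/* Otherwise the backend advertises its own RAM slot limit: query it below and make sure it never shrinks from the previously validated value. */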
else { 2218 err = vhost_user_get_max_memslots(dev, &ram_slots); 2219 if (err < 0) { 2220 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2221 return -EPROTO; 2222 } 2223 2224 if (ram_slots < u->user->memory_slots) { 2225 error_setg(errp, "The backend specified a max ram slots limit " 2226 "of %" PRIu64", when the prior validated limit was " 2227 "%d. This limit should never decrease.", ram_slots, 2228 u->user->memory_slots); 2229 return -EINVAL; 2230 } 2231 2232 u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS); 2233 } 2234 } 2235 2236 if (dev->migration_blocker == NULL && 2237 !virtio_has_feature(dev->protocol_features, 2238 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { 2239 error_setg(&dev->migration_blocker, 2240 "Migration disabled: vhost-user backend lacks " 2241 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); 2242 } 2243 2244 if (dev->vq_index == 0) { 2245 err = vhost_setup_backend_channel(dev); 2246 if (err < 0) { 2247 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2248 return -EPROTO; 2249 } 2250 } 2251 2252 u->postcopy_notifier.notify = vhost_user_postcopy_notifier; 2253 postcopy_add_notifier(&u->postcopy_notifier); 2254 2255 return 0; 2256 } 2257 2258 static int vhost_user_backend_cleanup(struct vhost_dev *dev) 2259 { 2260 struct vhost_user *u; 2261 2262 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2263 2264 u = dev->opaque; 2265 if (u->postcopy_notifier.notify) { 2266 postcopy_remove_notifier(&u->postcopy_notifier); 2267 u->postcopy_notifier.notify = NULL; 2268 } 2269 u->postcopy_listen = false; 2270 if (u->postcopy_fd.handler) { 2271 postcopy_unregister_shared_ufd(&u->postcopy_fd); 2272 close(u->postcopy_fd.fd); 2273 u->postcopy_fd.handler = NULL; 2274 } 2275 if (u->backend_ioc) { 2276 close_backend_channel(u); 2277 } 2278 g_free(u->region_rb); 2279 u->region_rb = NULL; 2280 g_free(u->region_rb_offset); 2281 u->region_rb_offset = NULL; 2282 u->region_rb_len = 0; 2283 g_free(u); 2284 dev->opaque = 0; 2285 2286 return 0; 2287 } 2288 2289 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 2290 { 2291 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 2292 2293 return idx; 2294 } 2295 2296 static int vhost_user_memslots_limit(struct vhost_dev *dev) 2297 { 2298 struct vhost_user *u = dev->opaque; 2299 2300 return u->user->memory_slots; 2301 } 2302 2303 static bool vhost_user_requires_shm_log(struct vhost_dev *dev) 2304 { 2305 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2306 2307 return virtio_has_feature(dev->protocol_features, 2308 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 2309 } 2310 2311 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) 2312 { 2313 VhostUserMsg msg = { }; 2314 2315 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2316 2317 /* If guest supports GUEST_ANNOUNCE do nothing */ 2318 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { 2319 return 0; 2320 } 2321 2322 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ 2323 if (virtio_has_feature(dev->protocol_features, 2324 VHOST_USER_PROTOCOL_F_RARP)) { 2325 msg.hdr.request = VHOST_USER_SEND_RARP; 2326 msg.hdr.flags = VHOST_USER_VERSION; 2327 memcpy((char *)&msg.payload.u64, mac_addr, 6); 2328 msg.hdr.size = sizeof(msg.payload.u64); 2329 2330 return vhost_user_write(dev, &msg, NULL, 0); 2331 } 2332 return -ENOTSUP; 2333 } 2334 2335 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) 2336 { 2337 VhostUserMsg msg; 2338 bool 
reply_supported = virtio_has_feature(dev->protocol_features, 2339 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2340 int ret; 2341 2342 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) { 2343 return 0; 2344 } 2345 2346 msg.hdr.request = VHOST_USER_NET_SET_MTU; 2347 msg.payload.u64 = mtu; 2348 msg.hdr.size = sizeof(msg.payload.u64); 2349 msg.hdr.flags = VHOST_USER_VERSION; 2350 if (reply_supported) { 2351 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2352 } 2353 2354 ret = vhost_user_write(dev, &msg, NULL, 0); 2355 if (ret < 0) { 2356 return ret; 2357 } 2358 2359 /* If reply_ack supported, backend has to ack specified MTU is valid */ 2360 if (reply_supported) { 2361 return process_message_reply(dev, &msg); 2362 } 2363 2364 return 0; 2365 } 2366 2367 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev, 2368 struct vhost_iotlb_msg *imsg) 2369 { 2370 int ret; 2371 VhostUserMsg msg = { 2372 .hdr.request = VHOST_USER_IOTLB_MSG, 2373 .hdr.size = sizeof(msg.payload.iotlb), 2374 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 2375 .payload.iotlb = *imsg, 2376 }; 2377 2378 ret = vhost_user_write(dev, &msg, NULL, 0); 2379 if (ret < 0) { 2380 return ret; 2381 } 2382 2383 return process_message_reply(dev, &msg); 2384 } 2385 2386 2387 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled) 2388 { 2389 /* No-op as the receive channel is not dedicated to IOTLB messages. */ 2390 } 2391 2392 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, 2393 uint32_t config_len, Error **errp) 2394 { 2395 int ret; 2396 VhostUserMsg msg = { 2397 .hdr.request = VHOST_USER_GET_CONFIG, 2398 .hdr.flags = VHOST_USER_VERSION, 2399 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len, 2400 }; 2401 2402 if (!virtio_has_feature(dev->protocol_features, 2403 VHOST_USER_PROTOCOL_F_CONFIG)) { 2404 error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported"); 2405 return -EINVAL; 2406 } 2407 2408 assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE); 2409 2410 msg.payload.config.offset = 0; 2411 msg.payload.config.size = config_len; 2412 ret = vhost_user_write(dev, &msg, NULL, 0); 2413 if (ret < 0) { 2414 error_setg_errno(errp, -ret, "vhost_get_config failed"); 2415 return ret; 2416 } 2417 2418 ret = vhost_user_read(dev, &msg); 2419 if (ret < 0) { 2420 error_setg_errno(errp, -ret, "vhost_get_config failed"); 2421 return ret; 2422 } 2423 2424 if (msg.hdr.request != VHOST_USER_GET_CONFIG) { 2425 error_setg(errp, 2426 "Received unexpected msg type. 
Expected %d received %d", 2427 VHOST_USER_GET_CONFIG, msg.hdr.request); 2428 return -EPROTO; 2429 } 2430 2431 if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) { 2432 error_setg(errp, "Received bad msg size."); 2433 return -EPROTO; 2434 } 2435 2436 memcpy(config, msg.payload.config.region, config_len); 2437 2438 return 0; 2439 } 2440 2441 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data, 2442 uint32_t offset, uint32_t size, uint32_t flags) 2443 { 2444 int ret; 2445 uint8_t *p; 2446 bool reply_supported = virtio_has_feature(dev->protocol_features, 2447 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2448 2449 VhostUserMsg msg = { 2450 .hdr.request = VHOST_USER_SET_CONFIG, 2451 .hdr.flags = VHOST_USER_VERSION, 2452 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size, 2453 }; 2454 2455 if (!virtio_has_feature(dev->protocol_features, 2456 VHOST_USER_PROTOCOL_F_CONFIG)) { 2457 return -ENOTSUP; 2458 } 2459 2460 if (reply_supported) { 2461 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2462 } 2463 2464 if (size > VHOST_USER_MAX_CONFIG_SIZE) { 2465 return -EINVAL; 2466 } 2467 2468 msg.payload.config.offset = offset, 2469 msg.payload.config.size = size, 2470 msg.payload.config.flags = flags, 2471 p = msg.payload.config.region; 2472 memcpy(p, data, size); 2473 2474 ret = vhost_user_write(dev, &msg, NULL, 0); 2475 if (ret < 0) { 2476 return ret; 2477 } 2478 2479 if (reply_supported) { 2480 return process_message_reply(dev, &msg); 2481 } 2482 2483 return 0; 2484 } 2485 2486 static int vhost_user_crypto_create_session(struct vhost_dev *dev, 2487 void *session_info, 2488 uint64_t *session_id) 2489 { 2490 int ret; 2491 bool crypto_session = virtio_has_feature(dev->protocol_features, 2492 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2493 CryptoDevBackendSessionInfo *backend_info = session_info; 2494 VhostUserMsg msg = { 2495 .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION, 2496 .hdr.flags = VHOST_USER_VERSION, 2497 .hdr.size = sizeof(msg.payload.session), 2498 }; 2499 2500 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2501 2502 if (!crypto_session) { 2503 error_report("vhost-user trying to send unhandled ioctl"); 2504 return -ENOTSUP; 2505 } 2506 2507 if (backend_info->op_code == VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION) { 2508 CryptoDevBackendAsymSessionInfo *sess = &backend_info->u.asym_sess_info; 2509 size_t keylen; 2510 2511 memcpy(&msg.payload.session.u.asym.session_setup_data, sess, 2512 sizeof(CryptoDevBackendAsymSessionInfo)); 2513 if (sess->keylen) { 2514 keylen = sizeof(msg.payload.session.u.asym.key); 2515 if (sess->keylen > keylen) { 2516 error_report("Unsupported asymmetric key size"); 2517 return -ENOTSUP; 2518 } 2519 2520 memcpy(&msg.payload.session.u.asym.key, sess->key, 2521 sess->keylen); 2522 } 2523 } else { 2524 CryptoDevBackendSymSessionInfo *sess = &backend_info->u.sym_sess_info; 2525 size_t keylen; 2526 2527 memcpy(&msg.payload.session.u.sym.session_setup_data, sess, 2528 sizeof(CryptoDevBackendSymSessionInfo)); 2529 if (sess->key_len) { 2530 keylen = sizeof(msg.payload.session.u.sym.key); 2531 if (sess->key_len > keylen) { 2532 error_report("Unsupported cipher key size"); 2533 return -ENOTSUP; 2534 } 2535 2536 memcpy(&msg.payload.session.u.sym.key, sess->cipher_key, 2537 sess->key_len); 2538 } 2539 2540 if (sess->auth_key_len > 0) { 2541 keylen = sizeof(msg.payload.session.u.sym.auth_key); 2542 if (sess->auth_key_len > keylen) { 2543 error_report("Unsupported auth key size"); 2544 return -ENOTSUP; 2545 } 2546 2547 
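/* auth_key_len was validated against the payload buffer just above, so this copy cannot overflow the fixed-size auth_key field. */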
memcpy(&msg.payload.session.u.sym.auth_key, sess->auth_key, 2548 sess->auth_key_len); 2549 } 2550 } 2551 2552 msg.payload.session.op_code = backend_info->op_code; 2553 msg.payload.session.session_id = backend_info->session_id; 2554 ret = vhost_user_write(dev, &msg, NULL, 0); 2555 if (ret < 0) { 2556 error_report("vhost_user_write() returned %d, create session failed", 2557 ret); 2558 return ret; 2559 } 2560 2561 ret = vhost_user_read(dev, &msg); 2562 if (ret < 0) { 2563 error_report("vhost_user_read() returned %d, create session failed", 2564 ret); 2565 return ret; 2566 } 2567 2568 if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) { 2569 error_report("Received unexpected msg type. Expected %d received %d", 2570 VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request); 2571 return -EPROTO; 2572 } 2573 2574 if (msg.hdr.size != sizeof(msg.payload.session)) { 2575 error_report("Received bad msg size."); 2576 return -EPROTO; 2577 } 2578 2579 if (msg.payload.session.session_id < 0) { 2580 error_report("Bad session id: %" PRId64 "", 2581 msg.payload.session.session_id); 2582 return -EINVAL; 2583 } 2584 *session_id = msg.payload.session.session_id; 2585 2586 return 0; 2587 } 2588 2589 static int 2590 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id) 2591 { 2592 int ret; 2593 bool crypto_session = virtio_has_feature(dev->protocol_features, 2594 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2595 VhostUserMsg msg = { 2596 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION, 2597 .hdr.flags = VHOST_USER_VERSION, 2598 .hdr.size = sizeof(msg.payload.u64), 2599 }; 2600 msg.payload.u64 = session_id; 2601 2602 if (!crypto_session) { 2603 error_report("vhost-user trying to send unhandled ioctl"); 2604 return -ENOTSUP; 2605 } 2606 2607 ret = vhost_user_write(dev, &msg, NULL, 0); 2608 if (ret < 0) { 2609 error_report("vhost_user_write() returned %d, close session failed", 2610 ret); 2611 return ret; 2612 } 2613 2614 return 0; 2615 } 2616 2617 static bool vhost_user_no_private_memslots(struct vhost_dev *dev) 2618 { 2619 return true; 2620 } 2621 2622 static int vhost_user_get_inflight_fd(struct vhost_dev *dev, 2623 uint16_t queue_size, 2624 struct vhost_inflight *inflight) 2625 { 2626 void *addr; 2627 int fd; 2628 int ret; 2629 struct vhost_user *u = dev->opaque; 2630 CharBackend *chr = u->user->chr; 2631 VhostUserMsg msg = { 2632 .hdr.request = VHOST_USER_GET_INFLIGHT_FD, 2633 .hdr.flags = VHOST_USER_VERSION, 2634 .payload.inflight.num_queues = dev->nvqs, 2635 .payload.inflight.queue_size = queue_size, 2636 .hdr.size = sizeof(msg.payload.inflight), 2637 }; 2638 2639 if (!virtio_has_feature(dev->protocol_features, 2640 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2641 return 0; 2642 } 2643 2644 ret = vhost_user_write(dev, &msg, NULL, 0); 2645 if (ret < 0) { 2646 return ret; 2647 } 2648 2649 ret = vhost_user_read(dev, &msg); 2650 if (ret < 0) { 2651 return ret; 2652 } 2653 2654 if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) { 2655 error_report("Received unexpected msg type.
" 2656 "Expected %d received %d", 2657 VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request); 2658 return -EPROTO; 2659 } 2660 2661 if (msg.hdr.size != sizeof(msg.payload.inflight)) { 2662 error_report("Received bad msg size."); 2663 return -EPROTO; 2664 } 2665 2666 if (!msg.payload.inflight.mmap_size) { 2667 return 0; 2668 } 2669 2670 fd = qemu_chr_fe_get_msgfd(chr); 2671 if (fd < 0) { 2672 error_report("Failed to get mem fd"); 2673 return -EIO; 2674 } 2675 2676 addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE, 2677 MAP_SHARED, fd, msg.payload.inflight.mmap_offset); 2678 2679 if (addr == MAP_FAILED) { 2680 error_report("Failed to mmap mem fd"); 2681 close(fd); 2682 return -EFAULT; 2683 } 2684 2685 inflight->addr = addr; 2686 inflight->fd = fd; 2687 inflight->size = msg.payload.inflight.mmap_size; 2688 inflight->offset = msg.payload.inflight.mmap_offset; 2689 inflight->queue_size = queue_size; 2690 2691 return 0; 2692 } 2693 2694 static int vhost_user_set_inflight_fd(struct vhost_dev *dev, 2695 struct vhost_inflight *inflight) 2696 { 2697 VhostUserMsg msg = { 2698 .hdr.request = VHOST_USER_SET_INFLIGHT_FD, 2699 .hdr.flags = VHOST_USER_VERSION, 2700 .payload.inflight.mmap_size = inflight->size, 2701 .payload.inflight.mmap_offset = inflight->offset, 2702 .payload.inflight.num_queues = dev->nvqs, 2703 .payload.inflight.queue_size = inflight->queue_size, 2704 .hdr.size = sizeof(msg.payload.inflight), 2705 }; 2706 2707 if (!virtio_has_feature(dev->protocol_features, 2708 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2709 return 0; 2710 } 2711 2712 return vhost_user_write(dev, &msg, &inflight->fd, 1); 2713 } 2714 2715 static void vhost_user_state_destroy(gpointer data) 2716 { 2717 VhostUserHostNotifier *n = (VhostUserHostNotifier *) data; 2718 if (n) { 2719 vhost_user_host_notifier_remove(n, NULL); 2720 object_unparent(OBJECT(&n->mr)); 2721 /* 2722 * We can't free until vhost_user_host_notifier_remove has 2723 * done it's thing so schedule the free with RCU. 2724 */ 2725 g_free_rcu(n, rcu); 2726 } 2727 } 2728 2729 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) 2730 { 2731 if (user->chr) { 2732 error_setg(errp, "Cannot initialize vhost-user state"); 2733 return false; 2734 } 2735 user->chr = chr; 2736 user->memory_slots = 0; 2737 user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4, 2738 &vhost_user_state_destroy); 2739 return true; 2740 } 2741 2742 void vhost_user_cleanup(VhostUserState *user) 2743 { 2744 if (!user->chr) { 2745 return; 2746 } 2747 memory_region_transaction_begin(); 2748 user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true); 2749 memory_region_transaction_commit(); 2750 user->chr = NULL; 2751 } 2752 2753 2754 typedef struct { 2755 vu_async_close_fn cb; 2756 DeviceState *dev; 2757 CharBackend *cd; 2758 struct vhost_dev *vhost; 2759 IOEventHandler *event_cb; 2760 } VhostAsyncCallback; 2761 2762 static void vhost_user_async_close_bh(void *opaque) 2763 { 2764 VhostAsyncCallback *data = opaque; 2765 struct vhost_dev *vhost = data->vhost; 2766 2767 /* 2768 * If the vhost_dev has been cleared in the meantime there is 2769 * nothing left to do as some other path has completed the 2770 * cleanup. 2771 */ 2772 if (vhost->vdev) { 2773 data->cb(data->dev); 2774 } else if (data->event_cb) { 2775 qemu_chr_fe_set_handlers(data->cd, NULL, NULL, data->event_cb, 2776 NULL, data->dev, NULL, true); 2777 } 2778 2779 g_free(data); 2780 } 2781 2782 /* 2783 * We only schedule the work if the machine is running. 
If suspended 2784 * we want to keep all the in-flight data as is for migration 2785 * purposes. 2786 */ 2787 void vhost_user_async_close(DeviceState *d, 2788 CharBackend *chardev, struct vhost_dev *vhost, 2789 vu_async_close_fn cb, 2790 IOEventHandler *event_cb) 2791 { 2792 if (!runstate_check(RUN_STATE_SHUTDOWN)) { 2793 /* 2794 * A close event may happen during a read/write, but vhost 2795 * code assumes the vhost_dev remains set up, so delay the 2796 * stop & clear. 2797 */ 2798 AioContext *ctx = qemu_get_current_aio_context(); 2799 VhostAsyncCallback *data = g_new0(VhostAsyncCallback, 1); 2800 2801 /* Save data for the callback */ 2802 data->cb = cb; 2803 data->dev = d; 2804 data->cd = chardev; 2805 data->vhost = vhost; 2806 data->event_cb = event_cb; 2807 2808 /* Disable any further notifications on the chardev */ 2809 qemu_chr_fe_set_handlers(chardev, 2810 NULL, NULL, NULL, NULL, NULL, NULL, 2811 false); 2812 2813 aio_bh_schedule_oneshot(ctx, vhost_user_async_close_bh, data); 2814 2815 /* 2816 * Move the vhost device to the stopped state. The vhost-user device 2817 * will be cleaned up and disconnected in the BH. This can be useful in 2818 * the vhost migration code: if a disconnect was caught there, the 2819 * general vhost code has a way to get the dev state without 2820 * knowing its type (in this case vhost-user). 2821 * 2822 * Note that if the vhost device is fully cleared by the time we 2823 * execute the bottom half, we won't continue with the cleanup. 2824 */ 2825 vhost->started = false; 2826 } 2827 } 2828 2829 static int vhost_user_dev_start(struct vhost_dev *dev, bool started) 2830 { 2831 if (!virtio_has_feature(dev->protocol_features, 2832 VHOST_USER_PROTOCOL_F_STATUS)) { 2833 return 0; 2834 } 2835 2836 /* Set device status only for the last queue pair */ 2837 if (dev->vq_index + dev->nvqs != dev->vq_index_end) { 2838 return 0; 2839 } 2840 2841 if (started) { 2842 return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | 2843 VIRTIO_CONFIG_S_DRIVER | 2844 VIRTIO_CONFIG_S_DRIVER_OK); 2845 } else { 2846 return 0; 2847 } 2848 } 2849 2850 static void vhost_user_reset_status(struct vhost_dev *dev) 2851 { 2852 /* Set device status only for the last queue pair */ 2853 if (dev->vq_index + dev->nvqs != dev->vq_index_end) { 2854 return; 2855 } 2856 2857 if (virtio_has_feature(dev->protocol_features, 2858 VHOST_USER_PROTOCOL_F_STATUS)) { 2859 vhost_user_set_status(dev, 0); 2860 } 2861 } 2862 2863 const VhostOps user_ops = { 2864 .backend_type = VHOST_BACKEND_TYPE_USER, 2865 .vhost_backend_init = vhost_user_backend_init, 2866 .vhost_backend_cleanup = vhost_user_backend_cleanup, 2867 .vhost_backend_memslots_limit = vhost_user_memslots_limit, 2868 .vhost_backend_no_private_memslots = vhost_user_no_private_memslots, 2869 .vhost_set_log_base = vhost_user_set_log_base, 2870 .vhost_set_mem_table = vhost_user_set_mem_table, 2871 .vhost_set_vring_addr = vhost_user_set_vring_addr, 2872 .vhost_set_vring_endian = vhost_user_set_vring_endian, 2873 .vhost_set_vring_num = vhost_user_set_vring_num, 2874 .vhost_set_vring_base = vhost_user_set_vring_base, 2875 .vhost_get_vring_base = vhost_user_get_vring_base, 2876 .vhost_set_vring_kick = vhost_user_set_vring_kick, 2877 .vhost_set_vring_call = vhost_user_set_vring_call, 2878 .vhost_set_vring_err = vhost_user_set_vring_err, 2879 .vhost_set_features = vhost_user_set_features, 2880 .vhost_get_features = vhost_user_get_features, 2881 .vhost_set_owner = vhost_user_set_owner, 2882 .vhost_reset_device = vhost_user_reset_device, 2883 .vhost_get_vq_index =
vhost_user_get_vq_index, 2884 .vhost_set_vring_enable = vhost_user_set_vring_enable, 2885 .vhost_requires_shm_log = vhost_user_requires_shm_log, 2886 .vhost_migration_done = vhost_user_migration_done, 2887 .vhost_net_set_mtu = vhost_user_net_set_mtu, 2888 .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback, 2889 .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg, 2890 .vhost_get_config = vhost_user_get_config, 2891 .vhost_set_config = vhost_user_set_config, 2892 .vhost_crypto_create_session = vhost_user_crypto_create_session, 2893 .vhost_crypto_close_session = vhost_user_crypto_close_session, 2894 .vhost_get_inflight_fd = vhost_user_get_inflight_fd, 2895 .vhost_set_inflight_fd = vhost_user_set_inflight_fd, 2896 .vhost_dev_start = vhost_user_dev_start, 2897 .vhost_reset_status = vhost_user_reset_status, 2898 }; 2899
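/*
 * Illustrative usage sketch, not part of the original file: a front-end
 * that consumes the user_ops table above typically pairs the public
 * helpers in this file roughly as follows. The calling device code, the
 * "chr" CharBackend and the "errp" Error pointer are assumptions made
 * only for this example.
 *
 *     VhostUserState user;
 *     struct vhost_dev hdev;
 *
 *     if (!vhost_user_init(&user, &chr, errp)) {
 *         return;
 *     }
 *     // vhost_dev_init() calls user_ops.vhost_backend_init(), i.e.
 *     // vhost_user_backend_init(), with &user as the opaque pointer,
 *     // which negotiates features and protocol features with the backend.
 *     if (vhost_dev_init(&hdev, &user, VHOST_BACKEND_TYPE_USER, 0, errp) < 0) {
 *         vhost_user_cleanup(&user);
 *         return;
 *     }
 *     ...
 *     vhost_dev_cleanup(&hdev);   // ends up in vhost_user_backend_cleanup()
 *     vhost_user_cleanup(&user);  // frees the notifier array, clears chr
 */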