/*
 * vhost-user
 *
 * Copyright (c) 2013 Virtual Open Systems Sarl.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/virtio/virtio-dmabuf.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/virtio-crypto.h"
#include "hw/virtio/vhost-user.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-net.h"
#include "chardev/char-fe.h"
#include "io/channel-socket.h"
#include "sysemu/kvm.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "qemu/uuid.h"
#include "qemu/sockets.h"
#include "sysemu/runstate.h"
#include "sysemu/cryptodev.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "trace.h"
#include "exec/ramblock.h"

#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/un.h>

#include "standard-headers/linux/vhost_types.h"

#ifdef CONFIG_LINUX
#include <linux/userfaultfd.h>
#endif

#define VHOST_MEMORY_BASELINE_NREGIONS 8
#define VHOST_USER_F_PROTOCOL_FEATURES 30
#define VHOST_USER_BACKEND_MAX_FDS 8

#if defined(TARGET_PPC) || defined(TARGET_PPC64)
#include "hw/ppc/spapr.h"
#define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS

#else
#define VHOST_USER_MAX_RAM_SLOTS 512
#endif

/*
 * Maximum size of virtio device config space
 */
#define VHOST_USER_MAX_CONFIG_SIZE 256

#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)

typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_NET_SET_MTU = 20,
    VHOST_USER_SET_BACKEND_REQ_FD = 21,
    VHOST_USER_IOTLB_MSG = 22,
    VHOST_USER_SET_VRING_ENDIAN = 23,
    VHOST_USER_GET_CONFIG = 24,
    VHOST_USER_SET_CONFIG = 25,
    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
    VHOST_USER_POSTCOPY_ADVISE = 28,
    VHOST_USER_POSTCOPY_LISTEN = 29,
    VHOST_USER_POSTCOPY_END = 30,
    VHOST_USER_GET_INFLIGHT_FD = 31,
    VHOST_USER_SET_INFLIGHT_FD = 32,
    VHOST_USER_GPU_SET_SOCKET = 33,
    VHOST_USER_RESET_DEVICE = 34,
    /* Message number 35 reserved for VHOST_USER_VRING_KICK. */
    VHOST_USER_GET_MAX_MEM_SLOTS = 36,
    VHOST_USER_ADD_MEM_REG = 37,
    VHOST_USER_REM_MEM_REG = 38,
    VHOST_USER_SET_STATUS = 39,
    VHOST_USER_GET_STATUS = 40,
    VHOST_USER_GET_SHARED_OBJECT = 41,
    VHOST_USER_MAX
} VhostUserRequest;

typedef enum VhostUserBackendRequest {
    VHOST_USER_BACKEND_NONE = 0,
    VHOST_USER_BACKEND_IOTLB_MSG = 1,
    VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2,
    VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3,
    VHOST_USER_BACKEND_SHARED_OBJECT_ADD = 6,
    VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE = 7,
    VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8,
    VHOST_USER_BACKEND_MAX
} VhostUserBackendRequest;

typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;
} VhostUserMemoryRegion;

typedef struct VhostUserMemory {
    uint32_t nregions;
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS];
} VhostUserMemory;

typedef struct VhostUserMemRegMsg {
    uint64_t padding;
    VhostUserMemoryRegion region;
} VhostUserMemRegMsg;

typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;

typedef struct VhostUserConfig {
    uint32_t offset;
    uint32_t size;
    uint32_t flags;
    uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
} VhostUserConfig;

#define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512
#define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64
#define VHOST_CRYPTO_ASYM_MAX_KEY_LEN 1024

typedef struct VhostUserCryptoSession {
    uint64_t op_code;
    union {
        struct {
            CryptoDevBackendSymSessionInfo session_setup_data;
            uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
            uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
        } sym;
        struct {
            CryptoDevBackendAsymSessionInfo session_setup_data;
            uint8_t key[VHOST_CRYPTO_ASYM_MAX_KEY_LEN];
        } asym;
    } u;

    /* session id for success, -1 on errors */
    int64_t session_id;
} VhostUserCryptoSession;

static VhostUserConfig c __attribute__ ((unused));
#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
                                    + sizeof(c.size) \
                                    + sizeof(c.flags))

typedef struct VhostUserVringArea {
    uint64_t u64;
    uint64_t size;
    uint64_t offset;
} VhostUserVringArea;

typedef struct VhostUserInflight {
    uint64_t mmap_size;
    uint64_t mmap_offset;
    uint16_t num_queues;
    uint16_t queue_size;
} VhostUserInflight;

typedef struct VhostUserShared {
    unsigned char uuid[16];
} VhostUserShared;

typedef struct {
    VhostUserRequest request;

#define VHOST_USER_VERSION_MASK     (0x3)
#define VHOST_USER_REPLY_MASK       (0x1 << 2)
#define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
    uint32_t flags;
    uint32_t size; /* the following payload size */
} QEMU_PACKED VhostUserHeader;

typedef union {
#define VHOST_USER_VRING_IDX_MASK   (0xff)
#define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
    uint64_t u64;
    struct vhost_vring_state state;
    struct vhost_vring_addr addr;
    VhostUserMemory memory;
    VhostUserMemRegMsg mem_reg;
    VhostUserLog log;
    struct vhost_iotlb_msg iotlb;
    VhostUserConfig config;
    VhostUserCryptoSession session;
    VhostUserVringArea area;
    VhostUserInflight inflight;
    VhostUserShared object;
} VhostUserPayload;

typedef struct VhostUserMsg {
    VhostUserHeader hdr;
    VhostUserPayload payload;
} QEMU_PACKED VhostUserMsg;

static VhostUserMsg m __attribute__ ((unused));
#define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))

#define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))

/* The version of the protocol we support */
#define VHOST_USER_VERSION (0x1)

struct vhost_user {
    struct vhost_dev *dev;
    /* Shared between vhost devs of the same virtio device */
    VhostUserState *user;
    QIOChannel *backend_ioc;
    GSource *backend_src;
    NotifierWithReturn postcopy_notifier;
    struct PostCopyFD postcopy_fd;
    uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS];
    /* Length of the region_rb and region_rb_offset arrays */
    size_t region_rb_len;
    /* RAMBlock associated with a given region */
    RAMBlock **region_rb;
    /*
     * The offset from the start of the RAMBlock to the start of the
     * vhost region.
     */
    ram_addr_t *region_rb_offset;

    /* True once we've entered postcopy_listen */
    bool postcopy_listen;

    /* Our current regions */
    int num_shadow_regions;
    struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS];
};

struct scrub_regions {
    struct vhost_memory_region *region;
    int reg_idx;
    int fd_idx;
};

static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    uint8_t *p = (uint8_t *) msg;
    int r, size = VHOST_USER_HDR_SIZE;

    r = qemu_chr_fe_read_all(chr, p, size);
    if (r != size) {
        int saved_errno = errno;
        error_report("Failed to read msg header. Read %d instead of %d."
                     " Original request %d.", r, size, msg->hdr.request);
        return r < 0 ? -saved_errno : -EIO;
    }

    /* validate received flags */
    if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
        error_report("Failed to read msg header."
                     " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
                     VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
        return -EPROTO;
    }

    trace_vhost_user_read(msg->hdr.request, msg->hdr.flags);

    return 0;
}

static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    uint8_t *p = (uint8_t *) msg;
    int r, size;

    r = vhost_user_read_header(dev, msg);
    if (r < 0) {
        return r;
    }

    /* validate message size is sane */
    if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
        error_report("Failed to read msg header."
                     " Size %d exceeds the maximum %zu.", msg->hdr.size,
                     VHOST_USER_PAYLOAD_SIZE);
        return -EPROTO;
    }

    if (msg->hdr.size) {
        p += VHOST_USER_HDR_SIZE;
        size = msg->hdr.size;
        r = qemu_chr_fe_read_all(chr, p, size);
        if (r != size) {
            int saved_errno = errno;
            error_report("Failed to read msg payload."
                         " Read %d instead of %d.", r, msg->hdr.size);
            return r < 0 ? -saved_errno : -EIO;
        }
    }

    return 0;
}

static int process_message_reply(struct vhost_dev *dev,
                                 const VhostUserMsg *msg)
{
    int ret;
    VhostUserMsg msg_reply;

    if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
        return 0;
    }

    ret = vhost_user_read(dev, &msg_reply);
    if (ret < 0) {
        return ret;
    }

    if (msg_reply.hdr.request != msg->hdr.request) {
        error_report("Received unexpected msg type. "
                     "Expected %d received %d",
                     msg->hdr.request, msg_reply.hdr.request);
        return -EPROTO;
    }

    return msg_reply.payload.u64 ? -EIO : 0;
}

static bool vhost_user_per_device_request(VhostUserRequest request)
{
    switch (request) {
    case VHOST_USER_SET_OWNER:
    case VHOST_USER_RESET_OWNER:
    case VHOST_USER_SET_MEM_TABLE:
    case VHOST_USER_GET_QUEUE_NUM:
    case VHOST_USER_NET_SET_MTU:
    case VHOST_USER_RESET_DEVICE:
    case VHOST_USER_ADD_MEM_REG:
    case VHOST_USER_REM_MEM_REG:
        return true;
    default:
        return false;
    }
}

/* most non-init callers ignore the error */
static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
                            int *fds, int fd_num)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;

    /*
     * Some devices, like virtio-scsi, are implemented as a single vhost_dev,
     * while others, like virtio-net, contain multiple vhost_devs. For
     * operations such as configuring device memory mappings or issuing device
     * resets, which affect the whole device instead of individual VQs,
     * vhost-user messages should only be sent once.
     *
     * Devices with multiple vhost_devs are given an associated dev->vq_index
     * so per_device requests are only sent if vq_index is 0.
     */
    if (vhost_user_per_device_request(msg->hdr.request)
        && dev->vq_index != 0) {
        msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
        return 0;
    }

    if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
        error_report("Failed to set msg fds.");
        return -EINVAL;
    }

    ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
    if (ret != size) {
        int saved_errno = errno;
        error_report("Failed to write msg."
                     " Wrote %d instead of %d.", ret, size);
        return ret < 0 ? -saved_errno : -EIO;
    }

    trace_vhost_user_write(msg->hdr.request, msg->hdr.flags);

    return 0;
}

int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GPU_SET_SOCKET,
        .hdr.flags = VHOST_USER_VERSION,
    };

    return vhost_user_write(dev, &msg, &fd, 1);
}

static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    int fds[VHOST_USER_MAX_RAM_SLOTS];
    size_t fd_num = 0;
    bool shmfd = virtio_has_feature(dev->protocol_features,
                                    VHOST_USER_PROTOCOL_F_LOG_SHMFD);
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_LOG_BASE,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.log.mmap_size = log->size * sizeof(*(log->log)),
        .payload.log.mmap_offset = 0,
        .hdr.size = sizeof(msg.payload.log),
    };

    /* Send only once with first queue pair */
    if (dev->vq_index != 0) {
        return 0;
    }

    if (shmfd && log->fd != -1) {
        fds[fd_num++] = log->fd;
    }

    ret = vhost_user_write(dev, &msg, fds, fd_num);
    if (ret < 0) {
        return ret;
    }

    if (shmfd) {
        msg.hdr.size = 0;
        ret = vhost_user_read(dev, &msg);
        if (ret < 0) {
            return ret;
        }

        if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
            error_report("Received unexpected msg type. "
                         "Expected %d received %d",
                         VHOST_USER_SET_LOG_BASE, msg.hdr.request);
            return -EPROTO;
        }
    }

    return 0;
}

static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset,
                                            int *fd)
{
    MemoryRegion *mr;

    assert((uintptr_t)addr == addr);
    mr = memory_region_from_host((void *)(uintptr_t)addr, offset);
    *fd = memory_region_get_fd(mr);
    *offset += mr->ram_block->fd_offset;

    return mr;
}

static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst,
                                       struct vhost_memory_region *src,
                                       uint64_t mmap_offset)
{
    assert(src != NULL && dst != NULL);
    dst->userspace_addr = src->userspace_addr;
    dst->memory_size = src->memory_size;
    dst->guest_phys_addr = src->guest_phys_addr;
    dst->mmap_offset = mmap_offset;
}

static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u,
                                             struct vhost_dev *dev,
                                             VhostUserMsg *msg,
                                             int *fds, size_t *fd_num,
                                             bool track_ramblocks)
{
    int i, fd;
    ram_addr_t offset;
    MemoryRegion *mr;
    struct vhost_memory_region *reg;
    VhostUserMemoryRegion region_buffer;

    msg->hdr.request = VHOST_USER_SET_MEM_TABLE;

    for (i = 0; i < dev->mem->nregions; ++i) {
        reg = dev->mem->regions + i;

        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
        if (fd > 0) {
            if (track_ramblocks) {
                assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS);
                trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name,
                                                      reg->memory_size,
                                                      reg->guest_phys_addr,
                                                      reg->userspace_addr,
                                                      offset);
                u->region_rb_offset[i] = offset;
                u->region_rb[i] = mr->ram_block;
            } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) {
                error_report("Failed preparing vhost-user memory table msg");
                return -ENOBUFS;
            }
            vhost_user_fill_msg_region(&region_buffer, reg, offset);
            msg->payload.memory.regions[*fd_num] = region_buffer;
            fds[(*fd_num)++] = fd;
        } else if (track_ramblocks) {
            u->region_rb_offset[i] = 0;
            u->region_rb[i] = NULL;
        }
    }

    msg->payload.memory.nregions = *fd_num;

    if (!*fd_num) {
        error_report("Failed initializing vhost-user memory map, "
                     "consider using -object memory-backend-file share=on");
        return -EINVAL;
    }

    msg->hdr.size = sizeof(msg->payload.memory.nregions);
    msg->hdr.size += sizeof(msg->payload.memory.padding);
    msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion);

    return 0;
}

static inline bool reg_equal(struct vhost_memory_region *shadow_reg,
                             struct vhost_memory_region *vdev_reg)
{
    return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr &&
        shadow_reg->userspace_addr == vdev_reg->userspace_addr &&
        shadow_reg->memory_size == vdev_reg->memory_size;
}

static void scrub_shadow_regions(struct vhost_dev *dev,
                                 struct scrub_regions *add_reg,
                                 int *nr_add_reg,
                                 struct scrub_regions *rem_reg,
                                 int *nr_rem_reg, uint64_t *shadow_pcb,
                                 bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    bool found[VHOST_USER_MAX_RAM_SLOTS] = {};
    struct vhost_memory_region *reg, *shadow_reg;
    int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0;
    ram_addr_t offset;
    MemoryRegion *mr;
    bool matching;

    /*
     * Find memory regions present in our shadow state which are not in
     * the device's current memory state.
     *
     * Mark regions in both the shadow and device state as "found".
     */
    for (i = 0; i < u->num_shadow_regions; i++) {
        shadow_reg = &u->shadow_regions[i];
        matching = false;

        for (j = 0; j < dev->mem->nregions; j++) {
            reg = &dev->mem->regions[j];

            mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);

            if (reg_equal(shadow_reg, reg)) {
                matching = true;
                found[j] = true;
                if (track_ramblocks) {
                    /*
                     * Reset postcopy client bases, region_rb, and
                     * region_rb_offset in case regions are removed.
                     */
                    if (fd > 0) {
                        u->region_rb_offset[j] = offset;
                        u->region_rb[j] = mr->ram_block;
                        shadow_pcb[j] = u->postcopy_client_bases[i];
                    } else {
                        u->region_rb_offset[j] = 0;
                        u->region_rb[j] = NULL;
                    }
                }
                break;
            }
        }

        /*
         * If the region was not found in the current device memory state
         * create an entry for it in the removed list.
         */
        if (!matching) {
            rem_reg[rm_idx].region = shadow_reg;
            rem_reg[rm_idx++].reg_idx = i;
        }
    }

    /*
     * For regions not marked "found", create entries in the added list.
     *
     * Note their indexes in the device memory state and the indexes of their
     * file descriptors.
     */
    for (i = 0; i < dev->mem->nregions; i++) {
        reg = &dev->mem->regions[i];
        vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
        if (fd > 0) {
            ++fd_num;
        }

        /*
         * If the region was in both the shadow and device state we don't
         * need to send a VHOST_USER_ADD_MEM_REG message for it.
         */
        if (found[i]) {
            continue;
        }

        add_reg[add_idx].region = reg;
        add_reg[add_idx].reg_idx = i;
        add_reg[add_idx++].fd_idx = fd_num;
    }
    *nr_rem_reg = rm_idx;
    *nr_add_reg = add_idx;

    return;
}

static int send_remove_regions(struct vhost_dev *dev,
                               struct scrub_regions *remove_reg,
                               int nr_rem_reg, VhostUserMsg *msg,
                               bool reply_supported)
{
    struct vhost_user *u = dev->opaque;
    struct vhost_memory_region *shadow_reg;
    int i, fd, shadow_reg_idx, ret;
    ram_addr_t offset;
    VhostUserMemoryRegion region_buffer;

    /*
     * The regions in remove_reg appear in the same order they do in the
     * shadow table. Therefore we can minimize memory copies by iterating
     * through remove_reg backwards.
     */
    for (i = nr_rem_reg - 1; i >= 0; i--) {
        shadow_reg = remove_reg[i].region;
        shadow_reg_idx = remove_reg[i].reg_idx;

        vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd);

        if (fd > 0) {
            msg->hdr.request = VHOST_USER_REM_MEM_REG;
            vhost_user_fill_msg_region(&region_buffer, shadow_reg, 0);
            msg->payload.mem_reg.region = region_buffer;

            ret = vhost_user_write(dev, msg, NULL, 0);
            if (ret < 0) {
                return ret;
            }

            if (reply_supported) {
                ret = process_message_reply(dev, msg);
                if (ret) {
                    return ret;
                }
            }
        }

        /*
         * At this point we know the backend has unmapped the region. It is now
         * safe to remove it from the shadow table.
         */
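        /*
         * Close the gap left by the removed entry: every later entry in the
         * shadow table shifts down by one slot.
         */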
        memmove(&u->shadow_regions[shadow_reg_idx],
                &u->shadow_regions[shadow_reg_idx + 1],
                sizeof(struct vhost_memory_region) *
                (u->num_shadow_regions - shadow_reg_idx - 1));
        u->num_shadow_regions--;
    }

    return 0;
}

static int send_add_regions(struct vhost_dev *dev,
                            struct scrub_regions *add_reg, int nr_add_reg,
                            VhostUserMsg *msg, uint64_t *shadow_pcb,
                            bool reply_supported, bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    int i, fd, ret, reg_idx, reg_fd_idx;
    struct vhost_memory_region *reg;
    MemoryRegion *mr;
    ram_addr_t offset;
    VhostUserMsg msg_reply;
    VhostUserMemoryRegion region_buffer;

    for (i = 0; i < nr_add_reg; i++) {
        reg = add_reg[i].region;
        reg_idx = add_reg[i].reg_idx;
        reg_fd_idx = add_reg[i].fd_idx;

        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);

        if (fd > 0) {
            if (track_ramblocks) {
                trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name,
                                                      reg->memory_size,
                                                      reg->guest_phys_addr,
                                                      reg->userspace_addr,
                                                      offset);
                u->region_rb_offset[reg_idx] = offset;
                u->region_rb[reg_idx] = mr->ram_block;
            }
            msg->hdr.request = VHOST_USER_ADD_MEM_REG;
            vhost_user_fill_msg_region(&region_buffer, reg, offset);
            msg->payload.mem_reg.region = region_buffer;

            ret = vhost_user_write(dev, msg, &fd, 1);
            if (ret < 0) {
                return ret;
            }

            if (track_ramblocks) {
                uint64_t reply_gpa;

                ret = vhost_user_read(dev, &msg_reply);
                if (ret < 0) {
                    return ret;
                }

                reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr;

                if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) {
                    error_report("%s: Received unexpected msg type."
                                 "Expected %d received %d", __func__,
                                 VHOST_USER_ADD_MEM_REG,
                                 msg_reply.hdr.request);
                    return -EPROTO;
                }

                /*
                 * We're using the same structure, just reusing one of the
                 * fields, so it should be the same size.
                 */
                if (msg_reply.hdr.size != msg->hdr.size) {
                    error_report("%s: Unexpected size for postcopy reply "
                                 "%d vs %d", __func__, msg_reply.hdr.size,
                                 msg->hdr.size);
                    return -EPROTO;
                }

                /* Get the postcopy client base from the backend's reply. */
                if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) {
                    shadow_pcb[reg_idx] =
                        msg_reply.payload.mem_reg.region.userspace_addr;
                    trace_vhost_user_set_mem_table_postcopy(
                        msg_reply.payload.mem_reg.region.userspace_addr,
                        msg->payload.mem_reg.region.userspace_addr,
                        reg_fd_idx, reg_idx);
                } else {
                    error_report("%s: invalid postcopy reply for region. "
                                 "Got guest physical address %" PRIX64 ", expected "
                                 "%" PRIX64, __func__, reply_gpa,
                                 dev->mem->regions[reg_idx].guest_phys_addr);
                    return -EPROTO;
                }
            } else if (reply_supported) {
                ret = process_message_reply(dev, msg);
                if (ret) {
                    return ret;
                }
            }
        } else if (track_ramblocks) {
            u->region_rb_offset[reg_idx] = 0;
            u->region_rb[reg_idx] = NULL;
        }

        /*
         * At this point, we know the backend has mapped in the new
         * region, if the region has a valid file descriptor.
         *
         * The region should now be added to the shadow table.
         */
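        /*
         * Only the fields compared by reg_equal() (GPA, userspace address and
         * size) need to be tracked in the shadow table.
         */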
        u->shadow_regions[u->num_shadow_regions].guest_phys_addr =
            reg->guest_phys_addr;
        u->shadow_regions[u->num_shadow_regions].userspace_addr =
            reg->userspace_addr;
        u->shadow_regions[u->num_shadow_regions].memory_size =
            reg->memory_size;
        u->num_shadow_regions++;
    }

    return 0;
}

static int vhost_user_add_remove_regions(struct vhost_dev *dev,
                                         VhostUserMsg *msg,
                                         bool reply_supported,
                                         bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS];
    struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS];
    uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {};
    int nr_add_reg, nr_rem_reg;
    int ret;

    msg->hdr.size = sizeof(msg->payload.mem_reg);

    /* Find the regions which need to be removed or added. */
    scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg,
                         shadow_pcb, track_ramblocks);

    if (nr_rem_reg) {
        ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg,
                                  reply_supported);
        if (ret < 0) {
            goto err;
        }
    }

    if (nr_add_reg) {
        ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb,
                               reply_supported, track_ramblocks);
        if (ret < 0) {
            goto err;
        }
    }

    if (track_ramblocks) {
        memcpy(u->postcopy_client_bases, shadow_pcb,
               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
        /*
         * Now we've registered this with the postcopy code, we ack to the
         * client, because now we're in the position to be able to deal with
         * any faults it generates.
         */
        /* TODO: Use this for failure cases as well with a bad value. */
        msg->hdr.size = sizeof(msg->payload.u64);
        msg->payload.u64 = 0; /* OK */

        ret = vhost_user_write(dev, msg, NULL, 0);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;

err:
    if (track_ramblocks) {
        memcpy(u->postcopy_client_bases, shadow_pcb,
               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
    }

    return ret;
}

static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
                                             struct vhost_memory *mem,
                                             bool reply_supported,
                                             bool config_mem_slots)
{
    struct vhost_user *u = dev->opaque;
    int fds[VHOST_MEMORY_BASELINE_NREGIONS];
    size_t fd_num = 0;
    VhostUserMsg msg_reply;
    int region_i, msg_i;
    int ret;

    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (u->region_rb_len < dev->mem->nregions) {
        u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
        u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
                                      dev->mem->nregions);
        memset(&(u->region_rb[u->region_rb_len]), '\0',
               sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
        memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
               sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
        u->region_rb_len = dev->mem->nregions;
    }

    if (config_mem_slots) {
        ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true);
        if (ret < 0) {
            return ret;
        }
    } else {
        ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
                                                true);
        if (ret < 0) {
            return ret;
        }

        ret = vhost_user_write(dev, &msg, fds, fd_num);
        if (ret < 0) {
            return ret;
        }

        ret = vhost_user_read(dev, &msg_reply);
        if (ret < 0) {
            return ret;
        }

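        /*
         * On success the backend echoes the memory table with each region's
         * userspace_addr rewritten to the address at which the backend mapped
         * it; those addresses become the postcopy client bases below.
         */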
        if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
            error_report("%s: Received unexpected msg type."
                         "Expected %d received %d", __func__,
                         VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
            return -EPROTO;
        }

        /*
         * We're using the same structure, just reusing one of the
         * fields, so it should be the same size.
         */
        if (msg_reply.hdr.size != msg.hdr.size) {
            error_report("%s: Unexpected size for postcopy reply "
                         "%d vs %d", __func__, msg_reply.hdr.size,
                         msg.hdr.size);
            return -EPROTO;
        }

        memset(u->postcopy_client_bases, 0,
               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);

        /*
         * They're in the same order as the regions that were sent
         * but some of the regions were skipped (above) if they
         * didn't have fd's
         */
        for (msg_i = 0, region_i = 0;
             region_i < dev->mem->nregions;
             region_i++) {
            if (msg_i < fd_num &&
                msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
                dev->mem->regions[region_i].guest_phys_addr) {
                u->postcopy_client_bases[region_i] =
                    msg_reply.payload.memory.regions[msg_i].userspace_addr;
                trace_vhost_user_set_mem_table_postcopy(
                    msg_reply.payload.memory.regions[msg_i].userspace_addr,
                    msg.payload.memory.regions[msg_i].userspace_addr,
                    msg_i, region_i);
                msg_i++;
            }
        }
        if (msg_i != fd_num) {
            error_report("%s: postcopy reply not fully consumed "
                         "%d vs %zd",
                         __func__, msg_i, fd_num);
            return -EIO;
        }

        /*
         * Now we've registered this with the postcopy code, we ack to the
         * client, because now we're in the position to be able to deal
         * with any faults it generates.
         */
        /* TODO: Use this for failure cases as well with a bad value. */
        msg.hdr.size = sizeof(msg.payload.u64);
        msg.payload.u64 = 0; /* OK */
        ret = vhost_user_write(dev, &msg, NULL, 0);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

static int vhost_user_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    struct vhost_user *u = dev->opaque;
    int fds[VHOST_MEMORY_BASELINE_NREGIONS];
    size_t fd_num = 0;
    bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
    bool config_mem_slots =
        virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS);
    int ret;

    if (do_postcopy) {
        /*
         * Postcopy has enough differences that it's best done in its own
         * version
         */
        return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported,
                                                 config_mem_slots);
    }

    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    if (config_mem_slots) {
        ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false);
        if (ret < 0) {
            return ret;
        }
    } else {
        ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
                                                false);
        if (ret < 0) {
            return ret;
        }

        ret = vhost_user_write(dev, &msg, fds, fd_num);
        if (ret < 0) {
            return ret;
        }

        if (reply_supported) {
            return process_message_reply(dev, &msg);
        }
    }

    return 0;
}

static int vhost_user_set_vring_endian(struct vhost_dev *dev,
                                       struct vhost_vring_state *ring)
{
    bool cross_endian = virtio_has_feature(dev->protocol_features,
                                           VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };

    if (!cross_endian) {
        error_report("vhost-user trying to send unhandled ioctl");
        return -ENOTSUP;
    }

    return vhost_user_write(dev, &msg, NULL, 0);
}

static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (vhost_user_per_device_request(request) && dev->vq_index != 0) {
        return 0;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        return ret;
    }

    if (msg.hdr.request != request) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     request, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != sizeof(msg.payload.u64)) {
        error_report("Received bad msg size.");
        return -EPROTO;
    }

    *u64 = msg.payload.u64;

    return 0;
}

static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
{
    if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) {
        return -EPROTO;
    }

    return 0;
}

/* Note: "msg->hdr.flags" may be modified. */
static int vhost_user_write_sync(struct vhost_dev *dev, VhostUserMsg *msg,
                                 bool wait_for_reply)
{
    int ret;

    if (wait_for_reply) {
        bool reply_supported = virtio_has_feature(dev->protocol_features,
                                                  VHOST_USER_PROTOCOL_F_REPLY_ACK);
        if (reply_supported) {
            msg->hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
        }
    }

    ret = vhost_user_write(dev, msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    if (wait_for_reply) {
        uint64_t dummy;

        if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
            return process_message_reply(dev, msg);
        }

        /*
         * We need to wait for a reply but the backend does not
         * support replies for the command we just sent.
         * Send VHOST_USER_GET_FEATURES which makes all backends
         * send a reply.
         */
        return vhost_user_get_features(dev, &dummy);
    }

    return 0;
}

static int vhost_set_vring(struct vhost_dev *dev,
                           unsigned long int request,
                           struct vhost_vring_state *ring,
                           bool wait_for_reply)
{
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };

    return vhost_user_write_sync(dev, &msg, wait_for_reply);
}

static int vhost_user_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring, false);
}

static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
{
    assert(n && n->unmap_addr);
    munmap(n->unmap_addr, qemu_real_host_page_size());
    n->unmap_addr = NULL;
}

/*
 * clean-up function for notifier, will finally free the structure
 * under rcu.
 */
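/*
 * The old mapping is only torn down after an RCU grace period, so readers
 * that still reference the previous address keep a valid mapping until then.
 */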
static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
                                            VirtIODevice *vdev)
{
    if (n->addr) {
        if (vdev) {
            virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false);
        }
        assert(!n->unmap_addr);
        n->unmap_addr = n->addr;
        n->addr = NULL;
        call_rcu(n, vhost_user_host_notifier_free, rcu);
    }
}

static int vhost_user_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring, false);
}

static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
{
    int i;

    if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
        return -EINVAL;
    }

    for (i = 0; i < dev->nvqs; ++i) {
        int ret;
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = enable,
        };

        /*
         * SET_VRING_ENABLE travels from guest to QEMU to vhost-user backend /
         * control plane thread via unix domain socket. Virtio requests travel
         * from guest to vhost-user backend / data plane thread via eventfd.
         * Even if the guest enables the ring first, and pushes its first virtio
         * request second (conforming to the virtio spec), the data plane thread
         * in the backend may see the virtio request before the control plane
         * thread sees the queue enablement. This causes (in fact, requires) the
         * data plane thread to discard the virtio request (it arrived on a
         * seemingly disabled queue). To prevent this out-of-order delivery,
         * don't let the guest proceed to pushing the virtio request until the
         * backend control plane acknowledges enabling the queue -- IOW, pass
         * wait_for_reply=true below.
         */
        ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state, true);
        if (ret < 0) {
            /*
             * Restoring the previous state is likely infeasible, as well as
             * proceeding regardless the error, so just bail out and hope for
             * the device-level recovery.
             */
            return ret;
        }
    }

    return 0;
}

static VhostUserHostNotifier *fetch_notifier(VhostUserState *u,
                                             int idx)
{
    if (idx >= u->notifiers->len) {
        return NULL;
    }
    return g_ptr_array_index(u->notifiers, idx);
}

static int vhost_user_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_VRING_BASE,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };
    struct vhost_user *u = dev->opaque;

    VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
    if (n) {
        vhost_user_host_notifier_remove(n, dev->vdev);
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_GET_VRING_BASE, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != sizeof(msg.payload.state)) {
        error_report("Received bad msg size.");
        return -EPROTO;
    }

    *ring = msg.payload.state;

    return 0;
}

static int vhost_set_vring_file(struct vhost_dev *dev,
                                VhostUserRequest request,
                                struct vhost_vring_file *file)
{
    int fds[VHOST_USER_MAX_RAM_SLOTS];
    size_t fd_num = 0;
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
        .hdr.size = sizeof(msg.payload.u64),
    };

    if (file->fd > 0) {
        fds[fd_num++] = file->fd;
    } else {
        msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
    }

    return vhost_user_write(dev, &msg, fds, fd_num);
}

static int vhost_user_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
}

static int vhost_user_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
}

static int vhost_user_set_vring_err(struct vhost_dev *dev,
                                    struct vhost_vring_file *file)
{
    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_ERR, file);
}

static int vhost_user_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_VRING_ADDR,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.addr = *addr,
        .hdr.size = sizeof(msg.payload.addr),
    };

    /*
     * wait for a reply if logging is enabled to make sure
     * backend is actually logging changes
     */
    bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG);

    return vhost_user_write_sync(dev, &msg, wait_for_reply);
}

static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64,
                              bool wait_for_reply)
{
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.u64 = u64,
        .hdr.size = sizeof(msg.payload.u64),
    };

    return vhost_user_write_sync(dev, &msg, wait_for_reply);
}

static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status)
{
    return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status, false);
}

static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status)
{
    uint64_t value;
    int ret;

    ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &value);
    if (ret < 0) {
        return ret;
    }
    *status = value;

    return 0;
}

static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;
    int ret;

    ret = vhost_user_get_status(dev, &s);
    if (ret < 0) {
        return ret;
    }

    if ((s & status) == status) {
        return 0;
    }
    s |= status;

    return vhost_user_set_status(dev, s);
}

static int vhost_user_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    /*
     * wait for a reply if logging is enabled to make sure
     * backend is actually logging changes
     */
    bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL);
    int ret;

    /*
     * We need to include any extra backend only feature bits that
     * might be needed by our device. Currently this includes the
     * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol
     * features.
     */
    ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES,
                             features | dev->backend_features,
                             log_enabled);

    if (virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_STATUS)) {
        if (!ret) {
            return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
        }
    }

    return ret;
}

static int vhost_user_set_protocol_features(struct vhost_dev *dev,
                                            uint64_t features)
{
    return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features,
                              false);
}

static int vhost_user_set_owner(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_OWNER,
        .hdr.flags = VHOST_USER_VERSION,
    };

    return vhost_user_write(dev, &msg, NULL, 0);
}

static int vhost_user_get_max_memslots(struct vhost_dev *dev,
                                       uint64_t *max_memslots)
{
    uint64_t backend_max_memslots;
    int err;

    err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS,
                             &backend_max_memslots);
    if (err < 0) {
        return err;
    }

    *max_memslots = backend_max_memslots;

    return 0;
}

static int vhost_user_reset_device(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.request = VHOST_USER_RESET_DEVICE,
    };

    /*
     * Historically, reset was not implemented so only reset devices
     * that are expecting it.
     */
    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_RESET_DEVICE)) {
        return -ENOSYS;
    }

    return vhost_user_write(dev, &msg, NULL, 0);
}

static int vhost_user_backend_handle_config_change(struct vhost_dev *dev)
{
    if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
        return -ENOSYS;
    }

    return dev->config_ops->vhost_dev_config_notifier(dev);
}

/*
 * Fetch or create the notifier for a given idx. Newly created
 * notifiers are added to the pointer array that tracks them.
 */
static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u,
                                                       int idx)
{
    VhostUserHostNotifier *n = NULL;
    if (idx >= u->notifiers->len) {
        g_ptr_array_set_size(u->notifiers, idx + 1);
    }

    n = g_ptr_array_index(u->notifiers, idx);
    if (!n) {
        /*
         * In case notifications arrive out-of-order,
         * make room for current index.
         */
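        /*
         * g_ptr_array_set_size() above padded the array with NULL entries;
         * swap the placeholder at idx for the newly allocated notifier.
         */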
        g_ptr_array_remove_index(u->notifiers, idx);
        n = g_new0(VhostUserHostNotifier, 1);
        n->idx = idx;
        g_ptr_array_insert(u->notifiers, idx, n);
        trace_vhost_user_create_notifier(idx, n);
    }

    return n;
}

static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev,
                                                          VhostUserVringArea *area,
                                                          int fd)
{
    int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
    size_t page_size = qemu_real_host_page_size();
    struct vhost_user *u = dev->opaque;
    VhostUserState *user = u->user;
    VirtIODevice *vdev = dev->vdev;
    VhostUserHostNotifier *n;
    void *addr;
    char *name;

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
        vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
        return -EINVAL;
    }

    /*
     * Fetch notifier and invalidate any old data before setting up
     * new mapped address.
     */
    n = fetch_or_create_notifier(user, queue_idx);
    vhost_user_host_notifier_remove(n, vdev);

    if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
        return 0;
    }

    /* Sanity check. */
    if (area->size != page_size) {
        return -EINVAL;
    }

    addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                fd, area->offset);
    if (addr == MAP_FAILED) {
        return -EFAULT;
    }

    name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
                           user, queue_idx);
    if (!n->mr.ram) { /* Don't init again after suspend. */
        memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
                                          page_size, addr);
    } else {
        n->mr.ram_block->host = addr;
    }
    g_free(name);

    if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
        object_unparent(OBJECT(&n->mr));
        munmap(addr, page_size);
        return -ENXIO;
    }

    n->addr = addr;

    return 0;
}

static int
vhost_user_backend_handle_shared_object_add(struct vhost_dev *dev,
                                            VhostUserShared *object)
{
    QemuUUID uuid;

    memcpy(uuid.data, object->uuid, sizeof(object->uuid));
    return virtio_add_vhost_device(&uuid, dev);
}

static int
vhost_user_backend_handle_shared_object_remove(VhostUserShared *object)
{
    QemuUUID uuid;

    memcpy(uuid.data, object->uuid, sizeof(object->uuid));
    return virtio_remove_resource(&uuid);
}

static bool vhost_user_send_resp(QIOChannel *ioc, VhostUserHeader *hdr,
                                 VhostUserPayload *payload, Error **errp)
{
    struct iovec iov[] = {
        { .iov_base = hdr, .iov_len = VHOST_USER_HDR_SIZE },
        { .iov_base = payload, .iov_len = hdr->size },
    };

    hdr->flags &= ~VHOST_USER_NEED_REPLY_MASK;
    hdr->flags |= VHOST_USER_REPLY_MASK;

    return !qio_channel_writev_all(ioc, iov, ARRAY_SIZE(iov), errp);
}

static bool
vhost_user_backend_send_dmabuf_fd(QIOChannel *ioc, VhostUserHeader *hdr,
                                  VhostUserPayload *payload, Error **errp)
{
    hdr->size = sizeof(payload->u64);
    return vhost_user_send_resp(ioc, hdr, payload, errp);
}

int vhost_user_get_shared_object(struct vhost_dev *dev, unsigned char *uuid,
                                 int *dmabuf_fd)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_SHARED_OBJECT,
        .hdr.flags = VHOST_USER_VERSION,
    };
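    /*
     * The object's UUID travels in the payload; the backend is expected to
     * return the matching dmabuf fd as ancillary data on the reply.
     */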
    memcpy(msg.payload.object.uuid, uuid, sizeof(msg.payload.object.uuid));

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_GET_SHARED_OBJECT) {
        error_report("Received unexpected msg type. "
                     "Expected %d received %d",
                     VHOST_USER_GET_SHARED_OBJECT, msg.hdr.request);
        return -EPROTO;
    }

    *dmabuf_fd = qemu_chr_fe_get_msgfd(chr);
    if (*dmabuf_fd < 0) {
        error_report("Failed to get dmabuf fd");
        return -EIO;
    }

    return 0;
}

static int
vhost_user_backend_handle_shared_object_lookup(struct vhost_user *u,
                                               QIOChannel *ioc,
                                               VhostUserHeader *hdr,
                                               VhostUserPayload *payload)
{
    QemuUUID uuid;
    CharBackend *chr = u->user->chr;
    Error *local_err = NULL;
    int dmabuf_fd = -1;
    int fd_num = 0;

    memcpy(uuid.data, payload->object.uuid, sizeof(payload->object.uuid));

    payload->u64 = 0;
    switch (virtio_object_type(&uuid)) {
    case TYPE_DMABUF:
        dmabuf_fd = virtio_lookup_dmabuf(&uuid);
        break;
    case TYPE_VHOST_DEV:
    {
        struct vhost_dev *dev = virtio_lookup_vhost_device(&uuid);
        if (dev == NULL) {
            payload->u64 = -EINVAL;
            break;
        }
        int ret = vhost_user_get_shared_object(dev, uuid.data, &dmabuf_fd);
        if (ret < 0) {
            payload->u64 = ret;
        }
        break;
    }
    case TYPE_INVALID:
        payload->u64 = -EINVAL;
        break;
    }

    if (dmabuf_fd != -1) {
        fd_num++;
    }

    if (qemu_chr_fe_set_msgfds(chr, &dmabuf_fd, fd_num) < 0) {
        error_report("Failed to set msg fds.");
        payload->u64 = -EINVAL;
    }

    if (!vhost_user_backend_send_dmabuf_fd(ioc, hdr, payload, &local_err)) {
        error_report_err(local_err);
        return -EINVAL;
    }

    return 0;
}

static void close_backend_channel(struct vhost_user *u)
{
    g_source_destroy(u->backend_src);
    g_source_unref(u->backend_src);
    u->backend_src = NULL;
    object_unref(OBJECT(u->backend_ioc));
    u->backend_ioc = NULL;
}

static gboolean backend_read(QIOChannel *ioc, GIOCondition condition,
                             gpointer opaque)
{
    struct vhost_dev *dev = opaque;
    struct vhost_user *u = dev->opaque;
    VhostUserHeader hdr = { 0, };
    VhostUserPayload payload = { 0, };
    Error *local_err = NULL;
    gboolean rc = G_SOURCE_CONTINUE;
    int ret = 0;
    struct iovec iov;
    g_autofree int *fd = NULL;
    size_t fdsize = 0;
    int i;

    /* Read header */
    iov.iov_base = &hdr;
    iov.iov_len = VHOST_USER_HDR_SIZE;

    if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) {
        error_report_err(local_err);
        goto err;
    }

    if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
        error_report("Failed to read msg header."
                     " Size %d exceeds the maximum %zu.", hdr.size,
                     VHOST_USER_PAYLOAD_SIZE);
        goto err;
    }

    /* Read payload */
    if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) {
        error_report_err(local_err);
        goto err;
    }

    switch (hdr.request) {
    case VHOST_USER_BACKEND_IOTLB_MSG:
        ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
        break;
    case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG:
        ret = vhost_user_backend_handle_config_change(dev);
        break;
    case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG:
        ret = vhost_user_backend_handle_vring_host_notifier(dev, &payload.area,
                                                            fd ? fd[0] : -1);
        break;
    case VHOST_USER_BACKEND_SHARED_OBJECT_ADD:
        ret = vhost_user_backend_handle_shared_object_add(dev, &payload.object);
        break;
    case VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE:
        ret = vhost_user_backend_handle_shared_object_remove(&payload.object);
        break;
    case VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP:
        ret = vhost_user_backend_handle_shared_object_lookup(dev->opaque, ioc,
                                                             &hdr, &payload);
        break;
    default:
        error_report("Received unexpected msg type: %d.", hdr.request);
        ret = -EINVAL;
    }

    /*
     * REPLY_ACK feature handling. Other reply types have to be managed
     * directly in their request handlers.
     */
    if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
        payload.u64 = !!ret;
        hdr.size = sizeof(payload.u64);

        if (!vhost_user_send_resp(ioc, &hdr, &payload, &local_err)) {
            error_report_err(local_err);
            goto err;
        }
    }

    goto fdcleanup;

err:
    close_backend_channel(u);
    rc = G_SOURCE_REMOVE;

fdcleanup:
    if (fd) {
        for (i = 0; i < fdsize; i++) {
            close(fd[i]);
        }
    }
    return rc;
}

static int vhost_setup_backend_channel(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD,
        .hdr.flags = VHOST_USER_VERSION,
    };
    struct vhost_user *u = dev->opaque;
    int sv[2], ret = 0;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
    Error *local_err = NULL;
    QIOChannel *ioc;

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_BACKEND_REQ)) {
        return 0;
    }

    if (qemu_socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
        int saved_errno = errno;
        error_report("socketpair() failed");
        return -saved_errno;
    }

    ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err));
    if (!ioc) {
        error_report_err(local_err);
        return -ECONNREFUSED;
    }
    u->backend_ioc = ioc;
    u->backend_src = qio_channel_add_watch_source(u->backend_ioc,
                                                  G_IO_IN | G_IO_HUP,
                                                  backend_read, dev, NULL, NULL);

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    ret = vhost_user_write(dev, &msg, &sv[1], 1);
    if (ret) {
        goto out;
    }

    if (reply_supported) {
        ret = process_message_reply(dev, &msg);
    }

out:
    close(sv[1]);
    if (ret) {
        close_backend_channel(u);
    }

    return ret;
}

#ifdef CONFIG_LINUX
/*
 * Called back from the postcopy fault thread when a fault is received on our
 * ufd.
 * TODO: This is Linux specific
 */
static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
                                             void *ufd)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    struct uffd_msg *msg = ufd;
    uint64_t faultaddr = msg->arg.pagefault.address;
    RAMBlock *rb = NULL;
    uint64_t rb_offset;
    int i;

    trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
                                            dev->mem->nregions);
    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
        trace_vhost_user_postcopy_fault_handler_loop(i,
                u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
        if (faultaddr >= u->postcopy_client_bases[i]) {
            /* Offset of the fault address in the vhost region */
            uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
            if (region_offset < dev->mem->regions[i].memory_size) {
                rb_offset = region_offset + u->region_rb_offset[i];
                trace_vhost_user_postcopy_fault_handler_found(i,
                        region_offset, rb_offset);
                rb = u->region_rb[i];
                return postcopy_request_shared_page(pcfd, rb, faultaddr,
                                                    rb_offset);
            }
        }
    }
    error_report("%s: Failed to find region for fault %" PRIx64,
                 __func__, faultaddr);
    return -1;
}

static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
                                     uint64_t offset)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    int i;

    trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);

    if (!u) {
        return 0;
    }
    /* Translate the offset into an address in the client's address space */
    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
        if (u->region_rb[i] == rb &&
            offset >= u->region_rb_offset[i] &&
            offset < (u->region_rb_offset[i] +
                      dev->mem->regions[i].memory_size)) {
            uint64_t client_addr = (offset - u->region_rb_offset[i]) +
                                   u->postcopy_client_bases[i];
            trace_vhost_user_postcopy_waker_found(client_addr);
            return postcopy_wake_shared(pcfd, client_addr, rb);
        }
    }

    trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
    return 0;
}
#endif

/*
 * Called at the start of an inbound postcopy on reception of the
 * 'advise' command.
 */
static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
{
#ifdef CONFIG_LINUX
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    int ufd;
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
        .hdr.flags = VHOST_USER_VERSION,
    };

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_setg(errp, "Failed to send postcopy_advise to vhost");
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        error_setg(errp, "Failed to get postcopy_advise reply from vhost");
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
        error_setg(errp, "Unexpected msg type. Expected %d received %d",
                   VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size) {
        error_setg(errp, "Received bad msg size.");
        return -EPROTO;
    }
    ufd = qemu_chr_fe_get_msgfd(chr);
    if (ufd < 0) {
        error_setg(errp, "%s: Failed to get ufd", __func__);
        return -EIO;
    }
    qemu_socket_set_nonblock(ufd);

    /* register ufd with userfault thread */
    u->postcopy_fd.fd = ufd;
    u->postcopy_fd.data = dev;
    u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
    u->postcopy_fd.waker = vhost_user_postcopy_waker;
    u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
    postcopy_register_shared_ufd(&u->postcopy_fd);
    return 0;
#else
    error_setg(errp, "Postcopy not supported on non-Linux systems");
    return -ENOSYS;
#endif
}

/*
 * Called at the switch to postcopy on reception of the 'listen' command.
 */
static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
{
    struct vhost_user *u = dev->opaque;
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
    };
    u->postcopy_listen = true;

    trace_vhost_user_postcopy_listen();

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_setg(errp, "Failed to send postcopy_listen to vhost");
        return ret;
    }

    ret = process_message_reply(dev, &msg);
    if (ret) {
        error_setg(errp, "Failed to receive reply to postcopy_listen");
        return ret;
    }

    return 0;
}

/*
 * Called at the end of postcopy
 */
static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_END,
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
    };
    int ret;
    struct vhost_user *u = dev->opaque;

    trace_vhost_user_postcopy_end_entry();

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_setg(errp, "Failed to send postcopy_end to vhost");
        return ret;
    }

    ret = process_message_reply(dev, &msg);
    if (ret) {
        error_setg(errp, "Failed to receive reply to postcopy_end");
        return ret;
    }
    postcopy_unregister_shared_ufd(&u->postcopy_fd);
    close(u->postcopy_fd.fd);
    u->postcopy_fd.handler = NULL;

    trace_vhost_user_postcopy_end_exit();

    return 0;
}

static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
                                        void *opaque)
{
    struct PostcopyNotifyData *pnd = opaque;
    struct vhost_user *u = container_of(notifier, struct vhost_user,
                                        postcopy_notifier);
    struct vhost_dev *dev = u->dev;

    switch (pnd->reason) {
    case POSTCOPY_NOTIFY_PROBE:
        if (!virtio_has_feature(dev->protocol_features,
                                VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
            /* TODO: Get the device name into this error somehow */
            error_setg(pnd->errp,
                       "vhost-user backend not capable of postcopy");
            return -ENOENT;
        }
        break;

    case POSTCOPY_NOTIFY_INBOUND_ADVISE:
        return vhost_user_postcopy_advise(dev, pnd->errp);

    case POSTCOPY_NOTIFY_INBOUND_LISTEN:
        return vhost_user_postcopy_listen(dev, pnd->errp);

    case POSTCOPY_NOTIFY_INBOUND_END:
        return vhost_user_postcopy_end(dev, pnd->errp);

    default:
2106 /* We ignore notifications we don't know about */ 2107 break; 2108 } 2109 2110 return 0; 2111 } 2112 2113 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, 2114 Error **errp) 2115 { 2116 uint64_t features, ram_slots; 2117 struct vhost_user *u; 2118 VhostUserState *vus = (VhostUserState *) opaque; 2119 int err; 2120 2121 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2122 2123 u = g_new0(struct vhost_user, 1); 2124 u->user = vus; 2125 u->dev = dev; 2126 dev->opaque = u; 2127 2128 err = vhost_user_get_features(dev, &features); 2129 if (err < 0) { 2130 error_setg_errno(errp, -err, "vhost_backend_init failed"); 2131 return err; 2132 } 2133 2134 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 2135 bool supports_f_config = vus->supports_config || 2136 (dev->config_ops && dev->config_ops->vhost_dev_config_notifier); 2137 uint64_t protocol_features; 2138 2139 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 2140 2141 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, 2142 &protocol_features); 2143 if (err < 0) { 2144 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2145 return -EPROTO; 2146 } 2147 2148 /* 2149 * We will use all the protocol features we support - although 2150 * we suppress F_CONFIG if we know QEMU's internal code cannot support 2151 * it. 2152 */ 2153 protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK; 2154 2155 if (supports_f_config) { 2156 if (!virtio_has_feature(protocol_features, 2157 VHOST_USER_PROTOCOL_F_CONFIG)) { 2158 error_setg(errp, "vhost-user device expecting " 2159 "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user backend does " 2160 "not support it."); 2161 return -EPROTO; 2162 } 2163 } else { 2164 if (virtio_has_feature(protocol_features, 2165 VHOST_USER_PROTOCOL_F_CONFIG)) { 2166 warn_report("vhost-user backend supports " 2167 "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not."); 2168 protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); 2169 } 2170 } 2171 2172 /* final set of protocol features */ 2173 dev->protocol_features = protocol_features; 2174 err = vhost_user_set_protocol_features(dev, dev->protocol_features); 2175 if (err < 0) { 2176 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2177 return -EPROTO; 2178 } 2179 2180 /* query the max queues we support if the backend supports multiple queues */ 2181 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 2182 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, 2183 &dev->max_queues); 2184 if (err < 0) { 2185 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2186 return -EPROTO; 2187 } 2188 } else { 2189 dev->max_queues = 1; 2190 } 2191 2192 if (dev->num_queues && dev->max_queues < dev->num_queues) { 2193 error_setg(errp, "The maximum number of queues supported by the " 2194 "backend is %" PRIu64, dev->max_queues); 2195 return -EINVAL; 2196 } 2197 2198 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && 2199 !(virtio_has_feature(dev->protocol_features, 2200 VHOST_USER_PROTOCOL_F_BACKEND_REQ) && 2201 virtio_has_feature(dev->protocol_features, 2202 VHOST_USER_PROTOCOL_F_REPLY_ACK))) { 2203 error_setg(errp, "IOMMU support requires reply-ack and " 2204 "backend-req protocol features."); 2205 return -EINVAL; 2206 } 2207 2208 /* get max memory regions if backend supports configurable RAM slots */ 2209 if (!virtio_has_feature(dev->protocol_features, 2210 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) { 2211 u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS; 2212 }
else { 2213 err = vhost_user_get_max_memslots(dev, &ram_slots); 2214 if (err < 0) { 2215 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2216 return -EPROTO; 2217 } 2218 2219 if (ram_slots < u->user->memory_slots) { 2220 error_setg(errp, "The backend specified a max ram slots limit " 2221 "of %" PRIu64", when the prior validated limit was " 2222 "%d. This limit should never decrease.", ram_slots, 2223 u->user->memory_slots); 2224 return -EINVAL; 2225 } 2226 2227 u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS); 2228 } 2229 } 2230 2231 if (dev->migration_blocker == NULL && 2232 !virtio_has_feature(dev->protocol_features, 2233 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { 2234 error_setg(&dev->migration_blocker, 2235 "Migration disabled: vhost-user backend lacks " 2236 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); 2237 } 2238 2239 if (dev->vq_index == 0) { 2240 err = vhost_setup_backend_channel(dev); 2241 if (err < 0) { 2242 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2243 return -EPROTO; 2244 } 2245 } 2246 2247 u->postcopy_notifier.notify = vhost_user_postcopy_notifier; 2248 postcopy_add_notifier(&u->postcopy_notifier); 2249 2250 return 0; 2251 } 2252 2253 static int vhost_user_backend_cleanup(struct vhost_dev *dev) 2254 { 2255 struct vhost_user *u; 2256 2257 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2258 2259 u = dev->opaque; 2260 if (u->postcopy_notifier.notify) { 2261 postcopy_remove_notifier(&u->postcopy_notifier); 2262 u->postcopy_notifier.notify = NULL; 2263 } 2264 u->postcopy_listen = false; 2265 if (u->postcopy_fd.handler) { 2266 postcopy_unregister_shared_ufd(&u->postcopy_fd); 2267 close(u->postcopy_fd.fd); 2268 u->postcopy_fd.handler = NULL; 2269 } 2270 if (u->backend_ioc) { 2271 close_backend_channel(u); 2272 } 2273 g_free(u->region_rb); 2274 u->region_rb = NULL; 2275 g_free(u->region_rb_offset); 2276 u->region_rb_offset = NULL; 2277 u->region_rb_len = 0; 2278 g_free(u); 2279 dev->opaque = 0; 2280 2281 return 0; 2282 } 2283 2284 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 2285 { 2286 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 2287 2288 return idx; 2289 } 2290 2291 static int vhost_user_memslots_limit(struct vhost_dev *dev) 2292 { 2293 struct vhost_user *u = dev->opaque; 2294 2295 return u->user->memory_slots; 2296 } 2297 2298 static bool vhost_user_requires_shm_log(struct vhost_dev *dev) 2299 { 2300 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2301 2302 return virtio_has_feature(dev->protocol_features, 2303 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 2304 } 2305 2306 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) 2307 { 2308 VhostUserMsg msg = { }; 2309 2310 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2311 2312 /* If guest supports GUEST_ANNOUNCE do nothing */ 2313 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { 2314 return 0; 2315 } 2316 2317 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ 2318 if (virtio_has_feature(dev->protocol_features, 2319 VHOST_USER_PROTOCOL_F_RARP)) { 2320 msg.hdr.request = VHOST_USER_SEND_RARP; 2321 msg.hdr.flags = VHOST_USER_VERSION; 2322 memcpy((char *)&msg.payload.u64, mac_addr, 6); 2323 msg.hdr.size = sizeof(msg.payload.u64); 2324 2325 return vhost_user_write(dev, &msg, NULL, 0); 2326 } 2327 return -ENOTSUP; 2328 } 2329 2330 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) 2331 { 2332 VhostUserMsg msg; 2333 bool 
reply_supported = virtio_has_feature(dev->protocol_features, 2334 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2335 int ret; 2336 2337 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) { 2338 return 0; 2339 } 2340 2341 msg.hdr.request = VHOST_USER_NET_SET_MTU; 2342 msg.payload.u64 = mtu; 2343 msg.hdr.size = sizeof(msg.payload.u64); 2344 msg.hdr.flags = VHOST_USER_VERSION; 2345 if (reply_supported) { 2346 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2347 } 2348 2349 ret = vhost_user_write(dev, &msg, NULL, 0); 2350 if (ret < 0) { 2351 return ret; 2352 } 2353 2354 /* If reply_ack supported, backend has to ack specified MTU is valid */ 2355 if (reply_supported) { 2356 return process_message_reply(dev, &msg); 2357 } 2358 2359 return 0; 2360 } 2361 2362 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev, 2363 struct vhost_iotlb_msg *imsg) 2364 { 2365 int ret; 2366 VhostUserMsg msg = { 2367 .hdr.request = VHOST_USER_IOTLB_MSG, 2368 .hdr.size = sizeof(msg.payload.iotlb), 2369 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 2370 .payload.iotlb = *imsg, 2371 }; 2372 2373 ret = vhost_user_write(dev, &msg, NULL, 0); 2374 if (ret < 0) { 2375 return ret; 2376 } 2377 2378 return process_message_reply(dev, &msg); 2379 } 2380 2381 2382 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled) 2383 { 2384 /* No-op as the receive channel is not dedicated to IOTLB messages. */ 2385 } 2386 2387 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, 2388 uint32_t config_len, Error **errp) 2389 { 2390 int ret; 2391 VhostUserMsg msg = { 2392 .hdr.request = VHOST_USER_GET_CONFIG, 2393 .hdr.flags = VHOST_USER_VERSION, 2394 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len, 2395 }; 2396 2397 if (!virtio_has_feature(dev->protocol_features, 2398 VHOST_USER_PROTOCOL_F_CONFIG)) { 2399 error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported"); 2400 return -EINVAL; 2401 } 2402 2403 assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE); 2404 2405 msg.payload.config.offset = 0; 2406 msg.payload.config.size = config_len; 2407 ret = vhost_user_write(dev, &msg, NULL, 0); 2408 if (ret < 0) { 2409 error_setg_errno(errp, -ret, "vhost_get_config failed"); 2410 return ret; 2411 } 2412 2413 ret = vhost_user_read(dev, &msg); 2414 if (ret < 0) { 2415 error_setg_errno(errp, -ret, "vhost_get_config failed"); 2416 return ret; 2417 } 2418 2419 if (msg.hdr.request != VHOST_USER_GET_CONFIG) { 2420 error_setg(errp, 2421 "Received unexpected msg type. 
Expected %d received %d", 2422 VHOST_USER_GET_CONFIG, msg.hdr.request); 2423 return -EPROTO; 2424 } 2425 2426 if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) { 2427 error_setg(errp, "Received bad msg size."); 2428 return -EPROTO; 2429 } 2430 2431 memcpy(config, msg.payload.config.region, config_len); 2432 2433 return 0; 2434 } 2435 2436 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data, 2437 uint32_t offset, uint32_t size, uint32_t flags) 2438 { 2439 int ret; 2440 uint8_t *p; 2441 bool reply_supported = virtio_has_feature(dev->protocol_features, 2442 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2443 2444 VhostUserMsg msg = { 2445 .hdr.request = VHOST_USER_SET_CONFIG, 2446 .hdr.flags = VHOST_USER_VERSION, 2447 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size, 2448 }; 2449 2450 if (!virtio_has_feature(dev->protocol_features, 2451 VHOST_USER_PROTOCOL_F_CONFIG)) { 2452 return -ENOTSUP; 2453 } 2454 2455 if (reply_supported) { 2456 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2457 } 2458 2459 if (size > VHOST_USER_MAX_CONFIG_SIZE) { 2460 return -EINVAL; 2461 } 2462 2463 msg.payload.config.offset = offset, 2464 msg.payload.config.size = size, 2465 msg.payload.config.flags = flags, 2466 p = msg.payload.config.region; 2467 memcpy(p, data, size); 2468 2469 ret = vhost_user_write(dev, &msg, NULL, 0); 2470 if (ret < 0) { 2471 return ret; 2472 } 2473 2474 if (reply_supported) { 2475 return process_message_reply(dev, &msg); 2476 } 2477 2478 return 0; 2479 } 2480 2481 static int vhost_user_crypto_create_session(struct vhost_dev *dev, 2482 void *session_info, 2483 uint64_t *session_id) 2484 { 2485 int ret; 2486 bool crypto_session = virtio_has_feature(dev->protocol_features, 2487 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2488 CryptoDevBackendSessionInfo *backend_info = session_info; 2489 VhostUserMsg msg = { 2490 .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION, 2491 .hdr.flags = VHOST_USER_VERSION, 2492 .hdr.size = sizeof(msg.payload.session), 2493 }; 2494 2495 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2496 2497 if (!crypto_session) { 2498 error_report("vhost-user trying to send unhandled ioctl"); 2499 return -ENOTSUP; 2500 } 2501 2502 if (backend_info->op_code == VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION) { 2503 CryptoDevBackendAsymSessionInfo *sess = &backend_info->u.asym_sess_info; 2504 size_t keylen; 2505 2506 memcpy(&msg.payload.session.u.asym.session_setup_data, sess, 2507 sizeof(CryptoDevBackendAsymSessionInfo)); 2508 if (sess->keylen) { 2509 keylen = sizeof(msg.payload.session.u.asym.key); 2510 if (sess->keylen > keylen) { 2511 error_report("Unsupported asymmetric key size"); 2512 return -ENOTSUP; 2513 } 2514 2515 memcpy(&msg.payload.session.u.asym.key, sess->key, 2516 sess->keylen); 2517 } 2518 } else { 2519 CryptoDevBackendSymSessionInfo *sess = &backend_info->u.sym_sess_info; 2520 size_t keylen; 2521 2522 memcpy(&msg.payload.session.u.sym.session_setup_data, sess, 2523 sizeof(CryptoDevBackendSymSessionInfo)); 2524 if (sess->key_len) { 2525 keylen = sizeof(msg.payload.session.u.sym.key); 2526 if (sess->key_len > keylen) { 2527 error_report("Unsupported cipher key size"); 2528 return -ENOTSUP; 2529 } 2530 2531 memcpy(&msg.payload.session.u.sym.key, sess->cipher_key, 2532 sess->key_len); 2533 } 2534 2535 if (sess->auth_key_len > 0) { 2536 keylen = sizeof(msg.payload.session.u.sym.auth_key); 2537 if (sess->auth_key_len > keylen) { 2538 error_report("Unsupported auth key size"); 2539 return -ENOTSUP; 2540 } 2541 2542 
memcpy(&msg.payload.session.u.sym.auth_key, sess->auth_key, 2543 sess->auth_key_len); 2544 } 2545 } 2546 2547 msg.payload.session.op_code = backend_info->op_code; 2548 msg.payload.session.session_id = backend_info->session_id; 2549 ret = vhost_user_write(dev, &msg, NULL, 0); 2550 if (ret < 0) { 2551 error_report("vhost_user_write() returned %d, create session failed", 2552 ret); 2553 return ret; 2554 } 2555 2556 ret = vhost_user_read(dev, &msg); 2557 if (ret < 0) { 2558 error_report("vhost_user_read() returned %d, create session failed", 2559 ret); 2560 return ret; 2561 } 2562 2563 if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) { 2564 error_report("Received unexpected msg type. Expected %d received %d", 2565 VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request); 2566 return -EPROTO; 2567 } 2568 2569 if (msg.hdr.size != sizeof(msg.payload.session)) { 2570 error_report("Received bad msg size."); 2571 return -EPROTO; 2572 } 2573 2574 if (msg.payload.session.session_id < 0) { 2575 error_report("Bad session id: %" PRId64 "", 2576 msg.payload.session.session_id); 2577 return -EINVAL; 2578 } 2579 *session_id = msg.payload.session.session_id; 2580 2581 return 0; 2582 } 2583 2584 static int 2585 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id) 2586 { 2587 int ret; 2588 bool crypto_session = virtio_has_feature(dev->protocol_features, 2589 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2590 VhostUserMsg msg = { 2591 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION, 2592 .hdr.flags = VHOST_USER_VERSION, 2593 .hdr.size = sizeof(msg.payload.u64), 2594 }; 2595 msg.payload.u64 = session_id; 2596 2597 if (!crypto_session) { 2598 error_report("vhost-user trying to send unhandled ioctl"); 2599 return -ENOTSUP; 2600 } 2601 2602 ret = vhost_user_write(dev, &msg, NULL, 0); 2603 if (ret < 0) { 2604 error_report("vhost_user_write() returned %d, close session failed", 2605 ret); 2606 return ret; 2607 } 2608 2609 return 0; 2610 } 2611 2612 static bool vhost_user_no_private_memslots(struct vhost_dev *dev) 2613 { 2614 return true; 2615 } 2616 2617 static int vhost_user_get_inflight_fd(struct vhost_dev *dev, 2618 uint16_t queue_size, 2619 struct vhost_inflight *inflight) 2620 { 2621 void *addr; 2622 int fd; 2623 int ret; 2624 struct vhost_user *u = dev->opaque; 2625 CharBackend *chr = u->user->chr; 2626 VhostUserMsg msg = { 2627 .hdr.request = VHOST_USER_GET_INFLIGHT_FD, 2628 .hdr.flags = VHOST_USER_VERSION, 2629 .payload.inflight.num_queues = dev->nvqs, 2630 .payload.inflight.queue_size = queue_size, 2631 .hdr.size = sizeof(msg.payload.inflight), 2632 }; 2633 2634 if (!virtio_has_feature(dev->protocol_features, 2635 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2636 return 0; 2637 } 2638 2639 ret = vhost_user_write(dev, &msg, NULL, 0); 2640 if (ret < 0) { 2641 return ret; 2642 } 2643 2644 ret = vhost_user_read(dev, &msg); 2645 if (ret < 0) { 2646 return ret; 2647 } 2648 2649 if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) { 2650 error_report("Received unexpected msg type.
" 2651 "Expected %d received %d", 2652 VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request); 2653 return -EPROTO; 2654 } 2655 2656 if (msg.hdr.size != sizeof(msg.payload.inflight)) { 2657 error_report("Received bad msg size."); 2658 return -EPROTO; 2659 } 2660 2661 if (!msg.payload.inflight.mmap_size) { 2662 return 0; 2663 } 2664 2665 fd = qemu_chr_fe_get_msgfd(chr); 2666 if (fd < 0) { 2667 error_report("Failed to get mem fd"); 2668 return -EIO; 2669 } 2670 2671 addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE, 2672 MAP_SHARED, fd, msg.payload.inflight.mmap_offset); 2673 2674 if (addr == MAP_FAILED) { 2675 error_report("Failed to mmap mem fd"); 2676 close(fd); 2677 return -EFAULT; 2678 } 2679 2680 inflight->addr = addr; 2681 inflight->fd = fd; 2682 inflight->size = msg.payload.inflight.mmap_size; 2683 inflight->offset = msg.payload.inflight.mmap_offset; 2684 inflight->queue_size = queue_size; 2685 2686 return 0; 2687 } 2688 2689 static int vhost_user_set_inflight_fd(struct vhost_dev *dev, 2690 struct vhost_inflight *inflight) 2691 { 2692 VhostUserMsg msg = { 2693 .hdr.request = VHOST_USER_SET_INFLIGHT_FD, 2694 .hdr.flags = VHOST_USER_VERSION, 2695 .payload.inflight.mmap_size = inflight->size, 2696 .payload.inflight.mmap_offset = inflight->offset, 2697 .payload.inflight.num_queues = dev->nvqs, 2698 .payload.inflight.queue_size = inflight->queue_size, 2699 .hdr.size = sizeof(msg.payload.inflight), 2700 }; 2701 2702 if (!virtio_has_feature(dev->protocol_features, 2703 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2704 return 0; 2705 } 2706 2707 return vhost_user_write(dev, &msg, &inflight->fd, 1); 2708 } 2709 2710 static void vhost_user_state_destroy(gpointer data) 2711 { 2712 VhostUserHostNotifier *n = (VhostUserHostNotifier *) data; 2713 if (n) { 2714 vhost_user_host_notifier_remove(n, NULL); 2715 object_unparent(OBJECT(&n->mr)); 2716 /* 2717 * We can't free until vhost_user_host_notifier_remove has 2718 * done it's thing so schedule the free with RCU. 2719 */ 2720 g_free_rcu(n, rcu); 2721 } 2722 } 2723 2724 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) 2725 { 2726 if (user->chr) { 2727 error_setg(errp, "Cannot initialize vhost-user state"); 2728 return false; 2729 } 2730 user->chr = chr; 2731 user->memory_slots = 0; 2732 user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4, 2733 &vhost_user_state_destroy); 2734 return true; 2735 } 2736 2737 void vhost_user_cleanup(VhostUserState *user) 2738 { 2739 if (!user->chr) { 2740 return; 2741 } 2742 memory_region_transaction_begin(); 2743 user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true); 2744 memory_region_transaction_commit(); 2745 user->chr = NULL; 2746 } 2747 2748 2749 typedef struct { 2750 vu_async_close_fn cb; 2751 DeviceState *dev; 2752 CharBackend *cd; 2753 struct vhost_dev *vhost; 2754 IOEventHandler *event_cb; 2755 } VhostAsyncCallback; 2756 2757 static void vhost_user_async_close_bh(void *opaque) 2758 { 2759 VhostAsyncCallback *data = opaque; 2760 struct vhost_dev *vhost = data->vhost; 2761 2762 /* 2763 * If the vhost_dev has been cleared in the meantime there is 2764 * nothing left to do as some other path has completed the 2765 * cleanup. 2766 */ 2767 if (vhost->vdev) { 2768 data->cb(data->dev); 2769 } else if (data->event_cb) { 2770 qemu_chr_fe_set_handlers(data->cd, NULL, NULL, data->event_cb, 2771 NULL, data->dev, NULL, true); 2772 } 2773 2774 g_free(data); 2775 } 2776 2777 /* 2778 * We only schedule the work if the machine is running. 
If suspended 2779 * we want to keep all the in-flight data as is for migration 2780 * purposes. 2781 */ 2782 void vhost_user_async_close(DeviceState *d, 2783 CharBackend *chardev, struct vhost_dev *vhost, 2784 vu_async_close_fn cb, 2785 IOEventHandler *event_cb) 2786 { 2787 if (!runstate_check(RUN_STATE_SHUTDOWN)) { 2788 /* 2789 * A close event may happen during a read/write, but vhost 2790 * code assumes the vhost_dev remains set up, so delay the 2791 * stop & clear. 2792 */ 2793 AioContext *ctx = qemu_get_current_aio_context(); 2794 VhostAsyncCallback *data = g_new0(VhostAsyncCallback, 1); 2795 2796 /* Save data for the callback */ 2797 data->cb = cb; 2798 data->dev = d; 2799 data->cd = chardev; 2800 data->vhost = vhost; 2801 data->event_cb = event_cb; 2802 2803 /* Disable any further notifications on the chardev */ 2804 qemu_chr_fe_set_handlers(chardev, 2805 NULL, NULL, NULL, NULL, NULL, NULL, 2806 false); 2807 2808 aio_bh_schedule_oneshot(ctx, vhost_user_async_close_bh, data); 2809 2810 /* 2811 * Move the vhost device to the stopped state. The vhost-user device 2812 * will be cleaned up and disconnected in the BH. This can be useful in 2813 * the vhost migration code. If a disconnect was caught, there is an 2814 * option for the general vhost code to get the dev state without 2815 * knowing its type (in this case vhost-user). 2816 * 2817 * Note that if the vhost device is fully cleared by the time we 2818 * execute the bottom half, we won't continue with the cleanup. 2819 */ 2820 vhost->started = false; 2821 } 2822 } 2823 2824 static int vhost_user_dev_start(struct vhost_dev *dev, bool started) 2825 { 2826 if (!virtio_has_feature(dev->protocol_features, 2827 VHOST_USER_PROTOCOL_F_STATUS)) { 2828 return 0; 2829 } 2830 2831 /* Set device status only for last queue pair */ 2832 if (dev->vq_index + dev->nvqs != dev->vq_index_end) { 2833 return 0; 2834 } 2835 2836 if (started) { 2837 return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | 2838 VIRTIO_CONFIG_S_DRIVER | 2839 VIRTIO_CONFIG_S_DRIVER_OK); 2840 } else { 2841 return 0; 2842 } 2843 } 2844 2845 static void vhost_user_reset_status(struct vhost_dev *dev) 2846 { 2847 /* Set device status only for last queue pair */ 2848 if (dev->vq_index + dev->nvqs != dev->vq_index_end) { 2849 return; 2850 } 2851 2852 if (virtio_has_feature(dev->protocol_features, 2853 VHOST_USER_PROTOCOL_F_STATUS)) { 2854 vhost_user_set_status(dev, 0); 2855 } 2856 } 2857 2858 const VhostOps user_ops = { 2859 .backend_type = VHOST_BACKEND_TYPE_USER, 2860 .vhost_backend_init = vhost_user_backend_init, 2861 .vhost_backend_cleanup = vhost_user_backend_cleanup, 2862 .vhost_backend_memslots_limit = vhost_user_memslots_limit, 2863 .vhost_backend_no_private_memslots = vhost_user_no_private_memslots, 2864 .vhost_set_log_base = vhost_user_set_log_base, 2865 .vhost_set_mem_table = vhost_user_set_mem_table, 2866 .vhost_set_vring_addr = vhost_user_set_vring_addr, 2867 .vhost_set_vring_endian = vhost_user_set_vring_endian, 2868 .vhost_set_vring_num = vhost_user_set_vring_num, 2869 .vhost_set_vring_base = vhost_user_set_vring_base, 2870 .vhost_get_vring_base = vhost_user_get_vring_base, 2871 .vhost_set_vring_kick = vhost_user_set_vring_kick, 2872 .vhost_set_vring_call = vhost_user_set_vring_call, 2873 .vhost_set_vring_err = vhost_user_set_vring_err, 2874 .vhost_set_features = vhost_user_set_features, 2875 .vhost_get_features = vhost_user_get_features, 2876 .vhost_set_owner = vhost_user_set_owner, 2877 .vhost_reset_device = vhost_user_reset_device, 2878 .vhost_get_vq_index =
vhost_user_get_vq_index, 2879 .vhost_set_vring_enable = vhost_user_set_vring_enable, 2880 .vhost_requires_shm_log = vhost_user_requires_shm_log, 2881 .vhost_migration_done = vhost_user_migration_done, 2882 .vhost_net_set_mtu = vhost_user_net_set_mtu, 2883 .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback, 2884 .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg, 2885 .vhost_get_config = vhost_user_get_config, 2886 .vhost_set_config = vhost_user_set_config, 2887 .vhost_crypto_create_session = vhost_user_crypto_create_session, 2888 .vhost_crypto_close_session = vhost_user_crypto_close_session, 2889 .vhost_get_inflight_fd = vhost_user_get_inflight_fd, 2890 .vhost_set_inflight_fd = vhost_user_set_inflight_fd, 2891 .vhost_dev_start = vhost_user_dev_start, 2892 .vhost_reset_status = vhost_user_reset_status, 2893 }; 2894
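/*
 * Illustrative sketch (not part of the upstream file): a typical device
 * front-end pairs the user_ops table above with a VhostUserState wrapping a
 * chardev connected to the backend socket. The helper below is hypothetical;
 * it only assumes the vhost_user_init()/vhost_user_cleanup() helpers defined
 * in this file and the generic vhost_dev_init() entry point from
 * hw/virtio/vhost.c, and error handling is reduced to the minimum.
 *
 *     static bool example_connect_vhost_user(CharBackend *chr,
 *                                            VhostUserState *user,
 *                                            struct vhost_dev *hdev,
 *                                            int nvqs, Error **errp)
 *     {
 *         // Bind the chardev to the vhost-user state (fails if already set)
 *         if (!vhost_user_init(user, chr, errp)) {
 *             return false;
 *         }
 *
 *         // Hand the state to the generic vhost layer; user_ops is selected
 *         // via VHOST_BACKEND_TYPE_USER and drives the message exchange.
 *         hdev->nvqs = nvqs;
 *         if (vhost_dev_init(hdev, user, VHOST_BACKEND_TYPE_USER, 0,
 *                            errp) < 0) {
 *             vhost_user_cleanup(user);
 *             return false;
 *         }
 *         return true;
 *     }
 */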