1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "qemu/osdep.h" 12 #include "qapi/error.h" 13 #include "hw/virtio/virtio-dmabuf.h" 14 #include "hw/virtio/vhost.h" 15 #include "hw/virtio/virtio-crypto.h" 16 #include "hw/virtio/vhost-user.h" 17 #include "hw/virtio/vhost-backend.h" 18 #include "hw/virtio/virtio.h" 19 #include "hw/virtio/virtio-net.h" 20 #include "chardev/char-fe.h" 21 #include "io/channel-socket.h" 22 #include "sysemu/kvm.h" 23 #include "qemu/error-report.h" 24 #include "qemu/main-loop.h" 25 #include "qemu/uuid.h" 26 #include "qemu/sockets.h" 27 #include "sysemu/runstate.h" 28 #include "sysemu/cryptodev.h" 29 #include "migration/postcopy-ram.h" 30 #include "trace.h" 31 #include "exec/ramblock.h" 32 33 #include <sys/ioctl.h> 34 #include <sys/socket.h> 35 #include <sys/un.h> 36 37 #include "standard-headers/linux/vhost_types.h" 38 39 #ifdef CONFIG_LINUX 40 #include <linux/userfaultfd.h> 41 #endif 42 43 #define VHOST_MEMORY_BASELINE_NREGIONS 8 44 #define VHOST_USER_F_PROTOCOL_FEATURES 30 45 #define VHOST_USER_BACKEND_MAX_FDS 8 46 47 #if defined(TARGET_PPC) || defined(TARGET_PPC64) 48 #include "hw/ppc/spapr.h" 49 #define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS 50 51 #else 52 #define VHOST_USER_MAX_RAM_SLOTS 512 53 #endif 54 55 /* 56 * Maximum size of virtio device config space 57 */ 58 #define VHOST_USER_MAX_CONFIG_SIZE 256 59 60 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) 61 62 typedef enum VhostUserRequest { 63 VHOST_USER_NONE = 0, 64 VHOST_USER_GET_FEATURES = 1, 65 VHOST_USER_SET_FEATURES = 2, 66 VHOST_USER_SET_OWNER = 3, 67 VHOST_USER_RESET_OWNER = 4, 68 VHOST_USER_SET_MEM_TABLE = 5, 69 VHOST_USER_SET_LOG_BASE = 6, 70 VHOST_USER_SET_LOG_FD = 7, 71 VHOST_USER_SET_VRING_NUM = 8, 72 VHOST_USER_SET_VRING_ADDR = 9, 73 VHOST_USER_SET_VRING_BASE = 10, 74 VHOST_USER_GET_VRING_BASE = 11, 75 VHOST_USER_SET_VRING_KICK = 12, 76 VHOST_USER_SET_VRING_CALL = 13, 77 VHOST_USER_SET_VRING_ERR = 14, 78 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 79 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 80 VHOST_USER_GET_QUEUE_NUM = 17, 81 VHOST_USER_SET_VRING_ENABLE = 18, 82 VHOST_USER_SEND_RARP = 19, 83 VHOST_USER_NET_SET_MTU = 20, 84 VHOST_USER_SET_BACKEND_REQ_FD = 21, 85 VHOST_USER_IOTLB_MSG = 22, 86 VHOST_USER_SET_VRING_ENDIAN = 23, 87 VHOST_USER_GET_CONFIG = 24, 88 VHOST_USER_SET_CONFIG = 25, 89 VHOST_USER_CREATE_CRYPTO_SESSION = 26, 90 VHOST_USER_CLOSE_CRYPTO_SESSION = 27, 91 VHOST_USER_POSTCOPY_ADVISE = 28, 92 VHOST_USER_POSTCOPY_LISTEN = 29, 93 VHOST_USER_POSTCOPY_END = 30, 94 VHOST_USER_GET_INFLIGHT_FD = 31, 95 VHOST_USER_SET_INFLIGHT_FD = 32, 96 VHOST_USER_GPU_SET_SOCKET = 33, 97 VHOST_USER_RESET_DEVICE = 34, 98 /* Message number 35 reserved for VHOST_USER_VRING_KICK. 
*/ 99 VHOST_USER_GET_MAX_MEM_SLOTS = 36, 100 VHOST_USER_ADD_MEM_REG = 37, 101 VHOST_USER_REM_MEM_REG = 38, 102 VHOST_USER_SET_STATUS = 39, 103 VHOST_USER_GET_STATUS = 40, 104 VHOST_USER_GET_SHARED_OBJECT = 41, 105 VHOST_USER_SET_DEVICE_STATE_FD = 42, 106 VHOST_USER_CHECK_DEVICE_STATE = 43, 107 VHOST_USER_MAX 108 } VhostUserRequest; 109 110 typedef enum VhostUserBackendRequest { 111 VHOST_USER_BACKEND_NONE = 0, 112 VHOST_USER_BACKEND_IOTLB_MSG = 1, 113 VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2, 114 VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3, 115 VHOST_USER_BACKEND_SHARED_OBJECT_ADD = 6, 116 VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE = 7, 117 VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8, 118 VHOST_USER_BACKEND_MAX 119 } VhostUserBackendRequest; 120 121 typedef struct VhostUserMemoryRegion { 122 uint64_t guest_phys_addr; 123 uint64_t memory_size; 124 uint64_t userspace_addr; 125 uint64_t mmap_offset; 126 } VhostUserMemoryRegion; 127 128 typedef struct VhostUserMemory { 129 uint32_t nregions; 130 uint32_t padding; 131 VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS]; 132 } VhostUserMemory; 133 134 typedef struct VhostUserMemRegMsg { 135 uint64_t padding; 136 VhostUserMemoryRegion region; 137 } VhostUserMemRegMsg; 138 139 typedef struct VhostUserLog { 140 uint64_t mmap_size; 141 uint64_t mmap_offset; 142 } VhostUserLog; 143 144 typedef struct VhostUserConfig { 145 uint32_t offset; 146 uint32_t size; 147 uint32_t flags; 148 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; 149 } VhostUserConfig; 150 151 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512 152 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64 153 #define VHOST_CRYPTO_ASYM_MAX_KEY_LEN 1024 154 155 typedef struct VhostUserCryptoSession { 156 uint64_t op_code; 157 union { 158 struct { 159 CryptoDevBackendSymSessionInfo session_setup_data; 160 uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN]; 161 uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN]; 162 } sym; 163 struct { 164 CryptoDevBackendAsymSessionInfo session_setup_data; 165 uint8_t key[VHOST_CRYPTO_ASYM_MAX_KEY_LEN]; 166 } asym; 167 } u; 168 169 /* session id for success, -1 on errors */ 170 int64_t session_id; 171 } VhostUserCryptoSession; 172 173 static VhostUserConfig c __attribute__ ((unused)); 174 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \ 175 + sizeof(c.size) \ 176 + sizeof(c.flags)) 177 178 typedef struct VhostUserVringArea { 179 uint64_t u64; 180 uint64_t size; 181 uint64_t offset; 182 } VhostUserVringArea; 183 184 typedef struct VhostUserInflight { 185 uint64_t mmap_size; 186 uint64_t mmap_offset; 187 uint16_t num_queues; 188 uint16_t queue_size; 189 } VhostUserInflight; 190 191 typedef struct VhostUserShared { 192 unsigned char uuid[16]; 193 } VhostUserShared; 194 195 typedef struct { 196 VhostUserRequest request; 197 198 #define VHOST_USER_VERSION_MASK (0x3) 199 #define VHOST_USER_REPLY_MASK (0x1 << 2) 200 #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) 201 uint32_t flags; 202 uint32_t size; /* the following payload size */ 203 } QEMU_PACKED VhostUserHeader; 204 205 /* Request payload of VHOST_USER_SET_DEVICE_STATE_FD */ 206 typedef struct VhostUserTransferDeviceState { 207 uint32_t direction; 208 uint32_t phase; 209 } VhostUserTransferDeviceState; 210 211 typedef union { 212 #define VHOST_USER_VRING_IDX_MASK (0xff) 213 #define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) 214 uint64_t u64; 215 struct vhost_vring_state state; 216 struct vhost_vring_addr addr; 217 VhostUserMemory memory; 218 VhostUserMemRegMsg mem_reg; 219 VhostUserLog log; 220 struct vhost_iotlb_msg 
iotlb; 221 VhostUserConfig config; 222 VhostUserCryptoSession session; 223 VhostUserVringArea area; 224 VhostUserInflight inflight; 225 VhostUserShared object; 226 VhostUserTransferDeviceState transfer_state; 227 } VhostUserPayload; 228 229 typedef struct VhostUserMsg { 230 VhostUserHeader hdr; 231 VhostUserPayload payload; 232 } QEMU_PACKED VhostUserMsg; 233 234 static VhostUserMsg m __attribute__ ((unused)); 235 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader)) 236 237 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload)) 238 239 /* The version of the protocol we support */ 240 #define VHOST_USER_VERSION (0x1) 241 242 struct vhost_user { 243 struct vhost_dev *dev; 244 /* Shared between vhost devs of the same virtio device */ 245 VhostUserState *user; 246 QIOChannel *backend_ioc; 247 GSource *backend_src; 248 NotifierWithReturn postcopy_notifier; 249 struct PostCopyFD postcopy_fd; 250 uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS]; 251 /* Length of the region_rb and region_rb_offset arrays */ 252 size_t region_rb_len; 253 /* RAMBlock associated with a given region */ 254 RAMBlock **region_rb; 255 /* 256 * The offset from the start of the RAMBlock to the start of the 257 * vhost region. 258 */ 259 ram_addr_t *region_rb_offset; 260 261 /* True once we've entered postcopy_listen */ 262 bool postcopy_listen; 263 264 /* Our current regions */ 265 int num_shadow_regions; 266 struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS]; 267 }; 268 269 struct scrub_regions { 270 struct vhost_memory_region *region; 271 int reg_idx; 272 int fd_idx; 273 }; 274 275 static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg) 276 { 277 struct vhost_user *u = dev->opaque; 278 CharBackend *chr = u->user->chr; 279 uint8_t *p = (uint8_t *) msg; 280 int r, size = VHOST_USER_HDR_SIZE; 281 282 r = qemu_chr_fe_read_all(chr, p, size); 283 if (r != size) { 284 int saved_errno = errno; 285 error_report("Failed to read msg header. Read %d instead of %d." 286 " Original request %d.", r, size, msg->hdr.request); 287 return r < 0 ? -saved_errno : -EIO; 288 } 289 290 /* validate received flags */ 291 if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 292 error_report("Failed to read msg header." 293 " Flags 0x%x instead of 0x%x.", msg->hdr.flags, 294 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 295 return -EPROTO; 296 } 297 298 trace_vhost_user_read(msg->hdr.request, msg->hdr.flags); 299 300 return 0; 301 } 302 303 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 304 { 305 struct vhost_user *u = dev->opaque; 306 CharBackend *chr = u->user->chr; 307 uint8_t *p = (uint8_t *) msg; 308 int r, size; 309 310 r = vhost_user_read_header(dev, msg); 311 if (r < 0) { 312 return r; 313 } 314 315 /* validate message size is sane */ 316 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) { 317 error_report("Failed to read msg header." 318 " Size %d exceeds the maximum %zu.", msg->hdr.size, 319 VHOST_USER_PAYLOAD_SIZE); 320 return -EPROTO; 321 } 322 323 if (msg->hdr.size) { 324 p += VHOST_USER_HDR_SIZE; 325 size = msg->hdr.size; 326 r = qemu_chr_fe_read_all(chr, p, size); 327 if (r != size) { 328 int saved_errno = errno; 329 error_report("Failed to read msg payload." 330 " Read %d instead of %d.", r, msg->hdr.size); 331 return r < 0 ? 
-saved_errno : -EIO; 332 } 333 } 334 335 return 0; 336 } 337 338 static int process_message_reply(struct vhost_dev *dev, 339 const VhostUserMsg *msg) 340 { 341 int ret; 342 VhostUserMsg msg_reply; 343 344 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { 345 return 0; 346 } 347 348 ret = vhost_user_read(dev, &msg_reply); 349 if (ret < 0) { 350 return ret; 351 } 352 353 if (msg_reply.hdr.request != msg->hdr.request) { 354 error_report("Received unexpected msg type. " 355 "Expected %d received %d", 356 msg->hdr.request, msg_reply.hdr.request); 357 return -EPROTO; 358 } 359 360 return msg_reply.payload.u64 ? -EIO : 0; 361 } 362 363 static bool vhost_user_per_device_request(VhostUserRequest request) 364 { 365 switch (request) { 366 case VHOST_USER_SET_OWNER: 367 case VHOST_USER_RESET_OWNER: 368 case VHOST_USER_SET_MEM_TABLE: 369 case VHOST_USER_GET_QUEUE_NUM: 370 case VHOST_USER_NET_SET_MTU: 371 case VHOST_USER_RESET_DEVICE: 372 case VHOST_USER_ADD_MEM_REG: 373 case VHOST_USER_REM_MEM_REG: 374 return true; 375 default: 376 return false; 377 } 378 } 379 380 /* most non-init callers ignore the error */ 381 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 382 int *fds, int fd_num) 383 { 384 struct vhost_user *u = dev->opaque; 385 CharBackend *chr = u->user->chr; 386 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size; 387 388 /* 389 * Some devices, like virtio-scsi, are implemented as a single vhost_dev, 390 * while others, like virtio-net, contain multiple vhost_devs. For 391 * operations such as configuring device memory mappings or issuing device 392 * resets, which affect the whole device instead of individual VQs, 393 * vhost-user messages should only be sent once. 394 * 395 * Devices with multiple vhost_devs are given an associated dev->vq_index 396 * so per_device requests are only sent if vq_index is 0. 397 */ 398 if (vhost_user_per_device_request(msg->hdr.request) 399 && dev->vq_index != 0) { 400 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 401 return 0; 402 } 403 404 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) { 405 error_report("Failed to set msg fds."); 406 return -EINVAL; 407 } 408 409 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size); 410 if (ret != size) { 411 int saved_errno = errno; 412 error_report("Failed to write msg." 413 " Wrote %d instead of %d.", ret, size); 414 return ret < 0 ? 
-saved_errno : -EIO; 415 } 416 417 trace_vhost_user_write(msg->hdr.request, msg->hdr.flags); 418 419 return 0; 420 } 421 422 int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd) 423 { 424 VhostUserMsg msg = { 425 .hdr.request = VHOST_USER_GPU_SET_SOCKET, 426 .hdr.flags = VHOST_USER_VERSION, 427 }; 428 429 return vhost_user_write(dev, &msg, &fd, 1); 430 } 431 432 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, 433 struct vhost_log *log) 434 { 435 int fds[VHOST_USER_MAX_RAM_SLOTS]; 436 size_t fd_num = 0; 437 bool shmfd = virtio_has_feature(dev->protocol_features, 438 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 439 int ret; 440 VhostUserMsg msg = { 441 .hdr.request = VHOST_USER_SET_LOG_BASE, 442 .hdr.flags = VHOST_USER_VERSION, 443 .payload.log.mmap_size = log->size * sizeof(*(log->log)), 444 .payload.log.mmap_offset = 0, 445 .hdr.size = sizeof(msg.payload.log), 446 }; 447 448 /* Send only once with first queue pair */ 449 if (dev->vq_index != 0) { 450 return 0; 451 } 452 453 if (shmfd && log->fd != -1) { 454 fds[fd_num++] = log->fd; 455 } 456 457 ret = vhost_user_write(dev, &msg, fds, fd_num); 458 if (ret < 0) { 459 return ret; 460 } 461 462 if (shmfd) { 463 msg.hdr.size = 0; 464 ret = vhost_user_read(dev, &msg); 465 if (ret < 0) { 466 return ret; 467 } 468 469 if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) { 470 error_report("Received unexpected msg type. " 471 "Expected %d received %d", 472 VHOST_USER_SET_LOG_BASE, msg.hdr.request); 473 return -EPROTO; 474 } 475 } 476 477 return 0; 478 } 479 480 static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset, 481 int *fd) 482 { 483 MemoryRegion *mr; 484 485 assert((uintptr_t)addr == addr); 486 mr = memory_region_from_host((void *)(uintptr_t)addr, offset); 487 *fd = memory_region_get_fd(mr); 488 *offset += mr->ram_block->fd_offset; 489 490 return mr; 491 } 492 493 static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst, 494 struct vhost_memory_region *src, 495 uint64_t mmap_offset) 496 { 497 assert(src != NULL && dst != NULL); 498 dst->userspace_addr = src->userspace_addr; 499 dst->memory_size = src->memory_size; 500 dst->guest_phys_addr = src->guest_phys_addr; 501 dst->mmap_offset = mmap_offset; 502 } 503 504 static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, 505 struct vhost_dev *dev, 506 VhostUserMsg *msg, 507 int *fds, size_t *fd_num, 508 bool track_ramblocks) 509 { 510 int i, fd; 511 ram_addr_t offset; 512 MemoryRegion *mr; 513 struct vhost_memory_region *reg; 514 VhostUserMemoryRegion region_buffer; 515 516 msg->hdr.request = VHOST_USER_SET_MEM_TABLE; 517 518 for (i = 0; i < dev->mem->nregions; ++i) { 519 reg = dev->mem->regions + i; 520 521 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 522 if (fd > 0) { 523 if (track_ramblocks) { 524 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS); 525 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name, 526 reg->memory_size, 527 reg->guest_phys_addr, 528 reg->userspace_addr, 529 offset); 530 u->region_rb_offset[i] = offset; 531 u->region_rb[i] = mr->ram_block; 532 } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) { 533 error_report("Failed preparing vhost-user memory table msg"); 534 return -ENOBUFS; 535 } 536 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 537 msg->payload.memory.regions[*fd_num] = region_buffer; 538 fds[(*fd_num)++] = fd; 539 } else if (track_ramblocks) { 540 u->region_rb_offset[i] = 0; 541 u->region_rb[i] = NULL; 542 } 543 } 544 545 msg->payload.memory.nregions = *fd_num; 546 547 if 
(!*fd_num) { 548 error_report("Failed initializing vhost-user memory map, " 549 "consider using -object memory-backend-file share=on"); 550 return -EINVAL; 551 } 552 553 msg->hdr.size = sizeof(msg->payload.memory.nregions); 554 msg->hdr.size += sizeof(msg->payload.memory.padding); 555 msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion); 556 557 return 0; 558 } 559 560 static inline bool reg_equal(struct vhost_memory_region *shadow_reg, 561 struct vhost_memory_region *vdev_reg) 562 { 563 return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr && 564 shadow_reg->userspace_addr == vdev_reg->userspace_addr && 565 shadow_reg->memory_size == vdev_reg->memory_size; 566 } 567 568 static void scrub_shadow_regions(struct vhost_dev *dev, 569 struct scrub_regions *add_reg, 570 int *nr_add_reg, 571 struct scrub_regions *rem_reg, 572 int *nr_rem_reg, uint64_t *shadow_pcb, 573 bool track_ramblocks) 574 { 575 struct vhost_user *u = dev->opaque; 576 bool found[VHOST_USER_MAX_RAM_SLOTS] = {}; 577 struct vhost_memory_region *reg, *shadow_reg; 578 int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0; 579 ram_addr_t offset; 580 MemoryRegion *mr; 581 bool matching; 582 583 /* 584 * Find memory regions present in our shadow state which are not in 585 * the device's current memory state. 586 * 587 * Mark regions in both the shadow and device state as "found". 588 */ 589 for (i = 0; i < u->num_shadow_regions; i++) { 590 shadow_reg = &u->shadow_regions[i]; 591 matching = false; 592 593 for (j = 0; j < dev->mem->nregions; j++) { 594 reg = &dev->mem->regions[j]; 595 596 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 597 598 if (reg_equal(shadow_reg, reg)) { 599 matching = true; 600 found[j] = true; 601 if (track_ramblocks) { 602 /* 603 * Reset postcopy client bases, region_rb, and 604 * region_rb_offset in case regions are removed. 605 */ 606 if (fd > 0) { 607 u->region_rb_offset[j] = offset; 608 u->region_rb[j] = mr->ram_block; 609 shadow_pcb[j] = u->postcopy_client_bases[i]; 610 } else { 611 u->region_rb_offset[j] = 0; 612 u->region_rb[j] = NULL; 613 } 614 } 615 break; 616 } 617 } 618 619 /* 620 * If the region was not found in the current device memory state 621 * create an entry for it in the removed list. 622 */ 623 if (!matching) { 624 rem_reg[rm_idx].region = shadow_reg; 625 rem_reg[rm_idx++].reg_idx = i; 626 } 627 } 628 629 /* 630 * For regions not marked "found", create entries in the added list. 631 * 632 * Note their indexes in the device memory state and the indexes of their 633 * file descriptors. 634 */ 635 for (i = 0; i < dev->mem->nregions; i++) { 636 reg = &dev->mem->regions[i]; 637 vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 638 if (fd > 0) { 639 ++fd_num; 640 } 641 642 /* 643 * If the region was in both the shadow and device state we don't 644 * need to send a VHOST_USER_ADD_MEM_REG message for it. 
645 */ 646 if (found[i]) { 647 continue; 648 } 649 650 add_reg[add_idx].region = reg; 651 add_reg[add_idx].reg_idx = i; 652 add_reg[add_idx++].fd_idx = fd_num; 653 } 654 *nr_rem_reg = rm_idx; 655 *nr_add_reg = add_idx; 656 657 return; 658 } 659 660 static int send_remove_regions(struct vhost_dev *dev, 661 struct scrub_regions *remove_reg, 662 int nr_rem_reg, VhostUserMsg *msg, 663 bool reply_supported) 664 { 665 struct vhost_user *u = dev->opaque; 666 struct vhost_memory_region *shadow_reg; 667 int i, fd, shadow_reg_idx, ret; 668 ram_addr_t offset; 669 VhostUserMemoryRegion region_buffer; 670 671 /* 672 * The regions in remove_reg appear in the same order they do in the 673 * shadow table. Therefore we can minimize memory copies by iterating 674 * through remove_reg backwards. 675 */ 676 for (i = nr_rem_reg - 1; i >= 0; i--) { 677 shadow_reg = remove_reg[i].region; 678 shadow_reg_idx = remove_reg[i].reg_idx; 679 680 vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd); 681 682 if (fd > 0) { 683 msg->hdr.request = VHOST_USER_REM_MEM_REG; 684 vhost_user_fill_msg_region(®ion_buffer, shadow_reg, 0); 685 msg->payload.mem_reg.region = region_buffer; 686 687 ret = vhost_user_write(dev, msg, NULL, 0); 688 if (ret < 0) { 689 return ret; 690 } 691 692 if (reply_supported) { 693 ret = process_message_reply(dev, msg); 694 if (ret) { 695 return ret; 696 } 697 } 698 } 699 700 /* 701 * At this point we know the backend has unmapped the region. It is now 702 * safe to remove it from the shadow table. 703 */ 704 memmove(&u->shadow_regions[shadow_reg_idx], 705 &u->shadow_regions[shadow_reg_idx + 1], 706 sizeof(struct vhost_memory_region) * 707 (u->num_shadow_regions - shadow_reg_idx - 1)); 708 u->num_shadow_regions--; 709 } 710 711 return 0; 712 } 713 714 static int send_add_regions(struct vhost_dev *dev, 715 struct scrub_regions *add_reg, int nr_add_reg, 716 VhostUserMsg *msg, uint64_t *shadow_pcb, 717 bool reply_supported, bool track_ramblocks) 718 { 719 struct vhost_user *u = dev->opaque; 720 int i, fd, ret, reg_idx, reg_fd_idx; 721 struct vhost_memory_region *reg; 722 MemoryRegion *mr; 723 ram_addr_t offset; 724 VhostUserMsg msg_reply; 725 VhostUserMemoryRegion region_buffer; 726 727 for (i = 0; i < nr_add_reg; i++) { 728 reg = add_reg[i].region; 729 reg_idx = add_reg[i].reg_idx; 730 reg_fd_idx = add_reg[i].fd_idx; 731 732 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 733 734 if (fd > 0) { 735 if (track_ramblocks) { 736 trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name, 737 reg->memory_size, 738 reg->guest_phys_addr, 739 reg->userspace_addr, 740 offset); 741 u->region_rb_offset[reg_idx] = offset; 742 u->region_rb[reg_idx] = mr->ram_block; 743 } 744 msg->hdr.request = VHOST_USER_ADD_MEM_REG; 745 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 746 msg->payload.mem_reg.region = region_buffer; 747 748 ret = vhost_user_write(dev, msg, &fd, 1); 749 if (ret < 0) { 750 return ret; 751 } 752 753 if (track_ramblocks) { 754 uint64_t reply_gpa; 755 756 ret = vhost_user_read(dev, &msg_reply); 757 if (ret < 0) { 758 return ret; 759 } 760 761 reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr; 762 763 if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) { 764 error_report("%s: Received unexpected msg type." 765 "Expected %d received %d", __func__, 766 VHOST_USER_ADD_MEM_REG, 767 msg_reply.hdr.request); 768 return -EPROTO; 769 } 770 771 /* 772 * We're using the same structure, just reusing one of the 773 * fields, so it should be the same size. 
774 */ 775 if (msg_reply.hdr.size != msg->hdr.size) { 776 error_report("%s: Unexpected size for postcopy reply " 777 "%d vs %d", __func__, msg_reply.hdr.size, 778 msg->hdr.size); 779 return -EPROTO; 780 } 781 782 /* Get the postcopy client base from the backend's reply. */ 783 if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) { 784 shadow_pcb[reg_idx] = 785 msg_reply.payload.mem_reg.region.userspace_addr; 786 trace_vhost_user_set_mem_table_postcopy( 787 msg_reply.payload.mem_reg.region.userspace_addr, 788 msg->payload.mem_reg.region.userspace_addr, 789 reg_fd_idx, reg_idx); 790 } else { 791 error_report("%s: invalid postcopy reply for region. " 792 "Got guest physical address %" PRIX64 ", expected " 793 "%" PRIX64, __func__, reply_gpa, 794 dev->mem->regions[reg_idx].guest_phys_addr); 795 return -EPROTO; 796 } 797 } else if (reply_supported) { 798 ret = process_message_reply(dev, msg); 799 if (ret) { 800 return ret; 801 } 802 } 803 } else if (track_ramblocks) { 804 u->region_rb_offset[reg_idx] = 0; 805 u->region_rb[reg_idx] = NULL; 806 } 807 808 /* 809 * At this point, we know the backend has mapped in the new 810 * region, if the region has a valid file descriptor. 811 * 812 * The region should now be added to the shadow table. 813 */ 814 u->shadow_regions[u->num_shadow_regions].guest_phys_addr = 815 reg->guest_phys_addr; 816 u->shadow_regions[u->num_shadow_regions].userspace_addr = 817 reg->userspace_addr; 818 u->shadow_regions[u->num_shadow_regions].memory_size = 819 reg->memory_size; 820 u->num_shadow_regions++; 821 } 822 823 return 0; 824 } 825 826 static int vhost_user_add_remove_regions(struct vhost_dev *dev, 827 VhostUserMsg *msg, 828 bool reply_supported, 829 bool track_ramblocks) 830 { 831 struct vhost_user *u = dev->opaque; 832 struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS]; 833 struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS]; 834 uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {}; 835 int nr_add_reg, nr_rem_reg; 836 int ret; 837 838 msg->hdr.size = sizeof(msg->payload.mem_reg); 839 840 /* Find the regions which need to be removed or added. */ 841 scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg, 842 shadow_pcb, track_ramblocks); 843 844 if (nr_rem_reg) { 845 ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg, 846 reply_supported); 847 if (ret < 0) { 848 goto err; 849 } 850 } 851 852 if (nr_add_reg) { 853 ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb, 854 reply_supported, track_ramblocks); 855 if (ret < 0) { 856 goto err; 857 } 858 } 859 860 if (track_ramblocks) { 861 memcpy(u->postcopy_client_bases, shadow_pcb, 862 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 863 /* 864 * Now we've registered this with the postcopy code, we ack to the 865 * client, because now we're in the position to be able to deal with 866 * any faults it generates. 867 */ 868 /* TODO: Use this for failure cases as well with a bad value. 
*/ 869 msg->hdr.size = sizeof(msg->payload.u64); 870 msg->payload.u64 = 0; /* OK */ 871 872 ret = vhost_user_write(dev, msg, NULL, 0); 873 if (ret < 0) { 874 return ret; 875 } 876 } 877 878 return 0; 879 880 err: 881 if (track_ramblocks) { 882 memcpy(u->postcopy_client_bases, shadow_pcb, 883 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 884 } 885 886 return ret; 887 } 888 889 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, 890 struct vhost_memory *mem, 891 bool reply_supported, 892 bool config_mem_slots) 893 { 894 struct vhost_user *u = dev->opaque; 895 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 896 size_t fd_num = 0; 897 VhostUserMsg msg_reply; 898 int region_i, msg_i; 899 int ret; 900 901 VhostUserMsg msg = { 902 .hdr.flags = VHOST_USER_VERSION, 903 }; 904 905 if (u->region_rb_len < dev->mem->nregions) { 906 u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions); 907 u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset, 908 dev->mem->nregions); 909 memset(&(u->region_rb[u->region_rb_len]), '\0', 910 sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len)); 911 memset(&(u->region_rb_offset[u->region_rb_len]), '\0', 912 sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len)); 913 u->region_rb_len = dev->mem->nregions; 914 } 915 916 if (config_mem_slots) { 917 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true); 918 if (ret < 0) { 919 return ret; 920 } 921 } else { 922 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 923 true); 924 if (ret < 0) { 925 return ret; 926 } 927 928 ret = vhost_user_write(dev, &msg, fds, fd_num); 929 if (ret < 0) { 930 return ret; 931 } 932 933 ret = vhost_user_read(dev, &msg_reply); 934 if (ret < 0) { 935 return ret; 936 } 937 938 if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { 939 error_report("%s: Received unexpected msg type." 940 "Expected %d received %d", __func__, 941 VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); 942 return -EPROTO; 943 } 944 945 /* 946 * We're using the same structure, just reusing one of the 947 * fields, so it should be the same size. 948 */ 949 if (msg_reply.hdr.size != msg.hdr.size) { 950 error_report("%s: Unexpected size for postcopy reply " 951 "%d vs %d", __func__, msg_reply.hdr.size, 952 msg.hdr.size); 953 return -EPROTO; 954 } 955 956 memset(u->postcopy_client_bases, 0, 957 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 958 959 /* 960 * They're in the same order as the regions that were sent 961 * but some of the regions were skipped (above) if they 962 * didn't have fd's 963 */ 964 for (msg_i = 0, region_i = 0; 965 region_i < dev->mem->nregions; 966 region_i++) { 967 if (msg_i < fd_num && 968 msg_reply.payload.memory.regions[msg_i].guest_phys_addr == 969 dev->mem->regions[region_i].guest_phys_addr) { 970 u->postcopy_client_bases[region_i] = 971 msg_reply.payload.memory.regions[msg_i].userspace_addr; 972 trace_vhost_user_set_mem_table_postcopy( 973 msg_reply.payload.memory.regions[msg_i].userspace_addr, 974 msg.payload.memory.regions[msg_i].userspace_addr, 975 msg_i, region_i); 976 msg_i++; 977 } 978 } 979 if (msg_i != fd_num) { 980 error_report("%s: postcopy reply not fully consumed " 981 "%d vs %zd", 982 __func__, msg_i, fd_num); 983 return -EIO; 984 } 985 986 /* 987 * Now we've registered this with the postcopy code, we ack to the 988 * client, because now we're in the position to be able to deal 989 * with any faults it generates. 990 */ 991 /* TODO: Use this for failure cases as well with a bad value. 
*/ 992 msg.hdr.size = sizeof(msg.payload.u64); 993 msg.payload.u64 = 0; /* OK */ 994 ret = vhost_user_write(dev, &msg, NULL, 0); 995 if (ret < 0) { 996 return ret; 997 } 998 } 999 1000 return 0; 1001 } 1002 1003 static int vhost_user_set_mem_table(struct vhost_dev *dev, 1004 struct vhost_memory *mem) 1005 { 1006 struct vhost_user *u = dev->opaque; 1007 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 1008 size_t fd_num = 0; 1009 bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; 1010 bool reply_supported = virtio_has_feature(dev->protocol_features, 1011 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1012 bool config_mem_slots = 1013 virtio_has_feature(dev->protocol_features, 1014 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS); 1015 int ret; 1016 1017 if (do_postcopy) { 1018 /* 1019 * Postcopy has enough differences that it's best done in it's own 1020 * version 1021 */ 1022 return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported, 1023 config_mem_slots); 1024 } 1025 1026 VhostUserMsg msg = { 1027 .hdr.flags = VHOST_USER_VERSION, 1028 }; 1029 1030 if (reply_supported) { 1031 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1032 } 1033 1034 if (config_mem_slots) { 1035 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false); 1036 if (ret < 0) { 1037 return ret; 1038 } 1039 } else { 1040 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 1041 false); 1042 if (ret < 0) { 1043 return ret; 1044 } 1045 1046 ret = vhost_user_write(dev, &msg, fds, fd_num); 1047 if (ret < 0) { 1048 return ret; 1049 } 1050 1051 if (reply_supported) { 1052 return process_message_reply(dev, &msg); 1053 } 1054 } 1055 1056 return 0; 1057 } 1058 1059 static int vhost_user_set_vring_endian(struct vhost_dev *dev, 1060 struct vhost_vring_state *ring) 1061 { 1062 bool cross_endian = virtio_has_feature(dev->protocol_features, 1063 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN); 1064 VhostUserMsg msg = { 1065 .hdr.request = VHOST_USER_SET_VRING_ENDIAN, 1066 .hdr.flags = VHOST_USER_VERSION, 1067 .payload.state = *ring, 1068 .hdr.size = sizeof(msg.payload.state), 1069 }; 1070 1071 if (!cross_endian) { 1072 error_report("vhost-user trying to send unhandled ioctl"); 1073 return -ENOTSUP; 1074 } 1075 1076 return vhost_user_write(dev, &msg, NULL, 0); 1077 } 1078 1079 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) 1080 { 1081 int ret; 1082 VhostUserMsg msg = { 1083 .hdr.request = request, 1084 .hdr.flags = VHOST_USER_VERSION, 1085 }; 1086 1087 if (vhost_user_per_device_request(request) && dev->vq_index != 0) { 1088 return 0; 1089 } 1090 1091 ret = vhost_user_write(dev, &msg, NULL, 0); 1092 if (ret < 0) { 1093 return ret; 1094 } 1095 1096 ret = vhost_user_read(dev, &msg); 1097 if (ret < 0) { 1098 return ret; 1099 } 1100 1101 if (msg.hdr.request != request) { 1102 error_report("Received unexpected msg type. Expected %d received %d", 1103 request, msg.hdr.request); 1104 return -EPROTO; 1105 } 1106 1107 if (msg.hdr.size != sizeof(msg.payload.u64)) { 1108 error_report("Received bad msg size."); 1109 return -EPROTO; 1110 } 1111 1112 *u64 = msg.payload.u64; 1113 1114 return 0; 1115 } 1116 1117 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) 1118 { 1119 if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) { 1120 return -EPROTO; 1121 } 1122 1123 return 0; 1124 } 1125 1126 /* Note: "msg->hdr.flags" may be modified. 
*/ 1127 static int vhost_user_write_sync(struct vhost_dev *dev, VhostUserMsg *msg, 1128 bool wait_for_reply) 1129 { 1130 int ret; 1131 1132 if (wait_for_reply) { 1133 bool reply_supported = virtio_has_feature(dev->protocol_features, 1134 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1135 if (reply_supported) { 1136 msg->hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1137 } 1138 } 1139 1140 ret = vhost_user_write(dev, msg, NULL, 0); 1141 if (ret < 0) { 1142 return ret; 1143 } 1144 1145 if (wait_for_reply) { 1146 uint64_t dummy; 1147 1148 if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1149 return process_message_reply(dev, msg); 1150 } 1151 1152 /* 1153 * We need to wait for a reply but the backend does not 1154 * support replies for the command we just sent. 1155 * Send VHOST_USER_GET_FEATURES which makes all backends 1156 * send a reply. 1157 */ 1158 return vhost_user_get_features(dev, &dummy); 1159 } 1160 1161 return 0; 1162 } 1163 1164 static int vhost_set_vring(struct vhost_dev *dev, 1165 unsigned long int request, 1166 struct vhost_vring_state *ring, 1167 bool wait_for_reply) 1168 { 1169 VhostUserMsg msg = { 1170 .hdr.request = request, 1171 .hdr.flags = VHOST_USER_VERSION, 1172 .payload.state = *ring, 1173 .hdr.size = sizeof(msg.payload.state), 1174 }; 1175 1176 return vhost_user_write_sync(dev, &msg, wait_for_reply); 1177 } 1178 1179 static int vhost_user_set_vring_num(struct vhost_dev *dev, 1180 struct vhost_vring_state *ring) 1181 { 1182 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring, false); 1183 } 1184 1185 static void vhost_user_host_notifier_free(VhostUserHostNotifier *n) 1186 { 1187 assert(n && n->unmap_addr); 1188 munmap(n->unmap_addr, qemu_real_host_page_size()); 1189 n->unmap_addr = NULL; 1190 } 1191 1192 /* 1193 * clean-up function for notifier, will finally free the structure 1194 * under rcu. 1195 */ 1196 static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n, 1197 VirtIODevice *vdev) 1198 { 1199 if (n->addr) { 1200 if (vdev) { 1201 virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false); 1202 } 1203 assert(!n->unmap_addr); 1204 n->unmap_addr = n->addr; 1205 n->addr = NULL; 1206 call_rcu(n, vhost_user_host_notifier_free, rcu); 1207 } 1208 } 1209 1210 static int vhost_user_set_vring_base(struct vhost_dev *dev, 1211 struct vhost_vring_state *ring) 1212 { 1213 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring, false); 1214 } 1215 1216 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) 1217 { 1218 int i; 1219 1220 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { 1221 return -EINVAL; 1222 } 1223 1224 for (i = 0; i < dev->nvqs; ++i) { 1225 int ret; 1226 struct vhost_vring_state state = { 1227 .index = dev->vq_index + i, 1228 .num = enable, 1229 }; 1230 1231 /* 1232 * SET_VRING_ENABLE travels from guest to QEMU to vhost-user backend / 1233 * control plane thread via unix domain socket. Virtio requests travel 1234 * from guest to vhost-user backend / data plane thread via eventfd. 1235 * Even if the guest enables the ring first, and pushes its first virtio 1236 * request second (conforming to the virtio spec), the data plane thread 1237 * in the backend may see the virtio request before the control plane 1238 * thread sees the queue enablement. This causes (in fact, requires) the 1239 * data plane thread to discard the virtio request (it arrived on a 1240 * seemingly disabled queue). 
To prevent this out-of-order delivery, 1241 * don't let the guest proceed to pushing the virtio request until the 1242 * backend control plane acknowledges enabling the queue -- IOW, pass 1243 * wait_for_reply=true below. 1244 */ 1245 ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state, true); 1246 if (ret < 0) { 1247 /* 1248 * Restoring the previous state is likely infeasible, as well as 1249 * proceeding regardless the error, so just bail out and hope for 1250 * the device-level recovery. 1251 */ 1252 return ret; 1253 } 1254 } 1255 1256 return 0; 1257 } 1258 1259 static VhostUserHostNotifier *fetch_notifier(VhostUserState *u, 1260 int idx) 1261 { 1262 if (idx >= u->notifiers->len) { 1263 return NULL; 1264 } 1265 return g_ptr_array_index(u->notifiers, idx); 1266 } 1267 1268 static int vhost_user_get_vring_base(struct vhost_dev *dev, 1269 struct vhost_vring_state *ring) 1270 { 1271 int ret; 1272 VhostUserMsg msg = { 1273 .hdr.request = VHOST_USER_GET_VRING_BASE, 1274 .hdr.flags = VHOST_USER_VERSION, 1275 .payload.state = *ring, 1276 .hdr.size = sizeof(msg.payload.state), 1277 }; 1278 struct vhost_user *u = dev->opaque; 1279 1280 VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index); 1281 if (n) { 1282 vhost_user_host_notifier_remove(n, dev->vdev); 1283 } 1284 1285 ret = vhost_user_write(dev, &msg, NULL, 0); 1286 if (ret < 0) { 1287 return ret; 1288 } 1289 1290 ret = vhost_user_read(dev, &msg); 1291 if (ret < 0) { 1292 return ret; 1293 } 1294 1295 if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) { 1296 error_report("Received unexpected msg type. Expected %d received %d", 1297 VHOST_USER_GET_VRING_BASE, msg.hdr.request); 1298 return -EPROTO; 1299 } 1300 1301 if (msg.hdr.size != sizeof(msg.payload.state)) { 1302 error_report("Received bad msg size."); 1303 return -EPROTO; 1304 } 1305 1306 *ring = msg.payload.state; 1307 1308 return 0; 1309 } 1310 1311 static int vhost_set_vring_file(struct vhost_dev *dev, 1312 VhostUserRequest request, 1313 struct vhost_vring_file *file) 1314 { 1315 int fds[VHOST_USER_MAX_RAM_SLOTS]; 1316 size_t fd_num = 0; 1317 VhostUserMsg msg = { 1318 .hdr.request = request, 1319 .hdr.flags = VHOST_USER_VERSION, 1320 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, 1321 .hdr.size = sizeof(msg.payload.u64), 1322 }; 1323 1324 if (file->fd > 0) { 1325 fds[fd_num++] = file->fd; 1326 } else { 1327 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 1328 } 1329 1330 return vhost_user_write(dev, &msg, fds, fd_num); 1331 } 1332 1333 static int vhost_user_set_vring_kick(struct vhost_dev *dev, 1334 struct vhost_vring_file *file) 1335 { 1336 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); 1337 } 1338 1339 static int vhost_user_set_vring_call(struct vhost_dev *dev, 1340 struct vhost_vring_file *file) 1341 { 1342 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); 1343 } 1344 1345 static int vhost_user_set_vring_err(struct vhost_dev *dev, 1346 struct vhost_vring_file *file) 1347 { 1348 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_ERR, file); 1349 } 1350 1351 static int vhost_user_set_vring_addr(struct vhost_dev *dev, 1352 struct vhost_vring_addr *addr) 1353 { 1354 VhostUserMsg msg = { 1355 .hdr.request = VHOST_USER_SET_VRING_ADDR, 1356 .hdr.flags = VHOST_USER_VERSION, 1357 .payload.addr = *addr, 1358 .hdr.size = sizeof(msg.payload.addr), 1359 }; 1360 1361 /* 1362 * wait for a reply if logging is enabled to make sure 1363 * backend is actually logging changes 1364 */ 1365 bool wait_for_reply = addr->flags & (1 << 
VHOST_VRING_F_LOG); 1366 1367 return vhost_user_write_sync(dev, &msg, wait_for_reply); 1368 } 1369 1370 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64, 1371 bool wait_for_reply) 1372 { 1373 VhostUserMsg msg = { 1374 .hdr.request = request, 1375 .hdr.flags = VHOST_USER_VERSION, 1376 .payload.u64 = u64, 1377 .hdr.size = sizeof(msg.payload.u64), 1378 }; 1379 1380 return vhost_user_write_sync(dev, &msg, wait_for_reply); 1381 } 1382 1383 static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status) 1384 { 1385 return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status, false); 1386 } 1387 1388 static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status) 1389 { 1390 uint64_t value; 1391 int ret; 1392 1393 ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &value); 1394 if (ret < 0) { 1395 return ret; 1396 } 1397 *status = value; 1398 1399 return 0; 1400 } 1401 1402 static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status) 1403 { 1404 uint8_t s; 1405 int ret; 1406 1407 ret = vhost_user_get_status(dev, &s); 1408 if (ret < 0) { 1409 return ret; 1410 } 1411 1412 if ((s & status) == status) { 1413 return 0; 1414 } 1415 s |= status; 1416 1417 return vhost_user_set_status(dev, s); 1418 } 1419 1420 static int vhost_user_set_features(struct vhost_dev *dev, 1421 uint64_t features) 1422 { 1423 /* 1424 * wait for a reply if logging is enabled to make sure 1425 * backend is actually logging changes 1426 */ 1427 bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL); 1428 int ret; 1429 1430 /* 1431 * We need to include any extra backend only feature bits that 1432 * might be needed by our device. Currently this includes the 1433 * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol 1434 * features. 1435 */ 1436 ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, 1437 features | dev->backend_features, 1438 log_enabled); 1439 1440 if (virtio_has_feature(dev->protocol_features, 1441 VHOST_USER_PROTOCOL_F_STATUS)) { 1442 if (!ret) { 1443 return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK); 1444 } 1445 } 1446 1447 return ret; 1448 } 1449 1450 static int vhost_user_set_protocol_features(struct vhost_dev *dev, 1451 uint64_t features) 1452 { 1453 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features, 1454 false); 1455 } 1456 1457 static int vhost_user_set_owner(struct vhost_dev *dev) 1458 { 1459 VhostUserMsg msg = { 1460 .hdr.request = VHOST_USER_SET_OWNER, 1461 .hdr.flags = VHOST_USER_VERSION, 1462 }; 1463 1464 return vhost_user_write(dev, &msg, NULL, 0); 1465 } 1466 1467 static int vhost_user_get_max_memslots(struct vhost_dev *dev, 1468 uint64_t *max_memslots) 1469 { 1470 uint64_t backend_max_memslots; 1471 int err; 1472 1473 err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS, 1474 &backend_max_memslots); 1475 if (err < 0) { 1476 return err; 1477 } 1478 1479 *max_memslots = backend_max_memslots; 1480 1481 return 0; 1482 } 1483 1484 static int vhost_user_reset_device(struct vhost_dev *dev) 1485 { 1486 VhostUserMsg msg = { 1487 .hdr.flags = VHOST_USER_VERSION, 1488 .hdr.request = VHOST_USER_RESET_DEVICE, 1489 }; 1490 1491 /* 1492 * Historically, reset was not implemented so only reset devices 1493 * that are expecting it. 
1494 */ 1495 if (!virtio_has_feature(dev->protocol_features, 1496 VHOST_USER_PROTOCOL_F_RESET_DEVICE)) { 1497 return -ENOSYS; 1498 } 1499 1500 return vhost_user_write(dev, &msg, NULL, 0); 1501 } 1502 1503 static int vhost_user_backend_handle_config_change(struct vhost_dev *dev) 1504 { 1505 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { 1506 return -ENOSYS; 1507 } 1508 1509 return dev->config_ops->vhost_dev_config_notifier(dev); 1510 } 1511 1512 /* 1513 * Fetch or create the notifier for a given idx. Newly created 1514 * notifiers are added to the pointer array that tracks them. 1515 */ 1516 static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u, 1517 int idx) 1518 { 1519 VhostUserHostNotifier *n = NULL; 1520 if (idx >= u->notifiers->len) { 1521 g_ptr_array_set_size(u->notifiers, idx + 1); 1522 } 1523 1524 n = g_ptr_array_index(u->notifiers, idx); 1525 if (!n) { 1526 /* 1527 * In case notification arrive out-of-order, 1528 * make room for current index. 1529 */ 1530 g_ptr_array_remove_index(u->notifiers, idx); 1531 n = g_new0(VhostUserHostNotifier, 1); 1532 n->idx = idx; 1533 g_ptr_array_insert(u->notifiers, idx, n); 1534 trace_vhost_user_create_notifier(idx, n); 1535 } 1536 1537 return n; 1538 } 1539 1540 static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev, 1541 VhostUserVringArea *area, 1542 int fd) 1543 { 1544 int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK; 1545 size_t page_size = qemu_real_host_page_size(); 1546 struct vhost_user *u = dev->opaque; 1547 VhostUserState *user = u->user; 1548 VirtIODevice *vdev = dev->vdev; 1549 VhostUserHostNotifier *n; 1550 void *addr; 1551 char *name; 1552 1553 if (!virtio_has_feature(dev->protocol_features, 1554 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) || 1555 vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) { 1556 return -EINVAL; 1557 } 1558 1559 /* 1560 * Fetch notifier and invalidate any old data before setting up 1561 * new mapped address. 1562 */ 1563 n = fetch_or_create_notifier(user, queue_idx); 1564 vhost_user_host_notifier_remove(n, vdev); 1565 1566 if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { 1567 return 0; 1568 } 1569 1570 /* Sanity check. */ 1571 if (area->size != page_size) { 1572 return -EINVAL; 1573 } 1574 1575 addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, 1576 fd, area->offset); 1577 if (addr == MAP_FAILED) { 1578 return -EFAULT; 1579 } 1580 1581 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]", 1582 user, queue_idx); 1583 if (!n->mr.ram) { /* Don't init again after suspend. 
*/ 1584 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, 1585 page_size, addr); 1586 } else { 1587 n->mr.ram_block->host = addr; 1588 } 1589 g_free(name); 1590 1591 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) { 1592 object_unparent(OBJECT(&n->mr)); 1593 munmap(addr, page_size); 1594 return -ENXIO; 1595 } 1596 1597 n->addr = addr; 1598 1599 return 0; 1600 } 1601 1602 static int 1603 vhost_user_backend_handle_shared_object_add(struct vhost_dev *dev, 1604 VhostUserShared *object) 1605 { 1606 QemuUUID uuid; 1607 1608 memcpy(uuid.data, object->uuid, sizeof(object->uuid)); 1609 return virtio_add_vhost_device(&uuid, dev); 1610 } 1611 1612 static int 1613 vhost_user_backend_handle_shared_object_remove(struct vhost_dev *dev, 1614 VhostUserShared *object) 1615 { 1616 QemuUUID uuid; 1617 1618 memcpy(uuid.data, object->uuid, sizeof(object->uuid)); 1619 switch (virtio_object_type(&uuid)) { 1620 case TYPE_VHOST_DEV: 1621 { 1622 struct vhost_dev *owner = virtio_lookup_vhost_device(&uuid); 1623 if (dev != owner) { 1624 /* Not allowed to remove non-owned entries */ 1625 return 0; 1626 } 1627 break; 1628 } 1629 default: 1630 /* Not allowed to remove non-owned entries */ 1631 return 0; 1632 } 1633 1634 return virtio_remove_resource(&uuid); 1635 } 1636 1637 static bool vhost_user_send_resp(QIOChannel *ioc, VhostUserHeader *hdr, 1638 VhostUserPayload *payload, Error **errp) 1639 { 1640 struct iovec iov[] = { 1641 { .iov_base = hdr, .iov_len = VHOST_USER_HDR_SIZE }, 1642 { .iov_base = payload, .iov_len = hdr->size }, 1643 }; 1644 1645 hdr->flags &= ~VHOST_USER_NEED_REPLY_MASK; 1646 hdr->flags |= VHOST_USER_REPLY_MASK; 1647 1648 return !qio_channel_writev_all(ioc, iov, ARRAY_SIZE(iov), errp); 1649 } 1650 1651 static bool 1652 vhost_user_backend_send_dmabuf_fd(QIOChannel *ioc, VhostUserHeader *hdr, 1653 VhostUserPayload *payload, Error **errp) 1654 { 1655 hdr->size = sizeof(payload->u64); 1656 return vhost_user_send_resp(ioc, hdr, payload, errp); 1657 } 1658 1659 int vhost_user_get_shared_object(struct vhost_dev *dev, unsigned char *uuid, 1660 int *dmabuf_fd) 1661 { 1662 struct vhost_user *u = dev->opaque; 1663 CharBackend *chr = u->user->chr; 1664 int ret; 1665 VhostUserMsg msg = { 1666 .hdr.request = VHOST_USER_GET_SHARED_OBJECT, 1667 .hdr.flags = VHOST_USER_VERSION, 1668 }; 1669 memcpy(msg.payload.object.uuid, uuid, sizeof(msg.payload.object.uuid)); 1670 1671 ret = vhost_user_write(dev, &msg, NULL, 0); 1672 if (ret < 0) { 1673 return ret; 1674 } 1675 1676 ret = vhost_user_read(dev, &msg); 1677 if (ret < 0) { 1678 return ret; 1679 } 1680 1681 if (msg.hdr.request != VHOST_USER_GET_SHARED_OBJECT) { 1682 error_report("Received unexpected msg type. 
" 1683 "Expected %d received %d", 1684 VHOST_USER_GET_SHARED_OBJECT, msg.hdr.request); 1685 return -EPROTO; 1686 } 1687 1688 *dmabuf_fd = qemu_chr_fe_get_msgfd(chr); 1689 if (*dmabuf_fd < 0) { 1690 error_report("Failed to get dmabuf fd"); 1691 return -EIO; 1692 } 1693 1694 return 0; 1695 } 1696 1697 static int 1698 vhost_user_backend_handle_shared_object_lookup(struct vhost_user *u, 1699 QIOChannel *ioc, 1700 VhostUserHeader *hdr, 1701 VhostUserPayload *payload) 1702 { 1703 QemuUUID uuid; 1704 CharBackend *chr = u->user->chr; 1705 Error *local_err = NULL; 1706 int dmabuf_fd = -1; 1707 int fd_num = 0; 1708 1709 memcpy(uuid.data, payload->object.uuid, sizeof(payload->object.uuid)); 1710 1711 payload->u64 = 0; 1712 switch (virtio_object_type(&uuid)) { 1713 case TYPE_DMABUF: 1714 dmabuf_fd = virtio_lookup_dmabuf(&uuid); 1715 break; 1716 case TYPE_VHOST_DEV: 1717 { 1718 struct vhost_dev *dev = virtio_lookup_vhost_device(&uuid); 1719 if (dev == NULL) { 1720 payload->u64 = -EINVAL; 1721 break; 1722 } 1723 int ret = vhost_user_get_shared_object(dev, uuid.data, &dmabuf_fd); 1724 if (ret < 0) { 1725 payload->u64 = ret; 1726 } 1727 break; 1728 } 1729 case TYPE_INVALID: 1730 payload->u64 = -EINVAL; 1731 break; 1732 } 1733 1734 if (dmabuf_fd != -1) { 1735 fd_num++; 1736 } 1737 1738 if (qemu_chr_fe_set_msgfds(chr, &dmabuf_fd, fd_num) < 0) { 1739 error_report("Failed to set msg fds."); 1740 payload->u64 = -EINVAL; 1741 } 1742 1743 if (!vhost_user_backend_send_dmabuf_fd(ioc, hdr, payload, &local_err)) { 1744 error_report_err(local_err); 1745 return -EINVAL; 1746 } 1747 1748 return 0; 1749 } 1750 1751 static void close_backend_channel(struct vhost_user *u) 1752 { 1753 g_source_destroy(u->backend_src); 1754 g_source_unref(u->backend_src); 1755 u->backend_src = NULL; 1756 object_unref(OBJECT(u->backend_ioc)); 1757 u->backend_ioc = NULL; 1758 } 1759 1760 static gboolean backend_read(QIOChannel *ioc, GIOCondition condition, 1761 gpointer opaque) 1762 { 1763 struct vhost_dev *dev = opaque; 1764 struct vhost_user *u = dev->opaque; 1765 VhostUserHeader hdr = { 0, }; 1766 VhostUserPayload payload = { 0, }; 1767 Error *local_err = NULL; 1768 gboolean rc = G_SOURCE_CONTINUE; 1769 int ret = 0; 1770 struct iovec iov; 1771 g_autofree int *fd = NULL; 1772 size_t fdsize = 0; 1773 int i; 1774 1775 /* Read header */ 1776 iov.iov_base = &hdr; 1777 iov.iov_len = VHOST_USER_HDR_SIZE; 1778 1779 if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) { 1780 error_report_err(local_err); 1781 goto err; 1782 } 1783 1784 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) { 1785 error_report("Failed to read msg header." 1786 " Size %d exceeds the maximum %zu.", hdr.size, 1787 VHOST_USER_PAYLOAD_SIZE); 1788 goto err; 1789 } 1790 1791 /* Read payload */ 1792 if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) { 1793 error_report_err(local_err); 1794 goto err; 1795 } 1796 1797 switch (hdr.request) { 1798 case VHOST_USER_BACKEND_IOTLB_MSG: 1799 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); 1800 break; 1801 case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG: 1802 ret = vhost_user_backend_handle_config_change(dev); 1803 break; 1804 case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG: 1805 ret = vhost_user_backend_handle_vring_host_notifier(dev, &payload.area, 1806 fd ? 
fd[0] : -1); 1807 break; 1808 case VHOST_USER_BACKEND_SHARED_OBJECT_ADD: 1809 ret = vhost_user_backend_handle_shared_object_add(dev, &payload.object); 1810 break; 1811 case VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE: 1812 ret = vhost_user_backend_handle_shared_object_remove(dev, 1813 &payload.object); 1814 break; 1815 case VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP: 1816 ret = vhost_user_backend_handle_shared_object_lookup(dev->opaque, ioc, 1817 &hdr, &payload); 1818 break; 1819 default: 1820 error_report("Received unexpected msg type: %d.", hdr.request); 1821 ret = -EINVAL; 1822 } 1823 1824 /* 1825 * REPLY_ACK feature handling. Other reply types has to be managed 1826 * directly in their request handlers. 1827 */ 1828 if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1829 payload.u64 = !!ret; 1830 hdr.size = sizeof(payload.u64); 1831 1832 if (!vhost_user_send_resp(ioc, &hdr, &payload, &local_err)) { 1833 error_report_err(local_err); 1834 goto err; 1835 } 1836 } 1837 1838 goto fdcleanup; 1839 1840 err: 1841 close_backend_channel(u); 1842 rc = G_SOURCE_REMOVE; 1843 1844 fdcleanup: 1845 if (fd) { 1846 for (i = 0; i < fdsize; i++) { 1847 close(fd[i]); 1848 } 1849 } 1850 return rc; 1851 } 1852 1853 static int vhost_setup_backend_channel(struct vhost_dev *dev) 1854 { 1855 VhostUserMsg msg = { 1856 .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD, 1857 .hdr.flags = VHOST_USER_VERSION, 1858 }; 1859 struct vhost_user *u = dev->opaque; 1860 int sv[2], ret = 0; 1861 bool reply_supported = virtio_has_feature(dev->protocol_features, 1862 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1863 Error *local_err = NULL; 1864 QIOChannel *ioc; 1865 1866 if (!virtio_has_feature(dev->protocol_features, 1867 VHOST_USER_PROTOCOL_F_BACKEND_REQ)) { 1868 return 0; 1869 } 1870 1871 if (qemu_socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { 1872 int saved_errno = errno; 1873 error_report("socketpair() failed"); 1874 return -saved_errno; 1875 } 1876 1877 ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err)); 1878 if (!ioc) { 1879 error_report_err(local_err); 1880 return -ECONNREFUSED; 1881 } 1882 u->backend_ioc = ioc; 1883 u->backend_src = qio_channel_add_watch_source(u->backend_ioc, 1884 G_IO_IN | G_IO_HUP, 1885 backend_read, dev, NULL, NULL); 1886 1887 if (reply_supported) { 1888 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1889 } 1890 1891 ret = vhost_user_write(dev, &msg, &sv[1], 1); 1892 if (ret) { 1893 goto out; 1894 } 1895 1896 if (reply_supported) { 1897 ret = process_message_reply(dev, &msg); 1898 } 1899 1900 out: 1901 close(sv[1]); 1902 if (ret) { 1903 close_backend_channel(u); 1904 } 1905 1906 return ret; 1907 } 1908 1909 #ifdef CONFIG_LINUX 1910 /* 1911 * Called back from the postcopy fault thread when a fault is received on our 1912 * ufd. 
1913 * TODO: This is Linux specific 1914 */ 1915 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd, 1916 void *ufd) 1917 { 1918 struct vhost_dev *dev = pcfd->data; 1919 struct vhost_user *u = dev->opaque; 1920 struct uffd_msg *msg = ufd; 1921 uint64_t faultaddr = msg->arg.pagefault.address; 1922 RAMBlock *rb = NULL; 1923 uint64_t rb_offset; 1924 int i; 1925 1926 trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr, 1927 dev->mem->nregions); 1928 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1929 trace_vhost_user_postcopy_fault_handler_loop(i, 1930 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size); 1931 if (faultaddr >= u->postcopy_client_bases[i]) { 1932 /* Ofset of the fault address in the vhost region */ 1933 uint64_t region_offset = faultaddr - u->postcopy_client_bases[i]; 1934 if (region_offset < dev->mem->regions[i].memory_size) { 1935 rb_offset = region_offset + u->region_rb_offset[i]; 1936 trace_vhost_user_postcopy_fault_handler_found(i, 1937 region_offset, rb_offset); 1938 rb = u->region_rb[i]; 1939 return postcopy_request_shared_page(pcfd, rb, faultaddr, 1940 rb_offset); 1941 } 1942 } 1943 } 1944 error_report("%s: Failed to find region for fault %" PRIx64, 1945 __func__, faultaddr); 1946 return -1; 1947 } 1948 1949 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb, 1950 uint64_t offset) 1951 { 1952 struct vhost_dev *dev = pcfd->data; 1953 struct vhost_user *u = dev->opaque; 1954 int i; 1955 1956 trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset); 1957 1958 if (!u) { 1959 return 0; 1960 } 1961 /* Translate the offset into an address in the clients address space */ 1962 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1963 if (u->region_rb[i] == rb && 1964 offset >= u->region_rb_offset[i] && 1965 offset < (u->region_rb_offset[i] + 1966 dev->mem->regions[i].memory_size)) { 1967 uint64_t client_addr = (offset - u->region_rb_offset[i]) + 1968 u->postcopy_client_bases[i]; 1969 trace_vhost_user_postcopy_waker_found(client_addr); 1970 return postcopy_wake_shared(pcfd, client_addr, rb); 1971 } 1972 } 1973 1974 trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset); 1975 return 0; 1976 } 1977 #endif 1978 1979 /* 1980 * Called at the start of an inbound postcopy on reception of the 1981 * 'advise' command. 1982 */ 1983 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) 1984 { 1985 #ifdef CONFIG_LINUX 1986 struct vhost_user *u = dev->opaque; 1987 CharBackend *chr = u->user->chr; 1988 int ufd; 1989 int ret; 1990 VhostUserMsg msg = { 1991 .hdr.request = VHOST_USER_POSTCOPY_ADVISE, 1992 .hdr.flags = VHOST_USER_VERSION, 1993 }; 1994 1995 ret = vhost_user_write(dev, &msg, NULL, 0); 1996 if (ret < 0) { 1997 error_setg(errp, "Failed to send postcopy_advise to vhost"); 1998 return ret; 1999 } 2000 2001 ret = vhost_user_read(dev, &msg); 2002 if (ret < 0) { 2003 error_setg(errp, "Failed to get postcopy_advise reply from vhost"); 2004 return ret; 2005 } 2006 2007 if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) { 2008 error_setg(errp, "Unexpected msg type. 
Expected %d received %d", 2009 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request); 2010 return -EPROTO; 2011 } 2012 2013 if (msg.hdr.size) { 2014 error_setg(errp, "Received bad msg size."); 2015 return -EPROTO; 2016 } 2017 ufd = qemu_chr_fe_get_msgfd(chr); 2018 if (ufd < 0) { 2019 error_setg(errp, "%s: Failed to get ufd", __func__); 2020 return -EIO; 2021 } 2022 qemu_socket_set_nonblock(ufd); 2023 2024 /* register ufd with userfault thread */ 2025 u->postcopy_fd.fd = ufd; 2026 u->postcopy_fd.data = dev; 2027 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler; 2028 u->postcopy_fd.waker = vhost_user_postcopy_waker; 2029 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */ 2030 postcopy_register_shared_ufd(&u->postcopy_fd); 2031 return 0; 2032 #else 2033 error_setg(errp, "Postcopy not supported on non-Linux systems"); 2034 return -ENOSYS; 2035 #endif 2036 } 2037 2038 /* 2039 * Called at the switch to postcopy on reception of the 'listen' command. 2040 */ 2041 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) 2042 { 2043 struct vhost_user *u = dev->opaque; 2044 int ret; 2045 VhostUserMsg msg = { 2046 .hdr.request = VHOST_USER_POSTCOPY_LISTEN, 2047 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 2048 }; 2049 u->postcopy_listen = true; 2050 2051 trace_vhost_user_postcopy_listen(); 2052 2053 ret = vhost_user_write(dev, &msg, NULL, 0); 2054 if (ret < 0) { 2055 error_setg(errp, "Failed to send postcopy_listen to vhost"); 2056 return ret; 2057 } 2058 2059 ret = process_message_reply(dev, &msg); 2060 if (ret) { 2061 error_setg(errp, "Failed to receive reply to postcopy_listen"); 2062 return ret; 2063 } 2064 2065 return 0; 2066 } 2067 2068 /* 2069 * Called at the end of postcopy 2070 */ 2071 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp) 2072 { 2073 VhostUserMsg msg = { 2074 .hdr.request = VHOST_USER_POSTCOPY_END, 2075 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 2076 }; 2077 int ret; 2078 struct vhost_user *u = dev->opaque; 2079 2080 trace_vhost_user_postcopy_end_entry(); 2081 2082 ret = vhost_user_write(dev, &msg, NULL, 0); 2083 if (ret < 0) { 2084 error_setg(errp, "Failed to send postcopy_end to vhost"); 2085 return ret; 2086 } 2087 2088 ret = process_message_reply(dev, &msg); 2089 if (ret) { 2090 error_setg(errp, "Failed to receive reply to postcopy_end"); 2091 return ret; 2092 } 2093 postcopy_unregister_shared_ufd(&u->postcopy_fd); 2094 close(u->postcopy_fd.fd); 2095 u->postcopy_fd.handler = NULL; 2096 2097 trace_vhost_user_postcopy_end_exit(); 2098 2099 return 0; 2100 } 2101 2102 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, 2103 void *opaque, Error **errp) 2104 { 2105 struct PostcopyNotifyData *pnd = opaque; 2106 struct vhost_user *u = container_of(notifier, struct vhost_user, 2107 postcopy_notifier); 2108 struct vhost_dev *dev = u->dev; 2109 2110 switch (pnd->reason) { 2111 case POSTCOPY_NOTIFY_PROBE: 2112 if (!virtio_has_feature(dev->protocol_features, 2113 VHOST_USER_PROTOCOL_F_PAGEFAULT)) { 2114 /* TODO: Get the device name into this error somehow */ 2115 error_setg(errp, 2116 "vhost-user backend not capable of postcopy"); 2117 return -ENOENT; 2118 } 2119 break; 2120 2121 case POSTCOPY_NOTIFY_INBOUND_ADVISE: 2122 return vhost_user_postcopy_advise(dev, errp); 2123 2124 case POSTCOPY_NOTIFY_INBOUND_LISTEN: 2125 return vhost_user_postcopy_listen(dev, errp); 2126 2127 case POSTCOPY_NOTIFY_INBOUND_END: 2128 return vhost_user_postcopy_end(dev, errp); 2129 2130 default: 2131 /* 
static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
                                   Error **errp)
{
    uint64_t features, ram_slots;
    struct vhost_user *u;
    VhostUserState *vus = (VhostUserState *) opaque;
    int err;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    u = g_new0(struct vhost_user, 1);
    u->user = vus;
    u->dev = dev;
    dev->opaque = u;

    err = vhost_user_get_features(dev, &features);
    if (err < 0) {
        error_setg_errno(errp, -err, "vhost_backend_init failed");
        return err;
    }

    if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
        bool supports_f_config = vus->supports_config ||
            (dev->config_ops && dev->config_ops->vhost_dev_config_notifier);
        uint64_t protocol_features;

        dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;

        err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
                                 &protocol_features);
        if (err < 0) {
            error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
            return -EPROTO;
        }

        /*
         * We will use all the protocol features we support - although
         * we suppress F_CONFIG if we know QEMU's internal code cannot
         * support it.
         */
        protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK;

        if (supports_f_config) {
            if (!virtio_has_feature(protocol_features,
                                    VHOST_USER_PROTOCOL_F_CONFIG)) {
                error_setg(errp, "vhost-user device expecting "
                           "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user "
                           "backend does not support it.");
                return -EPROTO;
            }
        } else {
            if (virtio_has_feature(protocol_features,
                                   VHOST_USER_PROTOCOL_F_CONFIG)) {
                warn_report("vhost-user backend supports "
                            "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not.");
                protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
            }
        }

        /* final set of protocol features */
        dev->protocol_features = protocol_features;
        err = vhost_user_set_protocol_features(dev, dev->protocol_features);
        if (err < 0) {
            error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
            return -EPROTO;
        }

        /* query the max queues we support if the backend supports multiqueue */
        if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
            err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
                                     &dev->max_queues);
            if (err < 0) {
                error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
                return -EPROTO;
            }
        } else {
            dev->max_queues = 1;
        }

        if (dev->num_queues && dev->max_queues < dev->num_queues) {
            error_setg(errp, "The maximum number of queues supported by the "
                       "backend is %" PRIu64, dev->max_queues);
            return -EINVAL;
        }

        if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
            !(virtio_has_feature(dev->protocol_features,
                                 VHOST_USER_PROTOCOL_F_BACKEND_REQ) &&
              virtio_has_feature(dev->protocol_features,
                                 VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
            error_setg(errp, "IOMMU support requires reply-ack and "
                       "backend-req protocol features.");
            return -EINVAL;
        }

        /* get max memory regions if backend supports configurable RAM slots */
        if (!virtio_has_feature(dev->protocol_features,
                                VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) {
            u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS;
        } else {
            err = vhost_user_get_max_memslots(dev, &ram_slots);
            if (err < 0) {
                error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
                return -EPROTO;
            }

            if (ram_slots < u->user->memory_slots) {
                error_setg(errp, "The backend specified a max ram slots limit "
                           "of %" PRIu64", when the prior validated limit was "
                           "%d. This limit should never decrease.", ram_slots,
                           u->user->memory_slots);
                return -EINVAL;
            }

            u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS);
        }
    }

    if (dev->migration_blocker == NULL &&
        !virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
        error_setg(&dev->migration_blocker,
                   "Migration disabled: vhost-user backend lacks "
                   "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
    }

    if (dev->vq_index == 0) {
        err = vhost_setup_backend_channel(dev);
        if (err < 0) {
            error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
            return -EPROTO;
        }
    }

    u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
    postcopy_add_notifier(&u->postcopy_notifier);

    return 0;
}

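/*
 * Undo vhost_user_backend_init(): drop the postcopy notifier and userfault
 * fd if they are still registered, close the backend channel, and free the
 * region bookkeeping arrays along with the per-device vhost_user state.
 */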
static int vhost_user_backend_cleanup(struct vhost_dev *dev)
{
    struct vhost_user *u;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    u = dev->opaque;
    if (u->postcopy_notifier.notify) {
        postcopy_remove_notifier(&u->postcopy_notifier);
        u->postcopy_notifier.notify = NULL;
    }
    u->postcopy_listen = false;
    if (u->postcopy_fd.handler) {
        postcopy_unregister_shared_ufd(&u->postcopy_fd);
        close(u->postcopy_fd.fd);
        u->postcopy_fd.handler = NULL;
    }
    if (u->backend_ioc) {
        close_backend_channel(u);
    }
    g_free(u->region_rb);
    u->region_rb = NULL;
    g_free(u->region_rb_offset);
    u->region_rb_offset = NULL;
    u->region_rb_len = 0;
    g_free(u);
    dev->opaque = NULL;

    return 0;
}

static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    return idx;
}

static int vhost_user_memslots_limit(struct vhost_dev *dev)
{
    struct vhost_user *u = dev->opaque;

    return u->user->memory_slots;
}

static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    return virtio_has_feature(dev->protocol_features,
                              VHOST_USER_PROTOCOL_F_LOG_SHMFD);
}

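/*
 * Called when migration of the guest has completed.  If the guest driver
 * cannot announce its presence itself (no VIRTIO_NET_F_GUEST_ANNOUNCE),
 * ask the backend to broadcast a RARP with the given MAC address via
 * VHOST_USER_SEND_RARP, provided it offers VHOST_USER_PROTOCOL_F_RARP.
 */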
static int vhost_user_migration_done(struct vhost_dev *dev, char *mac_addr)
{
    VhostUserMsg msg = { };

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    /* If guest supports GUEST_ANNOUNCE do nothing */
    if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
        return 0;
    }

    /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */
    if (virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_RARP)) {
        msg.hdr.request = VHOST_USER_SEND_RARP;
        msg.hdr.flags = VHOST_USER_VERSION;
        memcpy((char *)&msg.payload.u64, mac_addr, 6);
        msg.hdr.size = sizeof(msg.payload.u64);

        return vhost_user_write(dev, &msg, NULL, 0);
    }
    return -ENOTSUP;
}

static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
{
    VhostUserMsg msg;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
    int ret;

    if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
        return 0;
    }

    msg.hdr.request = VHOST_USER_NET_SET_MTU;
    msg.payload.u64 = mtu;
    msg.hdr.size = sizeof(msg.payload.u64);
    msg.hdr.flags = VHOST_USER_VERSION;
    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    /* If reply_ack is supported, the backend has to ack that the MTU is valid */
    if (reply_supported) {
        return process_message_reply(dev, &msg);
    }

    return 0;
}

static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
                                            struct vhost_iotlb_msg *imsg)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_IOTLB_MSG,
        .hdr.size = sizeof(msg.payload.iotlb),
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
        .payload.iotlb = *imsg,
    };

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    return process_message_reply(dev, &msg);
}

static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
{
    /* No-op as the receive channel is not dedicated to IOTLB messages. */
}

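/*
 * Virtio config space access.  Both directions require the backend to offer
 * VHOST_USER_PROTOCOL_F_CONFIG: vhost_user_get_config() reads up to
 * VHOST_USER_MAX_CONFIG_SIZE bytes with VHOST_USER_GET_CONFIG, and
 * vhost_user_set_config() below writes a region of the config space with
 * VHOST_USER_SET_CONFIG.
 */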
static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len, Error **errp)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_CONFIG,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_CONFIG)) {
        error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported");
        return -EINVAL;
    }

    assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE);

    msg.payload.config.offset = 0;
    msg.payload.config.size = config_len;
    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "vhost_get_config failed");
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "vhost_get_config failed");
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
        error_setg(errp,
                   "Received unexpected msg type. Expected %d received %d",
                   VHOST_USER_GET_CONFIG, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
        error_setg(errp, "Received bad msg size.");
        return -EPROTO;
    }

    memcpy(config, msg.payload.config.region, config_len);

    return 0;
}

static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size, uint32_t flags)
{
    int ret;
    uint8_t *p;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);

    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_CONFIG,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_CONFIG)) {
        return -ENOTSUP;
    }

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    if (size > VHOST_USER_MAX_CONFIG_SIZE) {
        return -EINVAL;
    }

    msg.payload.config.offset = offset;
    msg.payload.config.size = size;
    msg.payload.config.flags = flags;
    p = msg.payload.config.region;
    memcpy(p, data, size);

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    if (reply_supported) {
        return process_message_reply(dev, &msg);
    }

    return 0;
}

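/*
 * Create a crypto session in the backend (VHOST_USER_CREATE_CRYPTO_SESSION).
 * The symmetric or asymmetric session parameters and keys are copied into
 * the message payload; the backend returns the new session id in the same
 * payload, or a negative value on failure.  Requires the
 * VHOST_USER_PROTOCOL_F_CRYPTO_SESSION protocol feature.
 */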
static int vhost_user_crypto_create_session(struct vhost_dev *dev,
                                            void *session_info,
                                            uint64_t *session_id)
{
    int ret;
    bool crypto_session = virtio_has_feature(dev->protocol_features,
                                             VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
    CryptoDevBackendSessionInfo *backend_info = session_info;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = sizeof(msg.payload.session),
    };

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    if (!crypto_session) {
        error_report("vhost-user trying to send unhandled ioctl");
        return -ENOTSUP;
    }

    if (backend_info->op_code == VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION) {
        CryptoDevBackendAsymSessionInfo *sess = &backend_info->u.asym_sess_info;
        size_t keylen;

        memcpy(&msg.payload.session.u.asym.session_setup_data, sess,
               sizeof(CryptoDevBackendAsymSessionInfo));
        if (sess->keylen) {
            keylen = sizeof(msg.payload.session.u.asym.key);
            if (sess->keylen > keylen) {
                error_report("Unsupported asymmetric key size");
                return -ENOTSUP;
            }

            memcpy(&msg.payload.session.u.asym.key, sess->key,
                   sess->keylen);
        }
    } else {
        CryptoDevBackendSymSessionInfo *sess = &backend_info->u.sym_sess_info;
        size_t keylen;

        memcpy(&msg.payload.session.u.sym.session_setup_data, sess,
               sizeof(CryptoDevBackendSymSessionInfo));
        if (sess->key_len) {
            keylen = sizeof(msg.payload.session.u.sym.key);
            if (sess->key_len > keylen) {
                error_report("Unsupported cipher key size");
                return -ENOTSUP;
            }

            memcpy(&msg.payload.session.u.sym.key, sess->cipher_key,
                   sess->key_len);
        }

        if (sess->auth_key_len > 0) {
            keylen = sizeof(msg.payload.session.u.sym.auth_key);
            if (sess->auth_key_len > keylen) {
                error_report("Unsupported auth key size");
                return -ENOTSUP;
            }

            memcpy(&msg.payload.session.u.sym.auth_key, sess->auth_key,
                   sess->auth_key_len);
        }
    }

    msg.payload.session.op_code = backend_info->op_code;
    msg.payload.session.session_id = backend_info->session_id;
    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_report("vhost_user_write() returned %d, create session failed",
                     ret);
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        error_report("vhost_user_read() returned %d, create session failed",
                     ret);
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != sizeof(msg.payload.session)) {
        error_report("Received bad msg size.");
        return -EPROTO;
    }

    if (msg.payload.session.session_id < 0) {
        error_report("Bad session id: %" PRId64 "",
                     msg.payload.session.session_id);
        return -EINVAL;
    }
    *session_id = msg.payload.session.session_id;

    return 0;
}

static int
vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
{
    int ret;
    bool crypto_session = virtio_has_feature(dev->protocol_features,
                                             VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = sizeof(msg.payload.u64),
    };
    msg.payload.u64 = session_id;

    if (!crypto_session) {
        error_report("vhost-user trying to send unhandled ioctl");
        return -ENOTSUP;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_report("vhost_user_write() returned %d, close session failed",
                     ret);
        return ret;
    }

    return 0;
}

static bool vhost_user_no_private_memslots(struct vhost_dev *dev)
{
    return true;
}

/*
 * Ask the backend for a shared "inflight" buffer that tracks in-flight
 * requests (VHOST_USER_GET_INFLIGHT_FD): the backend returns the buffer
 * size and offset in the payload and the fd as ancillary data, which is
 * mmap()ed and recorded in *inflight so it can later be handed back with
 * VHOST_USER_SET_INFLIGHT_FD.  A no-op unless
 * VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD was negotiated.
 */
static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
                                      uint16_t queue_size,
                                      struct vhost_inflight *inflight)
{
    void *addr;
    int fd;
    int ret;
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_INFLIGHT_FD,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.inflight.num_queues = dev->nvqs,
        .payload.inflight.queue_size = queue_size,
        .hdr.size = sizeof(msg.payload.inflight),
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) {
        error_report("Received unexpected msg type. "
                     "Expected %d received %d",
                     VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != sizeof(msg.payload.inflight)) {
        error_report("Received bad msg size.");
        return -EPROTO;
    }

    if (!msg.payload.inflight.mmap_size) {
        return 0;
    }

    fd = qemu_chr_fe_get_msgfd(chr);
    if (fd < 0) {
        error_report("Failed to get mem fd");
        return -EIO;
    }

    addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE,
                MAP_SHARED, fd, msg.payload.inflight.mmap_offset);

    if (addr == MAP_FAILED) {
        error_report("Failed to mmap mem fd");
        close(fd);
        return -EFAULT;
    }

    inflight->addr = addr;
    inflight->fd = fd;
    inflight->size = msg.payload.inflight.mmap_size;
    inflight->offset = msg.payload.inflight.mmap_offset;
    inflight->queue_size = queue_size;

    return 0;
}

static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
                                      struct vhost_inflight *inflight)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_INFLIGHT_FD,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.inflight.mmap_size = inflight->size,
        .payload.inflight.mmap_offset = inflight->offset,
        .payload.inflight.num_queues = dev->nvqs,
        .payload.inflight.queue_size = inflight->queue_size,
        .hdr.size = sizeof(msg.payload.inflight),
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    return vhost_user_write(dev, &msg, &inflight->fd, 1);
}

static void vhost_user_state_destroy(gpointer data)
{
    VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
    if (n) {
        vhost_user_host_notifier_remove(n, NULL);
        object_unparent(OBJECT(&n->mr));
        /*
         * We can't free until vhost_user_host_notifier_remove has
         * done its thing, so schedule the free with RCU.
         */
        g_free_rcu(n, rcu);
    }
}

bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
{
    if (user->chr) {
        error_setg(errp, "Cannot initialize vhost-user state");
        return false;
    }
    user->chr = chr;
    user->memory_slots = 0;
    user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4,
                                           &vhost_user_state_destroy);
    return true;
}

void vhost_user_cleanup(VhostUserState *user)
{
    if (!user->chr) {
        return;
    }
    memory_region_transaction_begin();
    user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true);
    memory_region_transaction_commit();
    user->chr = NULL;
}

typedef struct {
    vu_async_close_fn cb;
    DeviceState *dev;
    CharBackend *cd;
    struct vhost_dev *vhost;
    IOEventHandler *event_cb;
} VhostAsyncCallback;

static void vhost_user_async_close_bh(void *opaque)
{
    VhostAsyncCallback *data = opaque;
    struct vhost_dev *vhost = data->vhost;

    /*
     * If the vhost_dev has been cleared in the meantime there is
     * nothing left to do as some other path has completed the
     * cleanup.
     */
    if (vhost->vdev) {
        data->cb(data->dev);
    } else if (data->event_cb) {
        qemu_chr_fe_set_handlers(data->cd, NULL, NULL, data->event_cb,
                                 NULL, data->dev, NULL, true);
    }

    g_free(data);
}

/*
 * We only schedule the work if the machine is running.  If suspended
 * we want to keep all the in-flight data as is for migration
 * purposes.
 */
void vhost_user_async_close(DeviceState *d,
                            CharBackend *chardev, struct vhost_dev *vhost,
                            vu_async_close_fn cb,
                            IOEventHandler *event_cb)
{
    if (!runstate_check(RUN_STATE_SHUTDOWN)) {
        /*
         * A close event may happen during a read/write, but vhost
         * code assumes the vhost_dev remains setup, so delay the
         * stop & clear.
         */
        AioContext *ctx = qemu_get_current_aio_context();
        VhostAsyncCallback *data = g_new0(VhostAsyncCallback, 1);

        /* Save data for the callback */
        data->cb = cb;
        data->dev = d;
        data->cd = chardev;
        data->vhost = vhost;
        data->event_cb = event_cb;

        /* Disable any further notifications on the chardev */
        qemu_chr_fe_set_handlers(chardev,
                                 NULL, NULL, NULL, NULL, NULL, NULL,
                                 false);

        aio_bh_schedule_oneshot(ctx, vhost_user_async_close_bh, data);

        /*
         * Move the vhost device to the stopped state.  The vhost-user
         * device will be cleaned up and disconnected in the BH.  This can
         * be useful in the vhost migration code: if a disconnect was
         * caught, the generic vhost code can learn the device state
         * without knowing its type (in this case vhost-user).
         *
         * Note that if the vhost device is fully cleared by the time we
         * execute the bottom half, we won't continue with the cleanup.
         */
        vhost->started = false;
    }
}

static int vhost_user_dev_start(struct vhost_dev *dev, bool started)
{
    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_STATUS)) {
        return 0;
    }

    /* Set device status only for last queue pair */
    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
        return 0;
    }

    if (started) {
        return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                          VIRTIO_CONFIG_S_DRIVER |
                                          VIRTIO_CONFIG_S_DRIVER_OK);
    } else {
        return 0;
    }
}

static void vhost_user_reset_status(struct vhost_dev *dev)
{
    /* Set device status only for last queue pair */
    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
        return;
    }

    if (virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_STATUS)) {
        vhost_user_set_status(dev, 0);
    }
}

static bool vhost_user_supports_device_state(struct vhost_dev *dev)
{
    return virtio_has_feature(dev->protocol_features,
                              VHOST_USER_PROTOCOL_F_DEVICE_STATE);
}

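/*
 * Hand the backend one end of a pipe over which the device state is
 * transferred for migration (VHOST_USER_SET_DEVICE_STATE_FD).  The backend
 * either uses the fd we pass or, if the VHOST_USER_VRING_NOFD_MASK bit is
 * clear in its reply, provides its own fd as ancillary data, which is then
 * returned through *reply_fd.
 */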
static int vhost_user_set_device_state_fd(struct vhost_dev *dev,
                                          VhostDeviceStateDirection direction,
                                          VhostDeviceStatePhase phase,
                                          int fd,
                                          int *reply_fd,
                                          Error **errp)
{
    int ret;
    struct vhost_user *vu = dev->opaque;
    VhostUserMsg msg = {
        .hdr = {
            .request = VHOST_USER_SET_DEVICE_STATE_FD,
            .flags = VHOST_USER_VERSION,
            .size = sizeof(msg.payload.transfer_state),
        },
        .payload.transfer_state = {
            .direction = direction,
            .phase = phase,
        },
    };

    *reply_fd = -1;

    if (!vhost_user_supports_device_state(dev)) {
        close(fd);
        error_setg(errp, "Back-end does not support migration state transfer");
        return -ENOTSUP;
    }

    ret = vhost_user_write(dev, &msg, &fd, 1);
    close(fd);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "Failed to send SET_DEVICE_STATE_FD message");
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "Failed to receive SET_DEVICE_STATE_FD reply");
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_SET_DEVICE_STATE_FD) {
        error_setg(errp,
                   "Received unexpected message type, expected %d, received %d",
                   VHOST_USER_SET_DEVICE_STATE_FD, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != sizeof(msg.payload.u64)) {
        error_setg(errp,
                   "Received bad message size, expected %zu, received %" PRIu32,
                   sizeof(msg.payload.u64), msg.hdr.size);
        return -EPROTO;
    }

    if ((msg.payload.u64 & 0xff) != 0) {
        error_setg(errp, "Back-end did not accept migration state transfer");
        return -EIO;
    }

    if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
        *reply_fd = qemu_chr_fe_get_msgfd(vu->user->chr);
        if (*reply_fd < 0) {
            error_setg(errp,
                       "Failed to get back-end-provided transfer pipe FD");
            *reply_fd = -1;
            return -EIO;
        }
    }

    return 0;
}

static int vhost_user_check_device_state(struct vhost_dev *dev, Error **errp)
{
    int ret;
    VhostUserMsg msg = {
        .hdr = {
            .request = VHOST_USER_CHECK_DEVICE_STATE,
            .flags = VHOST_USER_VERSION,
            .size = 0,
        },
    };

    if (!vhost_user_supports_device_state(dev)) {
        error_setg(errp, "Back-end does not support migration state transfer");
        return -ENOTSUP;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "Failed to send CHECK_DEVICE_STATE message");
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "Failed to receive CHECK_DEVICE_STATE reply");
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_CHECK_DEVICE_STATE) {
        error_setg(errp,
                   "Received unexpected message type, expected %d, received %d",
                   VHOST_USER_CHECK_DEVICE_STATE, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != sizeof(msg.payload.u64)) {
        error_setg(errp,
                   "Received bad message size, expected %zu, received %" PRIu32,
                   sizeof(msg.payload.u64), msg.hdr.size);
        return -EPROTO;
    }

    if (msg.payload.u64 != 0) {
        error_setg(errp, "Back-end failed to process its internal state");
        return -EIO;
    }

    return 0;
}

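/*
 * The vhost backend operations used when a device's backend type is
 * VHOST_BACKEND_TYPE_USER; the generic vhost layer calls into vhost-user
 * through this table.
 */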
const VhostOps user_ops = {
        .backend_type = VHOST_BACKEND_TYPE_USER,
        .vhost_backend_init = vhost_user_backend_init,
        .vhost_backend_cleanup = vhost_user_backend_cleanup,
        .vhost_backend_memslots_limit = vhost_user_memslots_limit,
        .vhost_backend_no_private_memslots = vhost_user_no_private_memslots,
        .vhost_set_log_base = vhost_user_set_log_base,
        .vhost_set_mem_table = vhost_user_set_mem_table,
        .vhost_set_vring_addr = vhost_user_set_vring_addr,
        .vhost_set_vring_endian = vhost_user_set_vring_endian,
        .vhost_set_vring_num = vhost_user_set_vring_num,
        .vhost_set_vring_base = vhost_user_set_vring_base,
        .vhost_get_vring_base = vhost_user_get_vring_base,
        .vhost_set_vring_kick = vhost_user_set_vring_kick,
        .vhost_set_vring_call = vhost_user_set_vring_call,
        .vhost_set_vring_err = vhost_user_set_vring_err,
        .vhost_set_features = vhost_user_set_features,
        .vhost_get_features = vhost_user_get_features,
        .vhost_set_owner = vhost_user_set_owner,
        .vhost_reset_device = vhost_user_reset_device,
        .vhost_get_vq_index = vhost_user_get_vq_index,
        .vhost_set_vring_enable = vhost_user_set_vring_enable,
        .vhost_requires_shm_log = vhost_user_requires_shm_log,
        .vhost_migration_done = vhost_user_migration_done,
        .vhost_net_set_mtu = vhost_user_net_set_mtu,
        .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
        .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
        .vhost_get_config = vhost_user_get_config,
        .vhost_set_config = vhost_user_set_config,
        .vhost_crypto_create_session = vhost_user_crypto_create_session,
        .vhost_crypto_close_session = vhost_user_crypto_close_session,
        .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
        .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
        .vhost_dev_start = vhost_user_dev_start,
        .vhost_reset_status = vhost_user_reset_status,
        .vhost_supports_device_state = vhost_user_supports_device_state,
        .vhost_set_device_state_fd = vhost_user_set_device_state_fd,
        .vhost_check_device_state = vhost_user_check_device_state,
};